18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (C) 2011 Red Hat, Inc. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * This file is released under the GPL. 58c2ecf20Sopenharmony_ci */ 68c2ecf20Sopenharmony_ci 78c2ecf20Sopenharmony_ci#include "dm-btree.h" 88c2ecf20Sopenharmony_ci#include "dm-btree-internal.h" 98c2ecf20Sopenharmony_ci#include "dm-transaction-manager.h" 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#include <linux/export.h> 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci/* 148c2ecf20Sopenharmony_ci * Removing an entry from a btree 158c2ecf20Sopenharmony_ci * ============================== 168c2ecf20Sopenharmony_ci * 178c2ecf20Sopenharmony_ci * A very important constraint for our btree is that no node, except the 188c2ecf20Sopenharmony_ci * root, may have fewer than a certain number of entries. 198c2ecf20Sopenharmony_ci * (MIN_ENTRIES <= nr_entries <= MAX_ENTRIES). 208c2ecf20Sopenharmony_ci * 218c2ecf20Sopenharmony_ci * Ensuring this is complicated by the way we want to only ever hold the 228c2ecf20Sopenharmony_ci * locks on 2 nodes concurrently, and only change nodes in a top to bottom 238c2ecf20Sopenharmony_ci * fashion. 248c2ecf20Sopenharmony_ci * 258c2ecf20Sopenharmony_ci * Each node may have a left or right sibling. When decending the spine, 268c2ecf20Sopenharmony_ci * if a node contains only MIN_ENTRIES then we try and increase this to at 278c2ecf20Sopenharmony_ci * least MIN_ENTRIES + 1. We do this in the following ways: 288c2ecf20Sopenharmony_ci * 298c2ecf20Sopenharmony_ci * [A] No siblings => this can only happen if the node is the root, in which 308c2ecf20Sopenharmony_ci * case we copy the childs contents over the root. 318c2ecf20Sopenharmony_ci * 328c2ecf20Sopenharmony_ci * [B] No left sibling 338c2ecf20Sopenharmony_ci * ==> rebalance(node, right sibling) 348c2ecf20Sopenharmony_ci * 358c2ecf20Sopenharmony_ci * [C] No right sibling 368c2ecf20Sopenharmony_ci * ==> rebalance(left sibling, node) 378c2ecf20Sopenharmony_ci * 388c2ecf20Sopenharmony_ci * [D] Both siblings, total_entries(left, node, right) <= DEL_THRESHOLD 398c2ecf20Sopenharmony_ci * ==> delete node adding it's contents to left and right 408c2ecf20Sopenharmony_ci * 418c2ecf20Sopenharmony_ci * [E] Both siblings, total_entries(left, node, right) > DEL_THRESHOLD 428c2ecf20Sopenharmony_ci * ==> rebalance(left, node, right) 438c2ecf20Sopenharmony_ci * 448c2ecf20Sopenharmony_ci * After these operations it's possible that the our original node no 458c2ecf20Sopenharmony_ci * longer contains the desired sub tree. For this reason this rebalancing 468c2ecf20Sopenharmony_ci * is performed on the children of the current node. This also avoids 478c2ecf20Sopenharmony_ci * having a special case for the root. 488c2ecf20Sopenharmony_ci * 498c2ecf20Sopenharmony_ci * Once this rebalancing has occurred we can then step into the child node 508c2ecf20Sopenharmony_ci * for internal nodes. Or delete the entry for leaf nodes. 518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci/* 548c2ecf20Sopenharmony_ci * Some little utilities for moving node data around. 558c2ecf20Sopenharmony_ci */ 568c2ecf20Sopenharmony_cistatic void node_shift(struct btree_node *n, int shift) 578c2ecf20Sopenharmony_ci{ 588c2ecf20Sopenharmony_ci uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); 598c2ecf20Sopenharmony_ci uint32_t value_size = le32_to_cpu(n->header.value_size); 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci if (shift < 0) { 628c2ecf20Sopenharmony_ci shift = -shift; 638c2ecf20Sopenharmony_ci BUG_ON(shift > nr_entries); 648c2ecf20Sopenharmony_ci BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift)); 658c2ecf20Sopenharmony_ci memmove(key_ptr(n, 0), 668c2ecf20Sopenharmony_ci key_ptr(n, shift), 678c2ecf20Sopenharmony_ci (nr_entries - shift) * sizeof(__le64)); 688c2ecf20Sopenharmony_ci memmove(value_ptr(n, 0), 698c2ecf20Sopenharmony_ci value_ptr(n, shift), 708c2ecf20Sopenharmony_ci (nr_entries - shift) * value_size); 718c2ecf20Sopenharmony_ci } else { 728c2ecf20Sopenharmony_ci BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries)); 738c2ecf20Sopenharmony_ci memmove(key_ptr(n, shift), 748c2ecf20Sopenharmony_ci key_ptr(n, 0), 758c2ecf20Sopenharmony_ci nr_entries * sizeof(__le64)); 768c2ecf20Sopenharmony_ci memmove(value_ptr(n, shift), 778c2ecf20Sopenharmony_ci value_ptr(n, 0), 788c2ecf20Sopenharmony_ci nr_entries * value_size); 798c2ecf20Sopenharmony_ci } 808c2ecf20Sopenharmony_ci} 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_cistatic void node_copy(struct btree_node *left, struct btree_node *right, int shift) 838c2ecf20Sopenharmony_ci{ 848c2ecf20Sopenharmony_ci uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 858c2ecf20Sopenharmony_ci uint32_t value_size = le32_to_cpu(left->header.value_size); 868c2ecf20Sopenharmony_ci BUG_ON(value_size != le32_to_cpu(right->header.value_size)); 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci if (shift < 0) { 898c2ecf20Sopenharmony_ci shift = -shift; 908c2ecf20Sopenharmony_ci BUG_ON(nr_left + shift > le32_to_cpu(left->header.max_entries)); 918c2ecf20Sopenharmony_ci memcpy(key_ptr(left, nr_left), 928c2ecf20Sopenharmony_ci key_ptr(right, 0), 938c2ecf20Sopenharmony_ci shift * sizeof(__le64)); 948c2ecf20Sopenharmony_ci memcpy(value_ptr(left, nr_left), 958c2ecf20Sopenharmony_ci value_ptr(right, 0), 968c2ecf20Sopenharmony_ci shift * value_size); 978c2ecf20Sopenharmony_ci } else { 988c2ecf20Sopenharmony_ci BUG_ON(shift > le32_to_cpu(right->header.max_entries)); 998c2ecf20Sopenharmony_ci memcpy(key_ptr(right, 0), 1008c2ecf20Sopenharmony_ci key_ptr(left, nr_left - shift), 1018c2ecf20Sopenharmony_ci shift * sizeof(__le64)); 1028c2ecf20Sopenharmony_ci memcpy(value_ptr(right, 0), 1038c2ecf20Sopenharmony_ci value_ptr(left, nr_left - shift), 1048c2ecf20Sopenharmony_ci shift * value_size); 1058c2ecf20Sopenharmony_ci } 1068c2ecf20Sopenharmony_ci} 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci/* 1098c2ecf20Sopenharmony_ci * Delete a specific entry from a leaf node. 1108c2ecf20Sopenharmony_ci */ 1118c2ecf20Sopenharmony_cistatic void delete_at(struct btree_node *n, unsigned index) 1128c2ecf20Sopenharmony_ci{ 1138c2ecf20Sopenharmony_ci unsigned nr_entries = le32_to_cpu(n->header.nr_entries); 1148c2ecf20Sopenharmony_ci unsigned nr_to_copy = nr_entries - (index + 1); 1158c2ecf20Sopenharmony_ci uint32_t value_size = le32_to_cpu(n->header.value_size); 1168c2ecf20Sopenharmony_ci BUG_ON(index >= nr_entries); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci if (nr_to_copy) { 1198c2ecf20Sopenharmony_ci memmove(key_ptr(n, index), 1208c2ecf20Sopenharmony_ci key_ptr(n, index + 1), 1218c2ecf20Sopenharmony_ci nr_to_copy * sizeof(__le64)); 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci memmove(value_ptr(n, index), 1248c2ecf20Sopenharmony_ci value_ptr(n, index + 1), 1258c2ecf20Sopenharmony_ci nr_to_copy * value_size); 1268c2ecf20Sopenharmony_ci } 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci n->header.nr_entries = cpu_to_le32(nr_entries - 1); 1298c2ecf20Sopenharmony_ci} 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_cistatic unsigned merge_threshold(struct btree_node *n) 1328c2ecf20Sopenharmony_ci{ 1338c2ecf20Sopenharmony_ci return le32_to_cpu(n->header.max_entries) / 3; 1348c2ecf20Sopenharmony_ci} 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_cistruct child { 1378c2ecf20Sopenharmony_ci unsigned index; 1388c2ecf20Sopenharmony_ci struct dm_block *block; 1398c2ecf20Sopenharmony_ci struct btree_node *n; 1408c2ecf20Sopenharmony_ci}; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_cistatic int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt, 1438c2ecf20Sopenharmony_ci struct btree_node *parent, 1448c2ecf20Sopenharmony_ci unsigned index, struct child *result) 1458c2ecf20Sopenharmony_ci{ 1468c2ecf20Sopenharmony_ci int r, inc; 1478c2ecf20Sopenharmony_ci dm_block_t root; 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci result->index = index; 1508c2ecf20Sopenharmony_ci root = value64(parent, index); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci r = dm_tm_shadow_block(info->tm, root, &btree_node_validator, 1538c2ecf20Sopenharmony_ci &result->block, &inc); 1548c2ecf20Sopenharmony_ci if (r) 1558c2ecf20Sopenharmony_ci return r; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci result->n = dm_block_data(result->block); 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci if (inc) 1608c2ecf20Sopenharmony_ci inc_children(info->tm, result->n, vt); 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci *((__le64 *) value_ptr(parent, index)) = 1638c2ecf20Sopenharmony_ci cpu_to_le64(dm_block_location(result->block)); 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci return 0; 1668c2ecf20Sopenharmony_ci} 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_cistatic void exit_child(struct dm_btree_info *info, struct child *c) 1698c2ecf20Sopenharmony_ci{ 1708c2ecf20Sopenharmony_ci dm_tm_unlock(info->tm, c->block); 1718c2ecf20Sopenharmony_ci} 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_cistatic void shift(struct btree_node *left, struct btree_node *right, int count) 1748c2ecf20Sopenharmony_ci{ 1758c2ecf20Sopenharmony_ci uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 1768c2ecf20Sopenharmony_ci uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 1778c2ecf20Sopenharmony_ci uint32_t max_entries = le32_to_cpu(left->header.max_entries); 1788c2ecf20Sopenharmony_ci uint32_t r_max_entries = le32_to_cpu(right->header.max_entries); 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci BUG_ON(max_entries != r_max_entries); 1818c2ecf20Sopenharmony_ci BUG_ON(nr_left - count > max_entries); 1828c2ecf20Sopenharmony_ci BUG_ON(nr_right + count > max_entries); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci if (!count) 1858c2ecf20Sopenharmony_ci return; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci if (count > 0) { 1888c2ecf20Sopenharmony_ci node_shift(right, count); 1898c2ecf20Sopenharmony_ci node_copy(left, right, count); 1908c2ecf20Sopenharmony_ci } else { 1918c2ecf20Sopenharmony_ci node_copy(left, right, count); 1928c2ecf20Sopenharmony_ci node_shift(right, count); 1938c2ecf20Sopenharmony_ci } 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci left->header.nr_entries = cpu_to_le32(nr_left - count); 1968c2ecf20Sopenharmony_ci right->header.nr_entries = cpu_to_le32(nr_right + count); 1978c2ecf20Sopenharmony_ci} 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_cistatic void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, 2008c2ecf20Sopenharmony_ci struct child *l, struct child *r) 2018c2ecf20Sopenharmony_ci{ 2028c2ecf20Sopenharmony_ci struct btree_node *left = l->n; 2038c2ecf20Sopenharmony_ci struct btree_node *right = r->n; 2048c2ecf20Sopenharmony_ci uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 2058c2ecf20Sopenharmony_ci uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 2068c2ecf20Sopenharmony_ci /* 2078c2ecf20Sopenharmony_ci * Ensure the number of entries in each child will be greater 2088c2ecf20Sopenharmony_ci * than or equal to (max_entries / 3 + 1), so no matter which 2098c2ecf20Sopenharmony_ci * child is used for removal, the number will still be not 2108c2ecf20Sopenharmony_ci * less than (max_entries / 3). 2118c2ecf20Sopenharmony_ci */ 2128c2ecf20Sopenharmony_ci unsigned int threshold = 2 * (merge_threshold(left) + 1); 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci if (nr_left + nr_right < threshold) { 2158c2ecf20Sopenharmony_ci /* 2168c2ecf20Sopenharmony_ci * Merge 2178c2ecf20Sopenharmony_ci */ 2188c2ecf20Sopenharmony_ci node_copy(left, right, -nr_right); 2198c2ecf20Sopenharmony_ci left->header.nr_entries = cpu_to_le32(nr_left + nr_right); 2208c2ecf20Sopenharmony_ci delete_at(parent, r->index); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci /* 2238c2ecf20Sopenharmony_ci * We need to decrement the right block, but not it's 2248c2ecf20Sopenharmony_ci * children, since they're still referenced by left. 2258c2ecf20Sopenharmony_ci */ 2268c2ecf20Sopenharmony_ci dm_tm_dec(info->tm, dm_block_location(r->block)); 2278c2ecf20Sopenharmony_ci } else { 2288c2ecf20Sopenharmony_ci /* 2298c2ecf20Sopenharmony_ci * Rebalance. 2308c2ecf20Sopenharmony_ci */ 2318c2ecf20Sopenharmony_ci unsigned target_left = (nr_left + nr_right) / 2; 2328c2ecf20Sopenharmony_ci shift(left, right, nr_left - target_left); 2338c2ecf20Sopenharmony_ci *key_ptr(parent, r->index) = right->keys[0]; 2348c2ecf20Sopenharmony_ci } 2358c2ecf20Sopenharmony_ci} 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_cistatic int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, 2388c2ecf20Sopenharmony_ci struct dm_btree_value_type *vt, unsigned left_index) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci int r; 2418c2ecf20Sopenharmony_ci struct btree_node *parent; 2428c2ecf20Sopenharmony_ci struct child left, right; 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci parent = dm_block_data(shadow_current(s)); 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci r = init_child(info, vt, parent, left_index, &left); 2478c2ecf20Sopenharmony_ci if (r) 2488c2ecf20Sopenharmony_ci return r; 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci r = init_child(info, vt, parent, left_index + 1, &right); 2518c2ecf20Sopenharmony_ci if (r) { 2528c2ecf20Sopenharmony_ci exit_child(info, &left); 2538c2ecf20Sopenharmony_ci return r; 2548c2ecf20Sopenharmony_ci } 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci __rebalance2(info, parent, &left, &right); 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci exit_child(info, &left); 2598c2ecf20Sopenharmony_ci exit_child(info, &right); 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci return 0; 2628c2ecf20Sopenharmony_ci} 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci/* 2658c2ecf20Sopenharmony_ci * We dump as many entries from center as possible into left, then the rest 2668c2ecf20Sopenharmony_ci * in right, then rebalance2. This wastes some cpu, but I want something 2678c2ecf20Sopenharmony_ci * simple atm. 2688c2ecf20Sopenharmony_ci */ 2698c2ecf20Sopenharmony_cistatic void delete_center_node(struct dm_btree_info *info, struct btree_node *parent, 2708c2ecf20Sopenharmony_ci struct child *l, struct child *c, struct child *r, 2718c2ecf20Sopenharmony_ci struct btree_node *left, struct btree_node *center, struct btree_node *right, 2728c2ecf20Sopenharmony_ci uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) 2738c2ecf20Sopenharmony_ci{ 2748c2ecf20Sopenharmony_ci uint32_t max_entries = le32_to_cpu(left->header.max_entries); 2758c2ecf20Sopenharmony_ci unsigned shift = min(max_entries - nr_left, nr_center); 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci BUG_ON(nr_left + shift > max_entries); 2788c2ecf20Sopenharmony_ci node_copy(left, center, -shift); 2798c2ecf20Sopenharmony_ci left->header.nr_entries = cpu_to_le32(nr_left + shift); 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci if (shift != nr_center) { 2828c2ecf20Sopenharmony_ci shift = nr_center - shift; 2838c2ecf20Sopenharmony_ci BUG_ON((nr_right + shift) > max_entries); 2848c2ecf20Sopenharmony_ci node_shift(right, shift); 2858c2ecf20Sopenharmony_ci node_copy(center, right, shift); 2868c2ecf20Sopenharmony_ci right->header.nr_entries = cpu_to_le32(nr_right + shift); 2878c2ecf20Sopenharmony_ci } 2888c2ecf20Sopenharmony_ci *key_ptr(parent, r->index) = right->keys[0]; 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci delete_at(parent, c->index); 2918c2ecf20Sopenharmony_ci r->index--; 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci dm_tm_dec(info->tm, dm_block_location(c->block)); 2948c2ecf20Sopenharmony_ci __rebalance2(info, parent, l, r); 2958c2ecf20Sopenharmony_ci} 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci/* 2988c2ecf20Sopenharmony_ci * Redistributes entries among 3 sibling nodes. 2998c2ecf20Sopenharmony_ci */ 3008c2ecf20Sopenharmony_cistatic void redistribute3(struct dm_btree_info *info, struct btree_node *parent, 3018c2ecf20Sopenharmony_ci struct child *l, struct child *c, struct child *r, 3028c2ecf20Sopenharmony_ci struct btree_node *left, struct btree_node *center, struct btree_node *right, 3038c2ecf20Sopenharmony_ci uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) 3048c2ecf20Sopenharmony_ci{ 3058c2ecf20Sopenharmony_ci int s; 3068c2ecf20Sopenharmony_ci uint32_t max_entries = le32_to_cpu(left->header.max_entries); 3078c2ecf20Sopenharmony_ci unsigned total = nr_left + nr_center + nr_right; 3088c2ecf20Sopenharmony_ci unsigned target_right = total / 3; 3098c2ecf20Sopenharmony_ci unsigned remainder = (target_right * 3) != total; 3108c2ecf20Sopenharmony_ci unsigned target_left = target_right + remainder; 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci BUG_ON(target_left > max_entries); 3138c2ecf20Sopenharmony_ci BUG_ON(target_right > max_entries); 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci if (nr_left < nr_right) { 3168c2ecf20Sopenharmony_ci s = nr_left - target_left; 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci if (s < 0 && nr_center < -s) { 3198c2ecf20Sopenharmony_ci /* not enough in central node */ 3208c2ecf20Sopenharmony_ci shift(left, center, -nr_center); 3218c2ecf20Sopenharmony_ci s += nr_center; 3228c2ecf20Sopenharmony_ci shift(left, right, s); 3238c2ecf20Sopenharmony_ci nr_right += s; 3248c2ecf20Sopenharmony_ci } else 3258c2ecf20Sopenharmony_ci shift(left, center, s); 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci shift(center, right, target_right - nr_right); 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci } else { 3308c2ecf20Sopenharmony_ci s = target_right - nr_right; 3318c2ecf20Sopenharmony_ci if (s > 0 && nr_center < s) { 3328c2ecf20Sopenharmony_ci /* not enough in central node */ 3338c2ecf20Sopenharmony_ci shift(center, right, nr_center); 3348c2ecf20Sopenharmony_ci s -= nr_center; 3358c2ecf20Sopenharmony_ci shift(left, right, s); 3368c2ecf20Sopenharmony_ci nr_left -= s; 3378c2ecf20Sopenharmony_ci } else 3388c2ecf20Sopenharmony_ci shift(center, right, s); 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci shift(left, center, nr_left - target_left); 3418c2ecf20Sopenharmony_ci } 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci *key_ptr(parent, c->index) = center->keys[0]; 3448c2ecf20Sopenharmony_ci *key_ptr(parent, r->index) = right->keys[0]; 3458c2ecf20Sopenharmony_ci} 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_cistatic void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, 3488c2ecf20Sopenharmony_ci struct child *l, struct child *c, struct child *r) 3498c2ecf20Sopenharmony_ci{ 3508c2ecf20Sopenharmony_ci struct btree_node *left = l->n; 3518c2ecf20Sopenharmony_ci struct btree_node *center = c->n; 3528c2ecf20Sopenharmony_ci struct btree_node *right = r->n; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci uint32_t nr_left = le32_to_cpu(left->header.nr_entries); 3558c2ecf20Sopenharmony_ci uint32_t nr_center = le32_to_cpu(center->header.nr_entries); 3568c2ecf20Sopenharmony_ci uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci unsigned threshold = merge_threshold(left) * 4 + 1; 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci BUG_ON(left->header.max_entries != center->header.max_entries); 3618c2ecf20Sopenharmony_ci BUG_ON(center->header.max_entries != right->header.max_entries); 3628c2ecf20Sopenharmony_ci 3638c2ecf20Sopenharmony_ci if ((nr_left + nr_center + nr_right) < threshold) 3648c2ecf20Sopenharmony_ci delete_center_node(info, parent, l, c, r, left, center, right, 3658c2ecf20Sopenharmony_ci nr_left, nr_center, nr_right); 3668c2ecf20Sopenharmony_ci else 3678c2ecf20Sopenharmony_ci redistribute3(info, parent, l, c, r, left, center, right, 3688c2ecf20Sopenharmony_ci nr_left, nr_center, nr_right); 3698c2ecf20Sopenharmony_ci} 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_cistatic int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, 3728c2ecf20Sopenharmony_ci struct dm_btree_value_type *vt, unsigned left_index) 3738c2ecf20Sopenharmony_ci{ 3748c2ecf20Sopenharmony_ci int r; 3758c2ecf20Sopenharmony_ci struct btree_node *parent = dm_block_data(shadow_current(s)); 3768c2ecf20Sopenharmony_ci struct child left, center, right; 3778c2ecf20Sopenharmony_ci 3788c2ecf20Sopenharmony_ci /* 3798c2ecf20Sopenharmony_ci * FIXME: fill out an array? 3808c2ecf20Sopenharmony_ci */ 3818c2ecf20Sopenharmony_ci r = init_child(info, vt, parent, left_index, &left); 3828c2ecf20Sopenharmony_ci if (r) 3838c2ecf20Sopenharmony_ci return r; 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci r = init_child(info, vt, parent, left_index + 1, ¢er); 3868c2ecf20Sopenharmony_ci if (r) { 3878c2ecf20Sopenharmony_ci exit_child(info, &left); 3888c2ecf20Sopenharmony_ci return r; 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci r = init_child(info, vt, parent, left_index + 2, &right); 3928c2ecf20Sopenharmony_ci if (r) { 3938c2ecf20Sopenharmony_ci exit_child(info, &left); 3948c2ecf20Sopenharmony_ci exit_child(info, ¢er); 3958c2ecf20Sopenharmony_ci return r; 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci __rebalance3(info, parent, &left, ¢er, &right); 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci exit_child(info, &left); 4018c2ecf20Sopenharmony_ci exit_child(info, ¢er); 4028c2ecf20Sopenharmony_ci exit_child(info, &right); 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci return 0; 4058c2ecf20Sopenharmony_ci} 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_cistatic int rebalance_children(struct shadow_spine *s, 4088c2ecf20Sopenharmony_ci struct dm_btree_info *info, 4098c2ecf20Sopenharmony_ci struct dm_btree_value_type *vt, uint64_t key) 4108c2ecf20Sopenharmony_ci{ 4118c2ecf20Sopenharmony_ci int i, r, has_left_sibling, has_right_sibling; 4128c2ecf20Sopenharmony_ci struct btree_node *n; 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(s)); 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_ci if (le32_to_cpu(n->header.nr_entries) == 1) { 4178c2ecf20Sopenharmony_ci struct dm_block *child; 4188c2ecf20Sopenharmony_ci dm_block_t b = value64(n, 0); 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci r = dm_tm_read_lock(info->tm, b, &btree_node_validator, &child); 4218c2ecf20Sopenharmony_ci if (r) 4228c2ecf20Sopenharmony_ci return r; 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_ci memcpy(n, dm_block_data(child), 4258c2ecf20Sopenharmony_ci dm_bm_block_size(dm_tm_get_bm(info->tm))); 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci dm_tm_dec(info->tm, dm_block_location(child)); 4288c2ecf20Sopenharmony_ci dm_tm_unlock(info->tm, child); 4298c2ecf20Sopenharmony_ci return 0; 4308c2ecf20Sopenharmony_ci } 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci i = lower_bound(n, key); 4338c2ecf20Sopenharmony_ci if (i < 0) 4348c2ecf20Sopenharmony_ci return -ENODATA; 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci has_left_sibling = i > 0; 4378c2ecf20Sopenharmony_ci has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci if (!has_left_sibling) 4408c2ecf20Sopenharmony_ci r = rebalance2(s, info, vt, i); 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci else if (!has_right_sibling) 4438c2ecf20Sopenharmony_ci r = rebalance2(s, info, vt, i - 1); 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci else 4468c2ecf20Sopenharmony_ci r = rebalance3(s, info, vt, i - 1); 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci return r; 4498c2ecf20Sopenharmony_ci} 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_cistatic int do_leaf(struct btree_node *n, uint64_t key, unsigned *index) 4528c2ecf20Sopenharmony_ci{ 4538c2ecf20Sopenharmony_ci int i = lower_bound(n, key); 4548c2ecf20Sopenharmony_ci 4558c2ecf20Sopenharmony_ci if ((i < 0) || 4568c2ecf20Sopenharmony_ci (i >= le32_to_cpu(n->header.nr_entries)) || 4578c2ecf20Sopenharmony_ci (le64_to_cpu(n->keys[i]) != key)) 4588c2ecf20Sopenharmony_ci return -ENODATA; 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci *index = i; 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci return 0; 4638c2ecf20Sopenharmony_ci} 4648c2ecf20Sopenharmony_ci 4658c2ecf20Sopenharmony_ci/* 4668c2ecf20Sopenharmony_ci * Prepares for removal from one level of the hierarchy. The caller must 4678c2ecf20Sopenharmony_ci * call delete_at() to remove the entry at index. 4688c2ecf20Sopenharmony_ci */ 4698c2ecf20Sopenharmony_cistatic int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, 4708c2ecf20Sopenharmony_ci struct dm_btree_value_type *vt, dm_block_t root, 4718c2ecf20Sopenharmony_ci uint64_t key, unsigned *index) 4728c2ecf20Sopenharmony_ci{ 4738c2ecf20Sopenharmony_ci int i = *index, r; 4748c2ecf20Sopenharmony_ci struct btree_node *n; 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci for (;;) { 4778c2ecf20Sopenharmony_ci r = shadow_step(s, root, vt); 4788c2ecf20Sopenharmony_ci if (r < 0) 4798c2ecf20Sopenharmony_ci break; 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci /* 4828c2ecf20Sopenharmony_ci * We have to patch up the parent node, ugly, but I don't 4838c2ecf20Sopenharmony_ci * see a way to do this automatically as part of the spine 4848c2ecf20Sopenharmony_ci * op. 4858c2ecf20Sopenharmony_ci */ 4868c2ecf20Sopenharmony_ci if (shadow_has_parent(s)) { 4878c2ecf20Sopenharmony_ci __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); 4888c2ecf20Sopenharmony_ci memcpy(value_ptr(dm_block_data(shadow_parent(s)), i), 4898c2ecf20Sopenharmony_ci &location, sizeof(__le64)); 4908c2ecf20Sopenharmony_ci } 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(s)); 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci if (le32_to_cpu(n->header.flags) & LEAF_NODE) 4958c2ecf20Sopenharmony_ci return do_leaf(n, key, index); 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci r = rebalance_children(s, info, vt, key); 4988c2ecf20Sopenharmony_ci if (r) 4998c2ecf20Sopenharmony_ci break; 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(s)); 5028c2ecf20Sopenharmony_ci if (le32_to_cpu(n->header.flags) & LEAF_NODE) 5038c2ecf20Sopenharmony_ci return do_leaf(n, key, index); 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_ci i = lower_bound(n, key); 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_ci /* 5088c2ecf20Sopenharmony_ci * We know the key is present, or else 5098c2ecf20Sopenharmony_ci * rebalance_children would have returned 5108c2ecf20Sopenharmony_ci * -ENODATA 5118c2ecf20Sopenharmony_ci */ 5128c2ecf20Sopenharmony_ci root = value64(n, i); 5138c2ecf20Sopenharmony_ci } 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci return r; 5168c2ecf20Sopenharmony_ci} 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ciint dm_btree_remove(struct dm_btree_info *info, dm_block_t root, 5198c2ecf20Sopenharmony_ci uint64_t *keys, dm_block_t *new_root) 5208c2ecf20Sopenharmony_ci{ 5218c2ecf20Sopenharmony_ci unsigned level, last_level = info->levels - 1; 5228c2ecf20Sopenharmony_ci int index = 0, r = 0; 5238c2ecf20Sopenharmony_ci struct shadow_spine spine; 5248c2ecf20Sopenharmony_ci struct btree_node *n; 5258c2ecf20Sopenharmony_ci struct dm_btree_value_type le64_vt; 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci init_le64_type(info->tm, &le64_vt); 5288c2ecf20Sopenharmony_ci init_shadow_spine(&spine, info); 5298c2ecf20Sopenharmony_ci for (level = 0; level < info->levels; level++) { 5308c2ecf20Sopenharmony_ci r = remove_raw(&spine, info, 5318c2ecf20Sopenharmony_ci (level == last_level ? 5328c2ecf20Sopenharmony_ci &info->value_type : &le64_vt), 5338c2ecf20Sopenharmony_ci root, keys[level], (unsigned *)&index); 5348c2ecf20Sopenharmony_ci if (r < 0) 5358c2ecf20Sopenharmony_ci break; 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(&spine)); 5388c2ecf20Sopenharmony_ci if (level != last_level) { 5398c2ecf20Sopenharmony_ci root = value64(n, index); 5408c2ecf20Sopenharmony_ci continue; 5418c2ecf20Sopenharmony_ci } 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_ci BUG_ON(index < 0 || index >= le32_to_cpu(n->header.nr_entries)); 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci if (info->value_type.dec) 5468c2ecf20Sopenharmony_ci info->value_type.dec(info->value_type.context, 5478c2ecf20Sopenharmony_ci value_ptr(n, index)); 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci delete_at(n, index); 5508c2ecf20Sopenharmony_ci } 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_ci if (!r) 5538c2ecf20Sopenharmony_ci *new_root = shadow_root(&spine); 5548c2ecf20Sopenharmony_ci exit_shadow_spine(&spine); 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_ci return r; 5578c2ecf20Sopenharmony_ci} 5588c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_btree_remove); 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci/*----------------------------------------------------------------*/ 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_cistatic int remove_nearest(struct shadow_spine *s, struct dm_btree_info *info, 5638c2ecf20Sopenharmony_ci struct dm_btree_value_type *vt, dm_block_t root, 5648c2ecf20Sopenharmony_ci uint64_t key, int *index) 5658c2ecf20Sopenharmony_ci{ 5668c2ecf20Sopenharmony_ci int i = *index, r; 5678c2ecf20Sopenharmony_ci struct btree_node *n; 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci for (;;) { 5708c2ecf20Sopenharmony_ci r = shadow_step(s, root, vt); 5718c2ecf20Sopenharmony_ci if (r < 0) 5728c2ecf20Sopenharmony_ci break; 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci /* 5758c2ecf20Sopenharmony_ci * We have to patch up the parent node, ugly, but I don't 5768c2ecf20Sopenharmony_ci * see a way to do this automatically as part of the spine 5778c2ecf20Sopenharmony_ci * op. 5788c2ecf20Sopenharmony_ci */ 5798c2ecf20Sopenharmony_ci if (shadow_has_parent(s)) { 5808c2ecf20Sopenharmony_ci __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); 5818c2ecf20Sopenharmony_ci memcpy(value_ptr(dm_block_data(shadow_parent(s)), i), 5828c2ecf20Sopenharmony_ci &location, sizeof(__le64)); 5838c2ecf20Sopenharmony_ci } 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(s)); 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci if (le32_to_cpu(n->header.flags) & LEAF_NODE) { 5888c2ecf20Sopenharmony_ci *index = lower_bound(n, key); 5898c2ecf20Sopenharmony_ci return 0; 5908c2ecf20Sopenharmony_ci } 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_ci r = rebalance_children(s, info, vt, key); 5938c2ecf20Sopenharmony_ci if (r) 5948c2ecf20Sopenharmony_ci break; 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(s)); 5978c2ecf20Sopenharmony_ci if (le32_to_cpu(n->header.flags) & LEAF_NODE) { 5988c2ecf20Sopenharmony_ci *index = lower_bound(n, key); 5998c2ecf20Sopenharmony_ci return 0; 6008c2ecf20Sopenharmony_ci } 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci i = lower_bound(n, key); 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci /* 6058c2ecf20Sopenharmony_ci * We know the key is present, or else 6068c2ecf20Sopenharmony_ci * rebalance_children would have returned 6078c2ecf20Sopenharmony_ci * -ENODATA 6088c2ecf20Sopenharmony_ci */ 6098c2ecf20Sopenharmony_ci root = value64(n, i); 6108c2ecf20Sopenharmony_ci } 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci return r; 6138c2ecf20Sopenharmony_ci} 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_cistatic int remove_one(struct dm_btree_info *info, dm_block_t root, 6168c2ecf20Sopenharmony_ci uint64_t *keys, uint64_t end_key, 6178c2ecf20Sopenharmony_ci dm_block_t *new_root, unsigned *nr_removed) 6188c2ecf20Sopenharmony_ci{ 6198c2ecf20Sopenharmony_ci unsigned level, last_level = info->levels - 1; 6208c2ecf20Sopenharmony_ci int index = 0, r = 0; 6218c2ecf20Sopenharmony_ci struct shadow_spine spine; 6228c2ecf20Sopenharmony_ci struct btree_node *n; 6238c2ecf20Sopenharmony_ci struct dm_btree_value_type le64_vt; 6248c2ecf20Sopenharmony_ci uint64_t k; 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci init_le64_type(info->tm, &le64_vt); 6278c2ecf20Sopenharmony_ci init_shadow_spine(&spine, info); 6288c2ecf20Sopenharmony_ci for (level = 0; level < last_level; level++) { 6298c2ecf20Sopenharmony_ci r = remove_raw(&spine, info, &le64_vt, 6308c2ecf20Sopenharmony_ci root, keys[level], (unsigned *) &index); 6318c2ecf20Sopenharmony_ci if (r < 0) 6328c2ecf20Sopenharmony_ci goto out; 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(&spine)); 6358c2ecf20Sopenharmony_ci root = value64(n, index); 6368c2ecf20Sopenharmony_ci } 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci r = remove_nearest(&spine, info, &info->value_type, 6398c2ecf20Sopenharmony_ci root, keys[last_level], &index); 6408c2ecf20Sopenharmony_ci if (r < 0) 6418c2ecf20Sopenharmony_ci goto out; 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci n = dm_block_data(shadow_current(&spine)); 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_ci if (index < 0) 6468c2ecf20Sopenharmony_ci index = 0; 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_ci if (index >= le32_to_cpu(n->header.nr_entries)) { 6498c2ecf20Sopenharmony_ci r = -ENODATA; 6508c2ecf20Sopenharmony_ci goto out; 6518c2ecf20Sopenharmony_ci } 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_ci k = le64_to_cpu(n->keys[index]); 6548c2ecf20Sopenharmony_ci if (k >= keys[last_level] && k < end_key) { 6558c2ecf20Sopenharmony_ci if (info->value_type.dec) 6568c2ecf20Sopenharmony_ci info->value_type.dec(info->value_type.context, 6578c2ecf20Sopenharmony_ci value_ptr(n, index)); 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci delete_at(n, index); 6608c2ecf20Sopenharmony_ci keys[last_level] = k + 1ull; 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci } else 6638c2ecf20Sopenharmony_ci r = -ENODATA; 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ciout: 6668c2ecf20Sopenharmony_ci *new_root = shadow_root(&spine); 6678c2ecf20Sopenharmony_ci exit_shadow_spine(&spine); 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci return r; 6708c2ecf20Sopenharmony_ci} 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_ciint dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root, 6738c2ecf20Sopenharmony_ci uint64_t *first_key, uint64_t end_key, 6748c2ecf20Sopenharmony_ci dm_block_t *new_root, unsigned *nr_removed) 6758c2ecf20Sopenharmony_ci{ 6768c2ecf20Sopenharmony_ci int r; 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci *nr_removed = 0; 6798c2ecf20Sopenharmony_ci do { 6808c2ecf20Sopenharmony_ci r = remove_one(info, root, first_key, end_key, &root, nr_removed); 6818c2ecf20Sopenharmony_ci if (!r) 6828c2ecf20Sopenharmony_ci (*nr_removed)++; 6838c2ecf20Sopenharmony_ci } while (!r); 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci *new_root = root; 6868c2ecf20Sopenharmony_ci return r == -ENODATA ? 0 : r; 6878c2ecf20Sopenharmony_ci} 6888c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_btree_remove_leaves); 689