18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * lib/btree.c	- Simple In-memory B+Tree
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (c) 2007-2008 Joern Engel <joern@purestorage.com>
68c2ecf20Sopenharmony_ci * Bits and pieces stolen from Peter Zijlstra's code, which is
78c2ecf20Sopenharmony_ci * Copyright 2007, Red Hat Inc. Peter Zijlstra
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci * A relatively simple B+Tree implementation.  I have written it as a learning
128c2ecf20Sopenharmony_ci * exercise to understand how B+Trees work.  Turned out to be useful as well.
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * B+Trees can be used similar to Linux radix trees (which don't have anything
158c2ecf20Sopenharmony_ci * in common with textbook radix trees, beware).  Prerequisite for them working
168c2ecf20Sopenharmony_ci * well is that access to a random tree node is much faster than a large number
178c2ecf20Sopenharmony_ci * of operations within each node.
188c2ecf20Sopenharmony_ci *
198c2ecf20Sopenharmony_ci * Disks have fulfilled the prerequisite for a long time.  More recently DRAM
208c2ecf20Sopenharmony_ci * has gained similar properties, as memory access times, when measured in cpu
218c2ecf20Sopenharmony_ci * cycles, have increased.  Cacheline sizes have increased as well, which also
228c2ecf20Sopenharmony_ci * helps B+Trees.
238c2ecf20Sopenharmony_ci *
248c2ecf20Sopenharmony_ci * Compared to radix trees, B+Trees are more efficient when dealing with a
258c2ecf20Sopenharmony_ci * sparsely populated address space.  Between 25% and 50% of the memory is
268c2ecf20Sopenharmony_ci * occupied with valid pointers.  When densely populated, radix trees contain
278c2ecf20Sopenharmony_ci * ~98% pointers - hard to beat.  Very sparse radix trees contain only ~2%
288c2ecf20Sopenharmony_ci * pointers.
298c2ecf20Sopenharmony_ci *
308c2ecf20Sopenharmony_ci * This particular implementation stores pointers identified by a long value.
318c2ecf20Sopenharmony_ci * Storing NULL pointers is illegal, lookup will return NULL when no entry
328c2ecf20Sopenharmony_ci * was found.
338c2ecf20Sopenharmony_ci *
348c2ecf20Sopenharmony_ci * A tricks was used that is not commonly found in textbooks.  The lowest
358c2ecf20Sopenharmony_ci * values are to the right, not to the left.  All used slots within a node
368c2ecf20Sopenharmony_ci * are on the left, all unused slots contain NUL values.  Most operations
378c2ecf20Sopenharmony_ci * simply loop once over all slots and terminate on the first NUL.
388c2ecf20Sopenharmony_ci */
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci#include <linux/btree.h>
418c2ecf20Sopenharmony_ci#include <linux/cache.h>
428c2ecf20Sopenharmony_ci#include <linux/kernel.h>
438c2ecf20Sopenharmony_ci#include <linux/slab.h>
448c2ecf20Sopenharmony_ci#include <linux/module.h>
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci#define MAX(a, b) ((a) > (b) ? (a) : (b))
478c2ecf20Sopenharmony_ci#define NODESIZE MAX(L1_CACHE_BYTES, 128)
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_cistruct btree_geo {
508c2ecf20Sopenharmony_ci	int keylen;
518c2ecf20Sopenharmony_ci	int no_pairs;
528c2ecf20Sopenharmony_ci	int no_longs;
538c2ecf20Sopenharmony_ci};
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_cistruct btree_geo btree_geo32 = {
568c2ecf20Sopenharmony_ci	.keylen = 1,
578c2ecf20Sopenharmony_ci	.no_pairs = NODESIZE / sizeof(long) / 2,
588c2ecf20Sopenharmony_ci	.no_longs = NODESIZE / sizeof(long) / 2,
598c2ecf20Sopenharmony_ci};
608c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_geo32);
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci#define LONG_PER_U64 (64 / BITS_PER_LONG)
638c2ecf20Sopenharmony_cistruct btree_geo btree_geo64 = {
648c2ecf20Sopenharmony_ci	.keylen = LONG_PER_U64,
658c2ecf20Sopenharmony_ci	.no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
668c2ecf20Sopenharmony_ci	.no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
678c2ecf20Sopenharmony_ci};
688c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_geo64);
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_cistruct btree_geo btree_geo128 = {
718c2ecf20Sopenharmony_ci	.keylen = 2 * LONG_PER_U64,
728c2ecf20Sopenharmony_ci	.no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
738c2ecf20Sopenharmony_ci	.no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
748c2ecf20Sopenharmony_ci};
758c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_geo128);
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci#define MAX_KEYLEN	(2 * LONG_PER_U64)
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_cistatic struct kmem_cache *btree_cachep;
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_civoid *btree_alloc(gfp_t gfp_mask, void *pool_data)
828c2ecf20Sopenharmony_ci{
838c2ecf20Sopenharmony_ci	return kmem_cache_alloc(btree_cachep, gfp_mask);
848c2ecf20Sopenharmony_ci}
858c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_alloc);
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_civoid btree_free(void *element, void *pool_data)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	kmem_cache_free(btree_cachep, element);
908c2ecf20Sopenharmony_ci}
918c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_free);
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_cistatic unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
948c2ecf20Sopenharmony_ci{
958c2ecf20Sopenharmony_ci	unsigned long *node;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	node = mempool_alloc(head->mempool, gfp);
988c2ecf20Sopenharmony_ci	if (likely(node))
998c2ecf20Sopenharmony_ci		memset(node, 0, NODESIZE);
1008c2ecf20Sopenharmony_ci	return node;
1018c2ecf20Sopenharmony_ci}
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_cistatic int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
1048c2ecf20Sopenharmony_ci{
1058c2ecf20Sopenharmony_ci	size_t i;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	for (i = 0; i < n; i++) {
1088c2ecf20Sopenharmony_ci		if (l1[i] < l2[i])
1098c2ecf20Sopenharmony_ci			return -1;
1108c2ecf20Sopenharmony_ci		if (l1[i] > l2[i])
1118c2ecf20Sopenharmony_ci			return 1;
1128c2ecf20Sopenharmony_ci	}
1138c2ecf20Sopenharmony_ci	return 0;
1148c2ecf20Sopenharmony_ci}
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_cistatic unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
1178c2ecf20Sopenharmony_ci		size_t n)
1188c2ecf20Sopenharmony_ci{
1198c2ecf20Sopenharmony_ci	size_t i;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	for (i = 0; i < n; i++)
1228c2ecf20Sopenharmony_ci		dest[i] = src[i];
1238c2ecf20Sopenharmony_ci	return dest;
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistatic unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	size_t i;
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	for (i = 0; i < n; i++)
1318c2ecf20Sopenharmony_ci		s[i] = c;
1328c2ecf20Sopenharmony_ci	return s;
1338c2ecf20Sopenharmony_ci}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_cistatic void dec_key(struct btree_geo *geo, unsigned long *key)
1368c2ecf20Sopenharmony_ci{
1378c2ecf20Sopenharmony_ci	unsigned long val;
1388c2ecf20Sopenharmony_ci	int i;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	for (i = geo->keylen - 1; i >= 0; i--) {
1418c2ecf20Sopenharmony_ci		val = key[i];
1428c2ecf20Sopenharmony_ci		key[i] = val - 1;
1438c2ecf20Sopenharmony_ci		if (val)
1448c2ecf20Sopenharmony_ci			break;
1458c2ecf20Sopenharmony_ci	}
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistatic unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	return &node[n * geo->keylen];
1518c2ecf20Sopenharmony_ci}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_cistatic void *bval(struct btree_geo *geo, unsigned long *node, int n)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	return (void *)node[geo->no_longs + n];
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_cistatic void setkey(struct btree_geo *geo, unsigned long *node, int n,
1598c2ecf20Sopenharmony_ci		   unsigned long *key)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	longcpy(bkey(geo, node, n), key, geo->keylen);
1628c2ecf20Sopenharmony_ci}
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_cistatic void setval(struct btree_geo *geo, unsigned long *node, int n,
1658c2ecf20Sopenharmony_ci		   void *val)
1668c2ecf20Sopenharmony_ci{
1678c2ecf20Sopenharmony_ci	node[geo->no_longs + n] = (unsigned long) val;
1688c2ecf20Sopenharmony_ci}
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_cistatic void clearpair(struct btree_geo *geo, unsigned long *node, int n)
1718c2ecf20Sopenharmony_ci{
1728c2ecf20Sopenharmony_ci	longset(bkey(geo, node, n), 0, geo->keylen);
1738c2ecf20Sopenharmony_ci	node[geo->no_longs + n] = 0;
1748c2ecf20Sopenharmony_ci}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_cistatic inline void __btree_init(struct btree_head *head)
1778c2ecf20Sopenharmony_ci{
1788c2ecf20Sopenharmony_ci	head->node = NULL;
1798c2ecf20Sopenharmony_ci	head->height = 0;
1808c2ecf20Sopenharmony_ci}
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_civoid btree_init_mempool(struct btree_head *head, mempool_t *mempool)
1838c2ecf20Sopenharmony_ci{
1848c2ecf20Sopenharmony_ci	__btree_init(head);
1858c2ecf20Sopenharmony_ci	head->mempool = mempool;
1868c2ecf20Sopenharmony_ci}
1878c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_init_mempool);
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ciint btree_init(struct btree_head *head)
1908c2ecf20Sopenharmony_ci{
1918c2ecf20Sopenharmony_ci	__btree_init(head);
1928c2ecf20Sopenharmony_ci	head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
1938c2ecf20Sopenharmony_ci	if (!head->mempool)
1948c2ecf20Sopenharmony_ci		return -ENOMEM;
1958c2ecf20Sopenharmony_ci	return 0;
1968c2ecf20Sopenharmony_ci}
1978c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_init);
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_civoid btree_destroy(struct btree_head *head)
2008c2ecf20Sopenharmony_ci{
2018c2ecf20Sopenharmony_ci	mempool_free(head->node, head->mempool);
2028c2ecf20Sopenharmony_ci	mempool_destroy(head->mempool);
2038c2ecf20Sopenharmony_ci	head->mempool = NULL;
2048c2ecf20Sopenharmony_ci}
2058c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_destroy);
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_civoid *btree_last(struct btree_head *head, struct btree_geo *geo,
2088c2ecf20Sopenharmony_ci		 unsigned long *key)
2098c2ecf20Sopenharmony_ci{
2108c2ecf20Sopenharmony_ci	int height = head->height;
2118c2ecf20Sopenharmony_ci	unsigned long *node = head->node;
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	if (height == 0)
2148c2ecf20Sopenharmony_ci		return NULL;
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_ci	for ( ; height > 1; height--)
2178c2ecf20Sopenharmony_ci		node = bval(geo, node, 0);
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	longcpy(key, bkey(geo, node, 0), geo->keylen);
2208c2ecf20Sopenharmony_ci	return bval(geo, node, 0);
2218c2ecf20Sopenharmony_ci}
2228c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_last);
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_cistatic int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
2258c2ecf20Sopenharmony_ci		  unsigned long *key)
2268c2ecf20Sopenharmony_ci{
2278c2ecf20Sopenharmony_ci	return longcmp(bkey(geo, node, pos), key, geo->keylen);
2288c2ecf20Sopenharmony_ci}
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_cistatic int keyzero(struct btree_geo *geo, unsigned long *key)
2318c2ecf20Sopenharmony_ci{
2328c2ecf20Sopenharmony_ci	int i;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	for (i = 0; i < geo->keylen; i++)
2358c2ecf20Sopenharmony_ci		if (key[i])
2368c2ecf20Sopenharmony_ci			return 0;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	return 1;
2398c2ecf20Sopenharmony_ci}
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_civoid *btree_lookup(struct btree_head *head, struct btree_geo *geo,
2428c2ecf20Sopenharmony_ci		unsigned long *key)
2438c2ecf20Sopenharmony_ci{
2448c2ecf20Sopenharmony_ci	int i, height = head->height;
2458c2ecf20Sopenharmony_ci	unsigned long *node = head->node;
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci	if (height == 0)
2488c2ecf20Sopenharmony_ci		return NULL;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	for ( ; height > 1; height--) {
2518c2ecf20Sopenharmony_ci		for (i = 0; i < geo->no_pairs; i++)
2528c2ecf20Sopenharmony_ci			if (keycmp(geo, node, i, key) <= 0)
2538c2ecf20Sopenharmony_ci				break;
2548c2ecf20Sopenharmony_ci		if (i == geo->no_pairs)
2558c2ecf20Sopenharmony_ci			return NULL;
2568c2ecf20Sopenharmony_ci		node = bval(geo, node, i);
2578c2ecf20Sopenharmony_ci		if (!node)
2588c2ecf20Sopenharmony_ci			return NULL;
2598c2ecf20Sopenharmony_ci	}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci	if (!node)
2628c2ecf20Sopenharmony_ci		return NULL;
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	for (i = 0; i < geo->no_pairs; i++)
2658c2ecf20Sopenharmony_ci		if (keycmp(geo, node, i, key) == 0)
2668c2ecf20Sopenharmony_ci			return bval(geo, node, i);
2678c2ecf20Sopenharmony_ci	return NULL;
2688c2ecf20Sopenharmony_ci}
2698c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_lookup);
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ciint btree_update(struct btree_head *head, struct btree_geo *geo,
2728c2ecf20Sopenharmony_ci		 unsigned long *key, void *val)
2738c2ecf20Sopenharmony_ci{
2748c2ecf20Sopenharmony_ci	int i, height = head->height;
2758c2ecf20Sopenharmony_ci	unsigned long *node = head->node;
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci	if (height == 0)
2788c2ecf20Sopenharmony_ci		return -ENOENT;
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci	for ( ; height > 1; height--) {
2818c2ecf20Sopenharmony_ci		for (i = 0; i < geo->no_pairs; i++)
2828c2ecf20Sopenharmony_ci			if (keycmp(geo, node, i, key) <= 0)
2838c2ecf20Sopenharmony_ci				break;
2848c2ecf20Sopenharmony_ci		if (i == geo->no_pairs)
2858c2ecf20Sopenharmony_ci			return -ENOENT;
2868c2ecf20Sopenharmony_ci		node = bval(geo, node, i);
2878c2ecf20Sopenharmony_ci		if (!node)
2888c2ecf20Sopenharmony_ci			return -ENOENT;
2898c2ecf20Sopenharmony_ci	}
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	if (!node)
2928c2ecf20Sopenharmony_ci		return -ENOENT;
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_ci	for (i = 0; i < geo->no_pairs; i++)
2958c2ecf20Sopenharmony_ci		if (keycmp(geo, node, i, key) == 0) {
2968c2ecf20Sopenharmony_ci			setval(geo, node, i, val);
2978c2ecf20Sopenharmony_ci			return 0;
2988c2ecf20Sopenharmony_ci		}
2998c2ecf20Sopenharmony_ci	return -ENOENT;
3008c2ecf20Sopenharmony_ci}
3018c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_update);
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci/*
3048c2ecf20Sopenharmony_ci * Usually this function is quite similar to normal lookup.  But the key of
3058c2ecf20Sopenharmony_ci * a parent node may be smaller than the smallest key of all its siblings.
3068c2ecf20Sopenharmony_ci * In such a case we cannot just return NULL, as we have only proven that no
3078c2ecf20Sopenharmony_ci * key smaller than __key, but larger than this parent key exists.
3088c2ecf20Sopenharmony_ci * So we set __key to the parent key and retry.  We have to use the smallest
3098c2ecf20Sopenharmony_ci * such parent key, which is the last parent key we encountered.
3108c2ecf20Sopenharmony_ci */
3118c2ecf20Sopenharmony_civoid *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
3128c2ecf20Sopenharmony_ci		     unsigned long *__key)
3138c2ecf20Sopenharmony_ci{
3148c2ecf20Sopenharmony_ci	int i, height;
3158c2ecf20Sopenharmony_ci	unsigned long *node, *oldnode;
3168c2ecf20Sopenharmony_ci	unsigned long *retry_key = NULL, key[MAX_KEYLEN];
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	if (keyzero(geo, __key))
3198c2ecf20Sopenharmony_ci		return NULL;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	if (head->height == 0)
3228c2ecf20Sopenharmony_ci		return NULL;
3238c2ecf20Sopenharmony_ci	longcpy(key, __key, geo->keylen);
3248c2ecf20Sopenharmony_ciretry:
3258c2ecf20Sopenharmony_ci	dec_key(geo, key);
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	node = head->node;
3288c2ecf20Sopenharmony_ci	for (height = head->height ; height > 1; height--) {
3298c2ecf20Sopenharmony_ci		for (i = 0; i < geo->no_pairs; i++)
3308c2ecf20Sopenharmony_ci			if (keycmp(geo, node, i, key) <= 0)
3318c2ecf20Sopenharmony_ci				break;
3328c2ecf20Sopenharmony_ci		if (i == geo->no_pairs)
3338c2ecf20Sopenharmony_ci			goto miss;
3348c2ecf20Sopenharmony_ci		oldnode = node;
3358c2ecf20Sopenharmony_ci		node = bval(geo, node, i);
3368c2ecf20Sopenharmony_ci		if (!node)
3378c2ecf20Sopenharmony_ci			goto miss;
3388c2ecf20Sopenharmony_ci		retry_key = bkey(geo, oldnode, i);
3398c2ecf20Sopenharmony_ci	}
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	if (!node)
3428c2ecf20Sopenharmony_ci		goto miss;
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	for (i = 0; i < geo->no_pairs; i++) {
3458c2ecf20Sopenharmony_ci		if (keycmp(geo, node, i, key) <= 0) {
3468c2ecf20Sopenharmony_ci			if (bval(geo, node, i)) {
3478c2ecf20Sopenharmony_ci				longcpy(__key, bkey(geo, node, i), geo->keylen);
3488c2ecf20Sopenharmony_ci				return bval(geo, node, i);
3498c2ecf20Sopenharmony_ci			} else
3508c2ecf20Sopenharmony_ci				goto miss;
3518c2ecf20Sopenharmony_ci		}
3528c2ecf20Sopenharmony_ci	}
3538c2ecf20Sopenharmony_cimiss:
3548c2ecf20Sopenharmony_ci	if (retry_key) {
3558c2ecf20Sopenharmony_ci		longcpy(key, retry_key, geo->keylen);
3568c2ecf20Sopenharmony_ci		retry_key = NULL;
3578c2ecf20Sopenharmony_ci		goto retry;
3588c2ecf20Sopenharmony_ci	}
3598c2ecf20Sopenharmony_ci	return NULL;
3608c2ecf20Sopenharmony_ci}
3618c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_get_prev);
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_cistatic int getpos(struct btree_geo *geo, unsigned long *node,
3648c2ecf20Sopenharmony_ci		unsigned long *key)
3658c2ecf20Sopenharmony_ci{
3668c2ecf20Sopenharmony_ci	int i;
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	for (i = 0; i < geo->no_pairs; i++) {
3698c2ecf20Sopenharmony_ci		if (keycmp(geo, node, i, key) <= 0)
3708c2ecf20Sopenharmony_ci			break;
3718c2ecf20Sopenharmony_ci	}
3728c2ecf20Sopenharmony_ci	return i;
3738c2ecf20Sopenharmony_ci}
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_cistatic int getfill(struct btree_geo *geo, unsigned long *node, int start)
3768c2ecf20Sopenharmony_ci{
3778c2ecf20Sopenharmony_ci	int i;
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci	for (i = start; i < geo->no_pairs; i++)
3808c2ecf20Sopenharmony_ci		if (!bval(geo, node, i))
3818c2ecf20Sopenharmony_ci			break;
3828c2ecf20Sopenharmony_ci	return i;
3838c2ecf20Sopenharmony_ci}
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_ci/*
3868c2ecf20Sopenharmony_ci * locate the correct leaf node in the btree
3878c2ecf20Sopenharmony_ci */
3888c2ecf20Sopenharmony_cistatic unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
3898c2ecf20Sopenharmony_ci		unsigned long *key, int level)
3908c2ecf20Sopenharmony_ci{
3918c2ecf20Sopenharmony_ci	unsigned long *node = head->node;
3928c2ecf20Sopenharmony_ci	int i, height;
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	for (height = head->height; height > level; height--) {
3958c2ecf20Sopenharmony_ci		for (i = 0; i < geo->no_pairs; i++)
3968c2ecf20Sopenharmony_ci			if (keycmp(geo, node, i, key) <= 0)
3978c2ecf20Sopenharmony_ci				break;
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci		if ((i == geo->no_pairs) || !bval(geo, node, i)) {
4008c2ecf20Sopenharmony_ci			/* right-most key is too large, update it */
4018c2ecf20Sopenharmony_ci			/* FIXME: If the right-most key on higher levels is
4028c2ecf20Sopenharmony_ci			 * always zero, this wouldn't be necessary. */
4038c2ecf20Sopenharmony_ci			i--;
4048c2ecf20Sopenharmony_ci			setkey(geo, node, i, key);
4058c2ecf20Sopenharmony_ci		}
4068c2ecf20Sopenharmony_ci		BUG_ON(i < 0);
4078c2ecf20Sopenharmony_ci		node = bval(geo, node, i);
4088c2ecf20Sopenharmony_ci	}
4098c2ecf20Sopenharmony_ci	BUG_ON(!node);
4108c2ecf20Sopenharmony_ci	return node;
4118c2ecf20Sopenharmony_ci}
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_cistatic int btree_grow(struct btree_head *head, struct btree_geo *geo,
4148c2ecf20Sopenharmony_ci		      gfp_t gfp)
4158c2ecf20Sopenharmony_ci{
4168c2ecf20Sopenharmony_ci	unsigned long *node;
4178c2ecf20Sopenharmony_ci	int fill;
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci	node = btree_node_alloc(head, gfp);
4208c2ecf20Sopenharmony_ci	if (!node)
4218c2ecf20Sopenharmony_ci		return -ENOMEM;
4228c2ecf20Sopenharmony_ci	if (head->node) {
4238c2ecf20Sopenharmony_ci		fill = getfill(geo, head->node, 0);
4248c2ecf20Sopenharmony_ci		setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
4258c2ecf20Sopenharmony_ci		setval(geo, node, 0, head->node);
4268c2ecf20Sopenharmony_ci	}
4278c2ecf20Sopenharmony_ci	head->node = node;
4288c2ecf20Sopenharmony_ci	head->height++;
4298c2ecf20Sopenharmony_ci	return 0;
4308c2ecf20Sopenharmony_ci}
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_cistatic void btree_shrink(struct btree_head *head, struct btree_geo *geo)
4338c2ecf20Sopenharmony_ci{
4348c2ecf20Sopenharmony_ci	unsigned long *node;
4358c2ecf20Sopenharmony_ci	int fill;
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci	if (head->height <= 1)
4388c2ecf20Sopenharmony_ci		return;
4398c2ecf20Sopenharmony_ci
4408c2ecf20Sopenharmony_ci	node = head->node;
4418c2ecf20Sopenharmony_ci	fill = getfill(geo, node, 0);
4428c2ecf20Sopenharmony_ci	BUG_ON(fill > 1);
4438c2ecf20Sopenharmony_ci	head->node = bval(geo, node, 0);
4448c2ecf20Sopenharmony_ci	head->height--;
4458c2ecf20Sopenharmony_ci	mempool_free(node, head->mempool);
4468c2ecf20Sopenharmony_ci}
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_cistatic int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
4498c2ecf20Sopenharmony_ci			      unsigned long *key, void *val, int level,
4508c2ecf20Sopenharmony_ci			      gfp_t gfp)
4518c2ecf20Sopenharmony_ci{
4528c2ecf20Sopenharmony_ci	unsigned long *node;
4538c2ecf20Sopenharmony_ci	int i, pos, fill, err;
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci	BUG_ON(!val);
4568c2ecf20Sopenharmony_ci	if (head->height < level) {
4578c2ecf20Sopenharmony_ci		err = btree_grow(head, geo, gfp);
4588c2ecf20Sopenharmony_ci		if (err)
4598c2ecf20Sopenharmony_ci			return err;
4608c2ecf20Sopenharmony_ci	}
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ciretry:
4638c2ecf20Sopenharmony_ci	node = find_level(head, geo, key, level);
4648c2ecf20Sopenharmony_ci	pos = getpos(geo, node, key);
4658c2ecf20Sopenharmony_ci	fill = getfill(geo, node, pos);
4668c2ecf20Sopenharmony_ci	/* two identical keys are not allowed */
4678c2ecf20Sopenharmony_ci	BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	if (fill == geo->no_pairs) {
4708c2ecf20Sopenharmony_ci		/* need to split node */
4718c2ecf20Sopenharmony_ci		unsigned long *new;
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci		new = btree_node_alloc(head, gfp);
4748c2ecf20Sopenharmony_ci		if (!new)
4758c2ecf20Sopenharmony_ci			return -ENOMEM;
4768c2ecf20Sopenharmony_ci		err = btree_insert_level(head, geo,
4778c2ecf20Sopenharmony_ci				bkey(geo, node, fill / 2 - 1),
4788c2ecf20Sopenharmony_ci				new, level + 1, gfp);
4798c2ecf20Sopenharmony_ci		if (err) {
4808c2ecf20Sopenharmony_ci			mempool_free(new, head->mempool);
4818c2ecf20Sopenharmony_ci			return err;
4828c2ecf20Sopenharmony_ci		}
4838c2ecf20Sopenharmony_ci		for (i = 0; i < fill / 2; i++) {
4848c2ecf20Sopenharmony_ci			setkey(geo, new, i, bkey(geo, node, i));
4858c2ecf20Sopenharmony_ci			setval(geo, new, i, bval(geo, node, i));
4868c2ecf20Sopenharmony_ci			setkey(geo, node, i, bkey(geo, node, i + fill / 2));
4878c2ecf20Sopenharmony_ci			setval(geo, node, i, bval(geo, node, i + fill / 2));
4888c2ecf20Sopenharmony_ci			clearpair(geo, node, i + fill / 2);
4898c2ecf20Sopenharmony_ci		}
4908c2ecf20Sopenharmony_ci		if (fill & 1) {
4918c2ecf20Sopenharmony_ci			setkey(geo, node, i, bkey(geo, node, fill - 1));
4928c2ecf20Sopenharmony_ci			setval(geo, node, i, bval(geo, node, fill - 1));
4938c2ecf20Sopenharmony_ci			clearpair(geo, node, fill - 1);
4948c2ecf20Sopenharmony_ci		}
4958c2ecf20Sopenharmony_ci		goto retry;
4968c2ecf20Sopenharmony_ci	}
4978c2ecf20Sopenharmony_ci	BUG_ON(fill >= geo->no_pairs);
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci	/* shift and insert */
5008c2ecf20Sopenharmony_ci	for (i = fill; i > pos; i--) {
5018c2ecf20Sopenharmony_ci		setkey(geo, node, i, bkey(geo, node, i - 1));
5028c2ecf20Sopenharmony_ci		setval(geo, node, i, bval(geo, node, i - 1));
5038c2ecf20Sopenharmony_ci	}
5048c2ecf20Sopenharmony_ci	setkey(geo, node, pos, key);
5058c2ecf20Sopenharmony_ci	setval(geo, node, pos, val);
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci	return 0;
5088c2ecf20Sopenharmony_ci}
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ciint btree_insert(struct btree_head *head, struct btree_geo *geo,
5118c2ecf20Sopenharmony_ci		unsigned long *key, void *val, gfp_t gfp)
5128c2ecf20Sopenharmony_ci{
5138c2ecf20Sopenharmony_ci	BUG_ON(!val);
5148c2ecf20Sopenharmony_ci	return btree_insert_level(head, geo, key, val, 1, gfp);
5158c2ecf20Sopenharmony_ci}
5168c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_insert);
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_cistatic void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
5198c2ecf20Sopenharmony_ci		unsigned long *key, int level);
5208c2ecf20Sopenharmony_cistatic void merge(struct btree_head *head, struct btree_geo *geo, int level,
5218c2ecf20Sopenharmony_ci		unsigned long *left, int lfill,
5228c2ecf20Sopenharmony_ci		unsigned long *right, int rfill,
5238c2ecf20Sopenharmony_ci		unsigned long *parent, int lpos)
5248c2ecf20Sopenharmony_ci{
5258c2ecf20Sopenharmony_ci	int i;
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	for (i = 0; i < rfill; i++) {
5288c2ecf20Sopenharmony_ci		/* Move all keys to the left */
5298c2ecf20Sopenharmony_ci		setkey(geo, left, lfill + i, bkey(geo, right, i));
5308c2ecf20Sopenharmony_ci		setval(geo, left, lfill + i, bval(geo, right, i));
5318c2ecf20Sopenharmony_ci	}
5328c2ecf20Sopenharmony_ci	/* Exchange left and right child in parent */
5338c2ecf20Sopenharmony_ci	setval(geo, parent, lpos, right);
5348c2ecf20Sopenharmony_ci	setval(geo, parent, lpos + 1, left);
5358c2ecf20Sopenharmony_ci	/* Remove left (formerly right) child from parent */
5368c2ecf20Sopenharmony_ci	btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
5378c2ecf20Sopenharmony_ci	mempool_free(right, head->mempool);
5388c2ecf20Sopenharmony_ci}
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_cistatic void rebalance(struct btree_head *head, struct btree_geo *geo,
5418c2ecf20Sopenharmony_ci		unsigned long *key, int level, unsigned long *child, int fill)
5428c2ecf20Sopenharmony_ci{
5438c2ecf20Sopenharmony_ci	unsigned long *parent, *left = NULL, *right = NULL;
5448c2ecf20Sopenharmony_ci	int i, no_left, no_right;
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci	if (fill == 0) {
5478c2ecf20Sopenharmony_ci		/* Because we don't steal entries from a neighbour, this case
5488c2ecf20Sopenharmony_ci		 * can happen.  Parent node contains a single child, this
5498c2ecf20Sopenharmony_ci		 * node, so merging with a sibling never happens.
5508c2ecf20Sopenharmony_ci		 */
5518c2ecf20Sopenharmony_ci		btree_remove_level(head, geo, key, level + 1);
5528c2ecf20Sopenharmony_ci		mempool_free(child, head->mempool);
5538c2ecf20Sopenharmony_ci		return;
5548c2ecf20Sopenharmony_ci	}
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_ci	parent = find_level(head, geo, key, level + 1);
5578c2ecf20Sopenharmony_ci	i = getpos(geo, parent, key);
5588c2ecf20Sopenharmony_ci	BUG_ON(bval(geo, parent, i) != child);
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	if (i > 0) {
5618c2ecf20Sopenharmony_ci		left = bval(geo, parent, i - 1);
5628c2ecf20Sopenharmony_ci		no_left = getfill(geo, left, 0);
5638c2ecf20Sopenharmony_ci		if (fill + no_left <= geo->no_pairs) {
5648c2ecf20Sopenharmony_ci			merge(head, geo, level,
5658c2ecf20Sopenharmony_ci					left, no_left,
5668c2ecf20Sopenharmony_ci					child, fill,
5678c2ecf20Sopenharmony_ci					parent, i - 1);
5688c2ecf20Sopenharmony_ci			return;
5698c2ecf20Sopenharmony_ci		}
5708c2ecf20Sopenharmony_ci	}
5718c2ecf20Sopenharmony_ci	if (i + 1 < getfill(geo, parent, i)) {
5728c2ecf20Sopenharmony_ci		right = bval(geo, parent, i + 1);
5738c2ecf20Sopenharmony_ci		no_right = getfill(geo, right, 0);
5748c2ecf20Sopenharmony_ci		if (fill + no_right <= geo->no_pairs) {
5758c2ecf20Sopenharmony_ci			merge(head, geo, level,
5768c2ecf20Sopenharmony_ci					child, fill,
5778c2ecf20Sopenharmony_ci					right, no_right,
5788c2ecf20Sopenharmony_ci					parent, i);
5798c2ecf20Sopenharmony_ci			return;
5808c2ecf20Sopenharmony_ci		}
5818c2ecf20Sopenharmony_ci	}
5828c2ecf20Sopenharmony_ci	/*
5838c2ecf20Sopenharmony_ci	 * We could also try to steal one entry from the left or right
5848c2ecf20Sopenharmony_ci	 * neighbor.  By not doing so we changed the invariant from
5858c2ecf20Sopenharmony_ci	 * "all nodes are at least half full" to "no two neighboring
5868c2ecf20Sopenharmony_ci	 * nodes can be merged".  Which means that the average fill of
5878c2ecf20Sopenharmony_ci	 * all nodes is still half or better.
5888c2ecf20Sopenharmony_ci	 */
5898c2ecf20Sopenharmony_ci}
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_cistatic void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
5928c2ecf20Sopenharmony_ci		unsigned long *key, int level)
5938c2ecf20Sopenharmony_ci{
5948c2ecf20Sopenharmony_ci	unsigned long *node;
5958c2ecf20Sopenharmony_ci	int i, pos, fill;
5968c2ecf20Sopenharmony_ci	void *ret;
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	if (level > head->height) {
5998c2ecf20Sopenharmony_ci		/* we recursed all the way up */
6008c2ecf20Sopenharmony_ci		head->height = 0;
6018c2ecf20Sopenharmony_ci		head->node = NULL;
6028c2ecf20Sopenharmony_ci		return NULL;
6038c2ecf20Sopenharmony_ci	}
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	node = find_level(head, geo, key, level);
6068c2ecf20Sopenharmony_ci	pos = getpos(geo, node, key);
6078c2ecf20Sopenharmony_ci	fill = getfill(geo, node, pos);
6088c2ecf20Sopenharmony_ci	if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
6098c2ecf20Sopenharmony_ci		return NULL;
6108c2ecf20Sopenharmony_ci	ret = bval(geo, node, pos);
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_ci	/* remove and shift */
6138c2ecf20Sopenharmony_ci	for (i = pos; i < fill - 1; i++) {
6148c2ecf20Sopenharmony_ci		setkey(geo, node, i, bkey(geo, node, i + 1));
6158c2ecf20Sopenharmony_ci		setval(geo, node, i, bval(geo, node, i + 1));
6168c2ecf20Sopenharmony_ci	}
6178c2ecf20Sopenharmony_ci	clearpair(geo, node, fill - 1);
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci	if (fill - 1 < geo->no_pairs / 2) {
6208c2ecf20Sopenharmony_ci		if (level < head->height)
6218c2ecf20Sopenharmony_ci			rebalance(head, geo, key, level, node, fill - 1);
6228c2ecf20Sopenharmony_ci		else if (fill - 1 == 1)
6238c2ecf20Sopenharmony_ci			btree_shrink(head, geo);
6248c2ecf20Sopenharmony_ci	}
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	return ret;
6278c2ecf20Sopenharmony_ci}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_civoid *btree_remove(struct btree_head *head, struct btree_geo *geo,
6308c2ecf20Sopenharmony_ci		unsigned long *key)
6318c2ecf20Sopenharmony_ci{
6328c2ecf20Sopenharmony_ci	if (head->height == 0)
6338c2ecf20Sopenharmony_ci		return NULL;
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci	return btree_remove_level(head, geo, key, 1);
6368c2ecf20Sopenharmony_ci}
6378c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_remove);
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ciint btree_merge(struct btree_head *target, struct btree_head *victim,
6408c2ecf20Sopenharmony_ci		struct btree_geo *geo, gfp_t gfp)
6418c2ecf20Sopenharmony_ci{
6428c2ecf20Sopenharmony_ci	unsigned long key[MAX_KEYLEN];
6438c2ecf20Sopenharmony_ci	unsigned long dup[MAX_KEYLEN];
6448c2ecf20Sopenharmony_ci	void *val;
6458c2ecf20Sopenharmony_ci	int err;
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci	BUG_ON(target == victim);
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci	if (!(target->node)) {
6508c2ecf20Sopenharmony_ci		/* target is empty, just copy fields over */
6518c2ecf20Sopenharmony_ci		target->node = victim->node;
6528c2ecf20Sopenharmony_ci		target->height = victim->height;
6538c2ecf20Sopenharmony_ci		__btree_init(victim);
6548c2ecf20Sopenharmony_ci		return 0;
6558c2ecf20Sopenharmony_ci	}
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci	/* TODO: This needs some optimizations.  Currently we do three tree
6588c2ecf20Sopenharmony_ci	 * walks to remove a single object from the victim.
6598c2ecf20Sopenharmony_ci	 */
6608c2ecf20Sopenharmony_ci	for (;;) {
6618c2ecf20Sopenharmony_ci		if (!btree_last(victim, geo, key))
6628c2ecf20Sopenharmony_ci			break;
6638c2ecf20Sopenharmony_ci		val = btree_lookup(victim, geo, key);
6648c2ecf20Sopenharmony_ci		err = btree_insert(target, geo, key, val, gfp);
6658c2ecf20Sopenharmony_ci		if (err)
6668c2ecf20Sopenharmony_ci			return err;
6678c2ecf20Sopenharmony_ci		/* We must make a copy of the key, as the original will get
6688c2ecf20Sopenharmony_ci		 * mangled inside btree_remove. */
6698c2ecf20Sopenharmony_ci		longcpy(dup, key, geo->keylen);
6708c2ecf20Sopenharmony_ci		btree_remove(victim, geo, dup);
6718c2ecf20Sopenharmony_ci	}
6728c2ecf20Sopenharmony_ci	return 0;
6738c2ecf20Sopenharmony_ci}
6748c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_merge);
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_cistatic size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
6778c2ecf20Sopenharmony_ci			       unsigned long *node, unsigned long opaque,
6788c2ecf20Sopenharmony_ci			       void (*func)(void *elem, unsigned long opaque,
6798c2ecf20Sopenharmony_ci					    unsigned long *key, size_t index,
6808c2ecf20Sopenharmony_ci					    void *func2),
6818c2ecf20Sopenharmony_ci			       void *func2, int reap, int height, size_t count)
6828c2ecf20Sopenharmony_ci{
6838c2ecf20Sopenharmony_ci	int i;
6848c2ecf20Sopenharmony_ci	unsigned long *child;
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_ci	for (i = 0; i < geo->no_pairs; i++) {
6878c2ecf20Sopenharmony_ci		child = bval(geo, node, i);
6888c2ecf20Sopenharmony_ci		if (!child)
6898c2ecf20Sopenharmony_ci			break;
6908c2ecf20Sopenharmony_ci		if (height > 1)
6918c2ecf20Sopenharmony_ci			count = __btree_for_each(head, geo, child, opaque,
6928c2ecf20Sopenharmony_ci					func, func2, reap, height - 1, count);
6938c2ecf20Sopenharmony_ci		else
6948c2ecf20Sopenharmony_ci			func(child, opaque, bkey(geo, node, i), count++,
6958c2ecf20Sopenharmony_ci					func2);
6968c2ecf20Sopenharmony_ci	}
6978c2ecf20Sopenharmony_ci	if (reap)
6988c2ecf20Sopenharmony_ci		mempool_free(node, head->mempool);
6998c2ecf20Sopenharmony_ci	return count;
7008c2ecf20Sopenharmony_ci}
7018c2ecf20Sopenharmony_ci
7028c2ecf20Sopenharmony_cistatic void empty(void *elem, unsigned long opaque, unsigned long *key,
7038c2ecf20Sopenharmony_ci		  size_t index, void *func2)
7048c2ecf20Sopenharmony_ci{
7058c2ecf20Sopenharmony_ci}
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_civoid visitorl(void *elem, unsigned long opaque, unsigned long *key,
7088c2ecf20Sopenharmony_ci	      size_t index, void *__func)
7098c2ecf20Sopenharmony_ci{
7108c2ecf20Sopenharmony_ci	visitorl_t func = __func;
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	func(elem, opaque, *key, index);
7138c2ecf20Sopenharmony_ci}
7148c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(visitorl);
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_civoid visitor32(void *elem, unsigned long opaque, unsigned long *__key,
7178c2ecf20Sopenharmony_ci	       size_t index, void *__func)
7188c2ecf20Sopenharmony_ci{
7198c2ecf20Sopenharmony_ci	visitor32_t func = __func;
7208c2ecf20Sopenharmony_ci	u32 *key = (void *)__key;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	func(elem, opaque, *key, index);
7238c2ecf20Sopenharmony_ci}
7248c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(visitor32);
7258c2ecf20Sopenharmony_ci
7268c2ecf20Sopenharmony_civoid visitor64(void *elem, unsigned long opaque, unsigned long *__key,
7278c2ecf20Sopenharmony_ci	       size_t index, void *__func)
7288c2ecf20Sopenharmony_ci{
7298c2ecf20Sopenharmony_ci	visitor64_t func = __func;
7308c2ecf20Sopenharmony_ci	u64 *key = (void *)__key;
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci	func(elem, opaque, *key, index);
7338c2ecf20Sopenharmony_ci}
7348c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(visitor64);
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_civoid visitor128(void *elem, unsigned long opaque, unsigned long *__key,
7378c2ecf20Sopenharmony_ci		size_t index, void *__func)
7388c2ecf20Sopenharmony_ci{
7398c2ecf20Sopenharmony_ci	visitor128_t func = __func;
7408c2ecf20Sopenharmony_ci	u64 *key = (void *)__key;
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci	func(elem, opaque, key[0], key[1], index);
7438c2ecf20Sopenharmony_ci}
7448c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(visitor128);
7458c2ecf20Sopenharmony_ci
7468c2ecf20Sopenharmony_cisize_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
7478c2ecf20Sopenharmony_ci		     unsigned long opaque,
7488c2ecf20Sopenharmony_ci		     void (*func)(void *elem, unsigned long opaque,
7498c2ecf20Sopenharmony_ci		     		  unsigned long *key,
7508c2ecf20Sopenharmony_ci		     		  size_t index, void *func2),
7518c2ecf20Sopenharmony_ci		     void *func2)
7528c2ecf20Sopenharmony_ci{
7538c2ecf20Sopenharmony_ci	size_t count = 0;
7548c2ecf20Sopenharmony_ci
7558c2ecf20Sopenharmony_ci	if (!func2)
7568c2ecf20Sopenharmony_ci		func = empty;
7578c2ecf20Sopenharmony_ci	if (head->node)
7588c2ecf20Sopenharmony_ci		count = __btree_for_each(head, geo, head->node, opaque, func,
7598c2ecf20Sopenharmony_ci				func2, 0, head->height, 0);
7608c2ecf20Sopenharmony_ci	return count;
7618c2ecf20Sopenharmony_ci}
7628c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_visitor);
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_cisize_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
7658c2ecf20Sopenharmony_ci			  unsigned long opaque,
7668c2ecf20Sopenharmony_ci			  void (*func)(void *elem, unsigned long opaque,
7678c2ecf20Sopenharmony_ci				       unsigned long *key,
7688c2ecf20Sopenharmony_ci				       size_t index, void *func2),
7698c2ecf20Sopenharmony_ci			  void *func2)
7708c2ecf20Sopenharmony_ci{
7718c2ecf20Sopenharmony_ci	size_t count = 0;
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	if (!func2)
7748c2ecf20Sopenharmony_ci		func = empty;
7758c2ecf20Sopenharmony_ci	if (head->node)
7768c2ecf20Sopenharmony_ci		count = __btree_for_each(head, geo, head->node, opaque, func,
7778c2ecf20Sopenharmony_ci				func2, 1, head->height, 0);
7788c2ecf20Sopenharmony_ci	__btree_init(head);
7798c2ecf20Sopenharmony_ci	return count;
7808c2ecf20Sopenharmony_ci}
7818c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(btree_grim_visitor);
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_cistatic int __init btree_module_init(void)
7848c2ecf20Sopenharmony_ci{
7858c2ecf20Sopenharmony_ci	btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
7868c2ecf20Sopenharmony_ci			SLAB_HWCACHE_ALIGN, NULL);
7878c2ecf20Sopenharmony_ci	return 0;
7888c2ecf20Sopenharmony_ci}
7898c2ecf20Sopenharmony_ci
7908c2ecf20Sopenharmony_cistatic void __exit btree_module_exit(void)
7918c2ecf20Sopenharmony_ci{
7928c2ecf20Sopenharmony_ci	kmem_cache_destroy(btree_cachep);
7938c2ecf20Sopenharmony_ci}
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_ci/* If core code starts using btree, initialization should happen even earlier */
7968c2ecf20Sopenharmony_cimodule_init(btree_module_init);
7978c2ecf20Sopenharmony_cimodule_exit(btree_module_exit);
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ciMODULE_AUTHOR("Joern Engel <joern@logfs.org>");
8008c2ecf20Sopenharmony_ciMODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
8018c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
802