18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Code for working with individual keys, and sorted sets of keys with in a
48c2ecf20Sopenharmony_ci * btree node
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Copyright 2012 Google, Inc.
78c2ecf20Sopenharmony_ci */
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include "util.h"
128c2ecf20Sopenharmony_ci#include "bset.h"
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci#include <linux/console.h>
158c2ecf20Sopenharmony_ci#include <linux/sched/clock.h>
168c2ecf20Sopenharmony_ci#include <linux/random.h>
178c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci#ifdef CONFIG_BCACHE_DEBUG
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_civoid bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
228c2ecf20Sopenharmony_ci{
238c2ecf20Sopenharmony_ci	struct bkey *k, *next;
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci	for (k = i->start; k < bset_bkey_last(i); k = next) {
268c2ecf20Sopenharmony_ci		next = bkey_next(k);
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci		pr_err("block %u key %u/%u: ", set,
298c2ecf20Sopenharmony_ci		       (unsigned int) ((u64 *) k - i->d), i->keys);
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci		if (b->ops->key_dump)
328c2ecf20Sopenharmony_ci			b->ops->key_dump(b, k);
338c2ecf20Sopenharmony_ci		else
348c2ecf20Sopenharmony_ci			pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci		if (next < bset_bkey_last(i) &&
378c2ecf20Sopenharmony_ci		    bkey_cmp(k, b->ops->is_extents ?
388c2ecf20Sopenharmony_ci			     &START_KEY(next) : next) > 0)
398c2ecf20Sopenharmony_ci			pr_err("Key skipped backwards\n");
408c2ecf20Sopenharmony_ci	}
418c2ecf20Sopenharmony_ci}
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_civoid bch_dump_bucket(struct btree_keys *b)
448c2ecf20Sopenharmony_ci{
458c2ecf20Sopenharmony_ci	unsigned int i;
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	console_lock();
488c2ecf20Sopenharmony_ci	for (i = 0; i <= b->nsets; i++)
498c2ecf20Sopenharmony_ci		bch_dump_bset(b, b->set[i].data,
508c2ecf20Sopenharmony_ci			      bset_sector_offset(b, b->set[i].data));
518c2ecf20Sopenharmony_ci	console_unlock();
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ciint __bch_count_data(struct btree_keys *b)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	unsigned int ret = 0;
578c2ecf20Sopenharmony_ci	struct btree_iter iter;
588c2ecf20Sopenharmony_ci	struct bkey *k;
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	if (b->ops->is_extents)
618c2ecf20Sopenharmony_ci		for_each_key(b, k, &iter)
628c2ecf20Sopenharmony_ci			ret += KEY_SIZE(k);
638c2ecf20Sopenharmony_ci	return ret;
648c2ecf20Sopenharmony_ci}
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_civoid __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
678c2ecf20Sopenharmony_ci{
688c2ecf20Sopenharmony_ci	va_list args;
698c2ecf20Sopenharmony_ci	struct bkey *k, *p = NULL;
708c2ecf20Sopenharmony_ci	struct btree_iter iter;
718c2ecf20Sopenharmony_ci	const char *err;
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	for_each_key(b, k, &iter) {
748c2ecf20Sopenharmony_ci		if (b->ops->is_extents) {
758c2ecf20Sopenharmony_ci			err = "Keys out of order";
768c2ecf20Sopenharmony_ci			if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
778c2ecf20Sopenharmony_ci				goto bug;
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci			if (bch_ptr_invalid(b, k))
808c2ecf20Sopenharmony_ci				continue;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci			err =  "Overlapping keys";
838c2ecf20Sopenharmony_ci			if (p && bkey_cmp(p, &START_KEY(k)) > 0)
848c2ecf20Sopenharmony_ci				goto bug;
858c2ecf20Sopenharmony_ci		} else {
868c2ecf20Sopenharmony_ci			if (bch_ptr_bad(b, k))
878c2ecf20Sopenharmony_ci				continue;
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci			err = "Duplicate keys";
908c2ecf20Sopenharmony_ci			if (p && !bkey_cmp(p, k))
918c2ecf20Sopenharmony_ci				goto bug;
928c2ecf20Sopenharmony_ci		}
938c2ecf20Sopenharmony_ci		p = k;
948c2ecf20Sopenharmony_ci	}
958c2ecf20Sopenharmony_ci#if 0
968c2ecf20Sopenharmony_ci	err = "Key larger than btree node key";
978c2ecf20Sopenharmony_ci	if (p && bkey_cmp(p, &b->key) > 0)
988c2ecf20Sopenharmony_ci		goto bug;
998c2ecf20Sopenharmony_ci#endif
1008c2ecf20Sopenharmony_ci	return;
1018c2ecf20Sopenharmony_cibug:
1028c2ecf20Sopenharmony_ci	bch_dump_bucket(b);
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	va_start(args, fmt);
1058c2ecf20Sopenharmony_ci	vprintk(fmt, args);
1068c2ecf20Sopenharmony_ci	va_end(args);
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci	panic("bch_check_keys error:  %s:\n", err);
1098c2ecf20Sopenharmony_ci}
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_cistatic void bch_btree_iter_next_check(struct btree_iter *iter)
1128c2ecf20Sopenharmony_ci{
1138c2ecf20Sopenharmony_ci	struct bkey *k = iter->data->k, *next = bkey_next(k);
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	if (next < iter->data->end &&
1168c2ecf20Sopenharmony_ci	    bkey_cmp(k, iter->b->ops->is_extents ?
1178c2ecf20Sopenharmony_ci		     &START_KEY(next) : next) > 0) {
1188c2ecf20Sopenharmony_ci		bch_dump_bucket(iter->b);
1198c2ecf20Sopenharmony_ci		panic("Key skipped backwards\n");
1208c2ecf20Sopenharmony_ci	}
1218c2ecf20Sopenharmony_ci}
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci#else
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_cistatic inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci#endif
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci/* Keylists */
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ciint __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
1328c2ecf20Sopenharmony_ci{
1338c2ecf20Sopenharmony_ci	size_t oldsize = bch_keylist_nkeys(l);
1348c2ecf20Sopenharmony_ci	size_t newsize = oldsize + u64s;
1358c2ecf20Sopenharmony_ci	uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p;
1368c2ecf20Sopenharmony_ci	uint64_t *new_keys;
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	newsize = roundup_pow_of_two(newsize);
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	if (newsize <= KEYLIST_INLINE ||
1418c2ecf20Sopenharmony_ci	    roundup_pow_of_two(oldsize) == newsize)
1428c2ecf20Sopenharmony_ci		return 0;
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci	new_keys = krealloc(old_keys, sizeof(uint64_t) * newsize, GFP_NOIO);
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	if (!new_keys)
1478c2ecf20Sopenharmony_ci		return -ENOMEM;
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci	if (!old_keys)
1508c2ecf20Sopenharmony_ci		memcpy(new_keys, l->inline_keys, sizeof(uint64_t) * oldsize);
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	l->keys_p = new_keys;
1538c2ecf20Sopenharmony_ci	l->top_p = new_keys + oldsize;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	return 0;
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci/* Pop the top key of keylist by pointing l->top to its previous key */
1598c2ecf20Sopenharmony_cistruct bkey *bch_keylist_pop(struct keylist *l)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	struct bkey *k = l->keys;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	if (k == l->top)
1648c2ecf20Sopenharmony_ci		return NULL;
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	while (bkey_next(k) != l->top)
1678c2ecf20Sopenharmony_ci		k = bkey_next(k);
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci	return l->top = k;
1708c2ecf20Sopenharmony_ci}
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci/* Pop the bottom key of keylist and update l->top_p */
1738c2ecf20Sopenharmony_civoid bch_keylist_pop_front(struct keylist *l)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	l->top_p -= bkey_u64s(l->keys);
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	memmove(l->keys,
1788c2ecf20Sopenharmony_ci		bkey_next(l->keys),
1798c2ecf20Sopenharmony_ci		bch_keylist_bytes(l));
1808c2ecf20Sopenharmony_ci}
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci/* Key/pointer manipulation */
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_civoid bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
1858c2ecf20Sopenharmony_ci			      unsigned int i)
1868c2ecf20Sopenharmony_ci{
1878c2ecf20Sopenharmony_ci	BUG_ON(i > KEY_PTRS(src));
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	/* Only copy the header, key, and one pointer. */
1908c2ecf20Sopenharmony_ci	memcpy(dest, src, 2 * sizeof(uint64_t));
1918c2ecf20Sopenharmony_ci	dest->ptr[0] = src->ptr[i];
1928c2ecf20Sopenharmony_ci	SET_KEY_PTRS(dest, 1);
1938c2ecf20Sopenharmony_ci	/* We didn't copy the checksum so clear that bit. */
1948c2ecf20Sopenharmony_ci	SET_KEY_CSUM(dest, 0);
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_cibool __bch_cut_front(const struct bkey *where, struct bkey *k)
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	unsigned int i, len = 0;
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	if (bkey_cmp(where, &START_KEY(k)) <= 0)
2028c2ecf20Sopenharmony_ci		return false;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	if (bkey_cmp(where, k) < 0)
2058c2ecf20Sopenharmony_ci		len = KEY_OFFSET(k) - KEY_OFFSET(where);
2068c2ecf20Sopenharmony_ci	else
2078c2ecf20Sopenharmony_ci		bkey_copy_key(k, where);
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci	for (i = 0; i < KEY_PTRS(k); i++)
2108c2ecf20Sopenharmony_ci		SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + KEY_SIZE(k) - len);
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	BUG_ON(len > KEY_SIZE(k));
2138c2ecf20Sopenharmony_ci	SET_KEY_SIZE(k, len);
2148c2ecf20Sopenharmony_ci	return true;
2158c2ecf20Sopenharmony_ci}
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_cibool __bch_cut_back(const struct bkey *where, struct bkey *k)
2188c2ecf20Sopenharmony_ci{
2198c2ecf20Sopenharmony_ci	unsigned int len = 0;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	if (bkey_cmp(where, k) >= 0)
2228c2ecf20Sopenharmony_ci		return false;
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	BUG_ON(KEY_INODE(where) != KEY_INODE(k));
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	if (bkey_cmp(where, &START_KEY(k)) > 0)
2278c2ecf20Sopenharmony_ci		len = KEY_OFFSET(where) - KEY_START(k);
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	bkey_copy_key(k, where);
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	BUG_ON(len > KEY_SIZE(k));
2328c2ecf20Sopenharmony_ci	SET_KEY_SIZE(k, len);
2338c2ecf20Sopenharmony_ci	return true;
2348c2ecf20Sopenharmony_ci}
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci/* Auxiliary search trees */
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci/* 32 bits total: */
2398c2ecf20Sopenharmony_ci#define BKEY_MID_BITS		3
2408c2ecf20Sopenharmony_ci#define BKEY_EXPONENT_BITS	7
2418c2ecf20Sopenharmony_ci#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
2428c2ecf20Sopenharmony_ci#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_cistruct bkey_float {
2458c2ecf20Sopenharmony_ci	unsigned int	exponent:BKEY_EXPONENT_BITS;
2468c2ecf20Sopenharmony_ci	unsigned int	m:BKEY_MID_BITS;
2478c2ecf20Sopenharmony_ci	unsigned int	mantissa:BKEY_MANTISSA_BITS;
2488c2ecf20Sopenharmony_ci} __packed;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci/*
2518c2ecf20Sopenharmony_ci * BSET_CACHELINE was originally intended to match the hardware cacheline size -
2528c2ecf20Sopenharmony_ci * it used to be 64, but I realized the lookup code would touch slightly less
2538c2ecf20Sopenharmony_ci * memory if it was 128.
2548c2ecf20Sopenharmony_ci *
2558c2ecf20Sopenharmony_ci * It definites the number of bytes (in struct bset) per struct bkey_float in
2568c2ecf20Sopenharmony_ci * the auxiliar search tree - when we're done searching the bset_float tree we
2578c2ecf20Sopenharmony_ci * have this many bytes left that we do a linear search over.
2588c2ecf20Sopenharmony_ci *
2598c2ecf20Sopenharmony_ci * Since (after level 5) every level of the bset_tree is on a new cacheline,
2608c2ecf20Sopenharmony_ci * we're touching one fewer cacheline in the bset tree in exchange for one more
2618c2ecf20Sopenharmony_ci * cacheline in the linear search - but the linear search might stop before it
2628c2ecf20Sopenharmony_ci * gets to the second cacheline.
2638c2ecf20Sopenharmony_ci */
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci#define BSET_CACHELINE		128
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci/* Space required for the btree node keys */
2688c2ecf20Sopenharmony_cistatic inline size_t btree_keys_bytes(struct btree_keys *b)
2698c2ecf20Sopenharmony_ci{
2708c2ecf20Sopenharmony_ci	return PAGE_SIZE << b->page_order;
2718c2ecf20Sopenharmony_ci}
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_cistatic inline size_t btree_keys_cachelines(struct btree_keys *b)
2748c2ecf20Sopenharmony_ci{
2758c2ecf20Sopenharmony_ci	return btree_keys_bytes(b) / BSET_CACHELINE;
2768c2ecf20Sopenharmony_ci}
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci/* Space required for the auxiliary search trees */
2798c2ecf20Sopenharmony_cistatic inline size_t bset_tree_bytes(struct btree_keys *b)
2808c2ecf20Sopenharmony_ci{
2818c2ecf20Sopenharmony_ci	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
2828c2ecf20Sopenharmony_ci}
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci/* Space required for the prev pointers */
2858c2ecf20Sopenharmony_cistatic inline size_t bset_prev_bytes(struct btree_keys *b)
2868c2ecf20Sopenharmony_ci{
2878c2ecf20Sopenharmony_ci	return btree_keys_cachelines(b) * sizeof(uint8_t);
2888c2ecf20Sopenharmony_ci}
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci/* Memory allocation */
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_civoid bch_btree_keys_free(struct btree_keys *b)
2938c2ecf20Sopenharmony_ci{
2948c2ecf20Sopenharmony_ci	struct bset_tree *t = b->set;
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci	if (bset_prev_bytes(b) < PAGE_SIZE)
2978c2ecf20Sopenharmony_ci		kfree(t->prev);
2988c2ecf20Sopenharmony_ci	else
2998c2ecf20Sopenharmony_ci		free_pages((unsigned long) t->prev,
3008c2ecf20Sopenharmony_ci			   get_order(bset_prev_bytes(b)));
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	if (bset_tree_bytes(b) < PAGE_SIZE)
3038c2ecf20Sopenharmony_ci		kfree(t->tree);
3048c2ecf20Sopenharmony_ci	else
3058c2ecf20Sopenharmony_ci		free_pages((unsigned long) t->tree,
3068c2ecf20Sopenharmony_ci			   get_order(bset_tree_bytes(b)));
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci	free_pages((unsigned long) t->data, b->page_order);
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	t->prev = NULL;
3118c2ecf20Sopenharmony_ci	t->tree = NULL;
3128c2ecf20Sopenharmony_ci	t->data = NULL;
3138c2ecf20Sopenharmony_ci}
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ciint bch_btree_keys_alloc(struct btree_keys *b,
3168c2ecf20Sopenharmony_ci			 unsigned int page_order,
3178c2ecf20Sopenharmony_ci			 gfp_t gfp)
3188c2ecf20Sopenharmony_ci{
3198c2ecf20Sopenharmony_ci	struct bset_tree *t = b->set;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	BUG_ON(t->data);
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	b->page_order = page_order;
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci	t->data = (void *) __get_free_pages(__GFP_COMP|gfp, b->page_order);
3268c2ecf20Sopenharmony_ci	if (!t->data)
3278c2ecf20Sopenharmony_ci		goto err;
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci	t->tree = bset_tree_bytes(b) < PAGE_SIZE
3308c2ecf20Sopenharmony_ci		? kmalloc(bset_tree_bytes(b), gfp)
3318c2ecf20Sopenharmony_ci		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
3328c2ecf20Sopenharmony_ci	if (!t->tree)
3338c2ecf20Sopenharmony_ci		goto err;
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	t->prev = bset_prev_bytes(b) < PAGE_SIZE
3368c2ecf20Sopenharmony_ci		? kmalloc(bset_prev_bytes(b), gfp)
3378c2ecf20Sopenharmony_ci		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
3388c2ecf20Sopenharmony_ci	if (!t->prev)
3398c2ecf20Sopenharmony_ci		goto err;
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	return 0;
3428c2ecf20Sopenharmony_cierr:
3438c2ecf20Sopenharmony_ci	bch_btree_keys_free(b);
3448c2ecf20Sopenharmony_ci	return -ENOMEM;
3458c2ecf20Sopenharmony_ci}
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_civoid bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
3488c2ecf20Sopenharmony_ci			 bool *expensive_debug_checks)
3498c2ecf20Sopenharmony_ci{
3508c2ecf20Sopenharmony_ci	b->ops = ops;
3518c2ecf20Sopenharmony_ci	b->expensive_debug_checks = expensive_debug_checks;
3528c2ecf20Sopenharmony_ci	b->nsets = 0;
3538c2ecf20Sopenharmony_ci	b->last_set_unwritten = 0;
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci	/*
3568c2ecf20Sopenharmony_ci	 * struct btree_keys in embedded in struct btree, and struct
3578c2ecf20Sopenharmony_ci	 * bset_tree is embedded into struct btree_keys. They are all
3588c2ecf20Sopenharmony_ci	 * initialized as 0 by kzalloc() in mca_bucket_alloc(), and
3598c2ecf20Sopenharmony_ci	 * b->set[0].data is allocated in bch_btree_keys_alloc(), so we
3608c2ecf20Sopenharmony_ci	 * don't have to initiate b->set[].size and b->set[].data here
3618c2ecf20Sopenharmony_ci	 * any more.
3628c2ecf20Sopenharmony_ci	 */
3638c2ecf20Sopenharmony_ci}
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_ci/* Binary tree stuff for auxiliary search trees */
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci/*
3688c2ecf20Sopenharmony_ci * return array index next to j when does in-order traverse
3698c2ecf20Sopenharmony_ci * of a binary tree which is stored in a linear array
3708c2ecf20Sopenharmony_ci */
3718c2ecf20Sopenharmony_cistatic unsigned int inorder_next(unsigned int j, unsigned int size)
3728c2ecf20Sopenharmony_ci{
3738c2ecf20Sopenharmony_ci	if (j * 2 + 1 < size) {
3748c2ecf20Sopenharmony_ci		j = j * 2 + 1;
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_ci		while (j * 2 < size)
3778c2ecf20Sopenharmony_ci			j *= 2;
3788c2ecf20Sopenharmony_ci	} else
3798c2ecf20Sopenharmony_ci		j >>= ffz(j) + 1;
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci	return j;
3828c2ecf20Sopenharmony_ci}
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci/*
3858c2ecf20Sopenharmony_ci * return array index previous to j when does in-order traverse
3868c2ecf20Sopenharmony_ci * of a binary tree which is stored in a linear array
3878c2ecf20Sopenharmony_ci */
3888c2ecf20Sopenharmony_cistatic unsigned int inorder_prev(unsigned int j, unsigned int size)
3898c2ecf20Sopenharmony_ci{
3908c2ecf20Sopenharmony_ci	if (j * 2 < size) {
3918c2ecf20Sopenharmony_ci		j = j * 2;
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_ci		while (j * 2 + 1 < size)
3948c2ecf20Sopenharmony_ci			j = j * 2 + 1;
3958c2ecf20Sopenharmony_ci	} else
3968c2ecf20Sopenharmony_ci		j >>= ffs(j);
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci	return j;
3998c2ecf20Sopenharmony_ci}
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci/*
4028c2ecf20Sopenharmony_ci * I have no idea why this code works... and I'm the one who wrote it
4038c2ecf20Sopenharmony_ci *
4048c2ecf20Sopenharmony_ci * However, I do know what it does:
4058c2ecf20Sopenharmony_ci * Given a binary tree constructed in an array (i.e. how you normally implement
4068c2ecf20Sopenharmony_ci * a heap), it converts a node in the tree - referenced by array index - to the
4078c2ecf20Sopenharmony_ci * index it would have if you did an inorder traversal.
4088c2ecf20Sopenharmony_ci *
4098c2ecf20Sopenharmony_ci * Also tested for every j, size up to size somewhere around 6 million.
4108c2ecf20Sopenharmony_ci *
4118c2ecf20Sopenharmony_ci * The binary tree starts at array index 1, not 0
4128c2ecf20Sopenharmony_ci * extra is a function of size:
4138c2ecf20Sopenharmony_ci *   extra = (size - rounddown_pow_of_two(size - 1)) << 1;
4148c2ecf20Sopenharmony_ci */
4158c2ecf20Sopenharmony_cistatic unsigned int __to_inorder(unsigned int j,
4168c2ecf20Sopenharmony_ci				  unsigned int size,
4178c2ecf20Sopenharmony_ci				  unsigned int extra)
4188c2ecf20Sopenharmony_ci{
4198c2ecf20Sopenharmony_ci	unsigned int b = fls(j);
4208c2ecf20Sopenharmony_ci	unsigned int shift = fls(size - 1) - b;
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	j  ^= 1U << (b - 1);
4238c2ecf20Sopenharmony_ci	j <<= 1;
4248c2ecf20Sopenharmony_ci	j  |= 1;
4258c2ecf20Sopenharmony_ci	j <<= shift;
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci	if (j > extra)
4288c2ecf20Sopenharmony_ci		j -= (j - extra) >> 1;
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	return j;
4318c2ecf20Sopenharmony_ci}
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci/*
4348c2ecf20Sopenharmony_ci * Return the cacheline index in bset_tree->data, where j is index
4358c2ecf20Sopenharmony_ci * from a linear array which stores the auxiliar binary tree
4368c2ecf20Sopenharmony_ci */
4378c2ecf20Sopenharmony_cistatic unsigned int to_inorder(unsigned int j, struct bset_tree *t)
4388c2ecf20Sopenharmony_ci{
4398c2ecf20Sopenharmony_ci	return __to_inorder(j, t->size, t->extra);
4408c2ecf20Sopenharmony_ci}
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_cistatic unsigned int __inorder_to_tree(unsigned int j,
4438c2ecf20Sopenharmony_ci				      unsigned int size,
4448c2ecf20Sopenharmony_ci				      unsigned int extra)
4458c2ecf20Sopenharmony_ci{
4468c2ecf20Sopenharmony_ci	unsigned int shift;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	if (j > extra)
4498c2ecf20Sopenharmony_ci		j += j - extra;
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	shift = ffs(j);
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	j >>= shift;
4548c2ecf20Sopenharmony_ci	j  |= roundup_pow_of_two(size) >> shift;
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	return j;
4578c2ecf20Sopenharmony_ci}
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci/*
4608c2ecf20Sopenharmony_ci * Return an index from a linear array which stores the auxiliar binary
4618c2ecf20Sopenharmony_ci * tree, j is the cacheline index of t->data.
4628c2ecf20Sopenharmony_ci */
4638c2ecf20Sopenharmony_cistatic unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t)
4648c2ecf20Sopenharmony_ci{
4658c2ecf20Sopenharmony_ci	return __inorder_to_tree(j, t->size, t->extra);
4668c2ecf20Sopenharmony_ci}
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci#if 0
4698c2ecf20Sopenharmony_civoid inorder_test(void)
4708c2ecf20Sopenharmony_ci{
4718c2ecf20Sopenharmony_ci	unsigned long done = 0;
4728c2ecf20Sopenharmony_ci	ktime_t start = ktime_get();
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	for (unsigned int size = 2;
4758c2ecf20Sopenharmony_ci	     size < 65536000;
4768c2ecf20Sopenharmony_ci	     size++) {
4778c2ecf20Sopenharmony_ci		unsigned int extra =
4788c2ecf20Sopenharmony_ci			(size - rounddown_pow_of_two(size - 1)) << 1;
4798c2ecf20Sopenharmony_ci		unsigned int i = 1, j = rounddown_pow_of_two(size - 1);
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci		if (!(size % 4096))
4828c2ecf20Sopenharmony_ci			pr_notice("loop %u, %llu per us\n", size,
4838c2ecf20Sopenharmony_ci			       done / ktime_us_delta(ktime_get(), start));
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci		while (1) {
4868c2ecf20Sopenharmony_ci			if (__inorder_to_tree(i, size, extra) != j)
4878c2ecf20Sopenharmony_ci				panic("size %10u j %10u i %10u", size, j, i);
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci			if (__to_inorder(j, size, extra) != i)
4908c2ecf20Sopenharmony_ci				panic("size %10u j %10u i %10u", size, j, i);
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci			if (j == rounddown_pow_of_two(size) - 1)
4938c2ecf20Sopenharmony_ci				break;
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci			BUG_ON(inorder_prev(inorder_next(j, size), size) != j);
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci			j = inorder_next(j, size);
4988c2ecf20Sopenharmony_ci			i++;
4998c2ecf20Sopenharmony_ci		}
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci		done += size - 1;
5028c2ecf20Sopenharmony_ci	}
5038c2ecf20Sopenharmony_ci}
5048c2ecf20Sopenharmony_ci#endif
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ci/*
5078c2ecf20Sopenharmony_ci * Cacheline/offset <-> bkey pointer arithmetic:
5088c2ecf20Sopenharmony_ci *
5098c2ecf20Sopenharmony_ci * t->tree is a binary search tree in an array; each node corresponds to a key
5108c2ecf20Sopenharmony_ci * in one cacheline in t->set (BSET_CACHELINE bytes).
5118c2ecf20Sopenharmony_ci *
5128c2ecf20Sopenharmony_ci * This means we don't have to store the full index of the key that a node in
5138c2ecf20Sopenharmony_ci * the binary tree points to; to_inorder() gives us the cacheline, and then
5148c2ecf20Sopenharmony_ci * bkey_float->m gives us the offset within that cacheline, in units of 8 bytes.
5158c2ecf20Sopenharmony_ci *
5168c2ecf20Sopenharmony_ci * cacheline_to_bkey() and friends abstract out all the pointer arithmetic to
5178c2ecf20Sopenharmony_ci * make this work.
5188c2ecf20Sopenharmony_ci *
5198c2ecf20Sopenharmony_ci * To construct the bfloat for an arbitrary key we need to know what the key
5208c2ecf20Sopenharmony_ci * immediately preceding it is: we have to check if the two keys differ in the
5218c2ecf20Sopenharmony_ci * bits we're going to store in bkey_float->mantissa. t->prev[j] stores the size
5228c2ecf20Sopenharmony_ci * of the previous key so we can walk backwards to it from t->tree[j]'s key.
5238c2ecf20Sopenharmony_ci */
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_cistatic struct bkey *cacheline_to_bkey(struct bset_tree *t,
5268c2ecf20Sopenharmony_ci				      unsigned int cacheline,
5278c2ecf20Sopenharmony_ci				      unsigned int offset)
5288c2ecf20Sopenharmony_ci{
5298c2ecf20Sopenharmony_ci	return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
5308c2ecf20Sopenharmony_ci}
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_cistatic unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
5338c2ecf20Sopenharmony_ci{
5348c2ecf20Sopenharmony_ci	return ((void *) k - (void *) t->data) / BSET_CACHELINE;
5358c2ecf20Sopenharmony_ci}
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_cistatic unsigned int bkey_to_cacheline_offset(struct bset_tree *t,
5388c2ecf20Sopenharmony_ci					 unsigned int cacheline,
5398c2ecf20Sopenharmony_ci					 struct bkey *k)
5408c2ecf20Sopenharmony_ci{
5418c2ecf20Sopenharmony_ci	return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
5428c2ecf20Sopenharmony_ci}
5438c2ecf20Sopenharmony_ci
5448c2ecf20Sopenharmony_cistatic struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j)
5458c2ecf20Sopenharmony_ci{
5468c2ecf20Sopenharmony_ci	return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
5478c2ecf20Sopenharmony_ci}
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_cistatic struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j)
5508c2ecf20Sopenharmony_ci{
5518c2ecf20Sopenharmony_ci	return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
5528c2ecf20Sopenharmony_ci}
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci/*
5558c2ecf20Sopenharmony_ci * For the write set - the one we're currently inserting keys into - we don't
5568c2ecf20Sopenharmony_ci * maintain a full search tree, we just keep a simple lookup table in t->prev.
5578c2ecf20Sopenharmony_ci */
5588c2ecf20Sopenharmony_cistatic struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline)
5598c2ecf20Sopenharmony_ci{
5608c2ecf20Sopenharmony_ci	return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
5618c2ecf20Sopenharmony_ci}
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_cistatic inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
5648c2ecf20Sopenharmony_ci{
5658c2ecf20Sopenharmony_ci	low >>= shift;
5668c2ecf20Sopenharmony_ci	low  |= (high << 1) << (63U - shift);
5678c2ecf20Sopenharmony_ci	return low;
5688c2ecf20Sopenharmony_ci}
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci/*
5718c2ecf20Sopenharmony_ci * Calculate mantissa value for struct bkey_float.
5728c2ecf20Sopenharmony_ci * If most significant bit of f->exponent is not set, then
5738c2ecf20Sopenharmony_ci *  - f->exponent >> 6 is 0
5748c2ecf20Sopenharmony_ci *  - p[0] points to bkey->low
5758c2ecf20Sopenharmony_ci *  - p[-1] borrows bits from KEY_INODE() of bkey->high
5768c2ecf20Sopenharmony_ci * if most isgnificant bits of f->exponent is set, then
5778c2ecf20Sopenharmony_ci *  - f->exponent >> 6 is 1
5788c2ecf20Sopenharmony_ci *  - p[0] points to bits from KEY_INODE() of bkey->high
5798c2ecf20Sopenharmony_ci *  - p[-1] points to other bits from KEY_INODE() of
5808c2ecf20Sopenharmony_ci *    bkey->high too.
5818c2ecf20Sopenharmony_ci * See make_bfloat() to check when most significant bit of f->exponent
5828c2ecf20Sopenharmony_ci * is set or not.
5838c2ecf20Sopenharmony_ci */
5848c2ecf20Sopenharmony_cistatic inline unsigned int bfloat_mantissa(const struct bkey *k,
5858c2ecf20Sopenharmony_ci				       struct bkey_float *f)
5868c2ecf20Sopenharmony_ci{
5878c2ecf20Sopenharmony_ci	const uint64_t *p = &k->low - (f->exponent >> 6);
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
5908c2ecf20Sopenharmony_ci}
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_cistatic void make_bfloat(struct bset_tree *t, unsigned int j)
5938c2ecf20Sopenharmony_ci{
5948c2ecf20Sopenharmony_ci	struct bkey_float *f = &t->tree[j];
5958c2ecf20Sopenharmony_ci	struct bkey *m = tree_to_bkey(t, j);
5968c2ecf20Sopenharmony_ci	struct bkey *p = tree_to_prev_bkey(t, j);
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	struct bkey *l = is_power_of_2(j)
5998c2ecf20Sopenharmony_ci		? t->data->start
6008c2ecf20Sopenharmony_ci		: tree_to_prev_bkey(t, j >> ffs(j));
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci	struct bkey *r = is_power_of_2(j + 1)
6038c2ecf20Sopenharmony_ci		? bset_bkey_idx(t->data, t->data->keys - bkey_u64s(&t->end))
6048c2ecf20Sopenharmony_ci		: tree_to_bkey(t, j >> (ffz(j) + 1));
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_ci	BUG_ON(m < l || m > r);
6078c2ecf20Sopenharmony_ci	BUG_ON(bkey_next(p) != m);
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci	/*
6108c2ecf20Sopenharmony_ci	 * If l and r have different KEY_INODE values (different backing
6118c2ecf20Sopenharmony_ci	 * device), f->exponent records how many least significant bits
6128c2ecf20Sopenharmony_ci	 * are different in KEY_INODE values and sets most significant
6138c2ecf20Sopenharmony_ci	 * bits to 1 (by +64).
6148c2ecf20Sopenharmony_ci	 * If l and r have same KEY_INODE value, f->exponent records
6158c2ecf20Sopenharmony_ci	 * how many different bits in least significant bits of bkey->low.
6168c2ecf20Sopenharmony_ci	 * See bfloat_mantiss() how the most significant bit of
6178c2ecf20Sopenharmony_ci	 * f->exponent is used to calculate bfloat mantissa value.
6188c2ecf20Sopenharmony_ci	 */
6198c2ecf20Sopenharmony_ci	if (KEY_INODE(l) != KEY_INODE(r))
6208c2ecf20Sopenharmony_ci		f->exponent = fls64(KEY_INODE(r) ^ KEY_INODE(l)) + 64;
6218c2ecf20Sopenharmony_ci	else
6228c2ecf20Sopenharmony_ci		f->exponent = fls64(r->low ^ l->low);
6238c2ecf20Sopenharmony_ci
6248c2ecf20Sopenharmony_ci	f->exponent = max_t(int, f->exponent - BKEY_MANTISSA_BITS, 0);
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	/*
6278c2ecf20Sopenharmony_ci	 * Setting f->exponent = 127 flags this node as failed, and causes the
6288c2ecf20Sopenharmony_ci	 * lookup code to fall back to comparing against the original key.
6298c2ecf20Sopenharmony_ci	 */
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	if (bfloat_mantissa(m, f) != bfloat_mantissa(p, f))
6328c2ecf20Sopenharmony_ci		f->mantissa = bfloat_mantissa(m, f) - 1;
6338c2ecf20Sopenharmony_ci	else
6348c2ecf20Sopenharmony_ci		f->exponent = 127;
6358c2ecf20Sopenharmony_ci}
6368c2ecf20Sopenharmony_ci
6378c2ecf20Sopenharmony_cistatic void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
6388c2ecf20Sopenharmony_ci{
6398c2ecf20Sopenharmony_ci	if (t != b->set) {
6408c2ecf20Sopenharmony_ci		unsigned int j = roundup(t[-1].size,
6418c2ecf20Sopenharmony_ci				     64 / sizeof(struct bkey_float));
6428c2ecf20Sopenharmony_ci
6438c2ecf20Sopenharmony_ci		t->tree = t[-1].tree + j;
6448c2ecf20Sopenharmony_ci		t->prev = t[-1].prev + j;
6458c2ecf20Sopenharmony_ci	}
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci	while (t < b->set + MAX_BSETS)
6488c2ecf20Sopenharmony_ci		t++->size = 0;
6498c2ecf20Sopenharmony_ci}
6508c2ecf20Sopenharmony_ci
6518c2ecf20Sopenharmony_cistatic void bch_bset_build_unwritten_tree(struct btree_keys *b)
6528c2ecf20Sopenharmony_ci{
6538c2ecf20Sopenharmony_ci	struct bset_tree *t = bset_tree_last(b);
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci	BUG_ON(b->last_set_unwritten);
6568c2ecf20Sopenharmony_ci	b->last_set_unwritten = 1;
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_ci	bset_alloc_tree(b, t);
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_ci	if (t->tree != b->set->tree + btree_keys_cachelines(b)) {
6618c2ecf20Sopenharmony_ci		t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
6628c2ecf20Sopenharmony_ci		t->size = 1;
6638c2ecf20Sopenharmony_ci	}
6648c2ecf20Sopenharmony_ci}
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_civoid bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
6678c2ecf20Sopenharmony_ci{
6688c2ecf20Sopenharmony_ci	if (i != b->set->data) {
6698c2ecf20Sopenharmony_ci		b->set[++b->nsets].data = i;
6708c2ecf20Sopenharmony_ci		i->seq = b->set->data->seq;
6718c2ecf20Sopenharmony_ci	} else
6728c2ecf20Sopenharmony_ci		get_random_bytes(&i->seq, sizeof(uint64_t));
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	i->magic	= magic;
6758c2ecf20Sopenharmony_ci	i->version	= 0;
6768c2ecf20Sopenharmony_ci	i->keys		= 0;
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_ci	bch_bset_build_unwritten_tree(b);
6798c2ecf20Sopenharmony_ci}
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_ci/*
6828c2ecf20Sopenharmony_ci * Build auxiliary binary tree 'struct bset_tree *t', this tree is used to
6838c2ecf20Sopenharmony_ci * accelerate bkey search in a btree node (pointed by bset_tree->data in
6848c2ecf20Sopenharmony_ci * memory). After search in the auxiliar tree by calling bset_search_tree(),
6858c2ecf20Sopenharmony_ci * a struct bset_search_iter is returned which indicates range [l, r] from
6868c2ecf20Sopenharmony_ci * bset_tree->data where the searching bkey might be inside. Then a followed
6878c2ecf20Sopenharmony_ci * linear comparison does the exact search, see __bch_bset_search() for how
6888c2ecf20Sopenharmony_ci * the auxiliary tree is used.
6898c2ecf20Sopenharmony_ci */
6908c2ecf20Sopenharmony_civoid bch_bset_build_written_tree(struct btree_keys *b)
6918c2ecf20Sopenharmony_ci{
6928c2ecf20Sopenharmony_ci	struct bset_tree *t = bset_tree_last(b);
6938c2ecf20Sopenharmony_ci	struct bkey *prev = NULL, *k = t->data->start;
6948c2ecf20Sopenharmony_ci	unsigned int j, cacheline = 1;
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_ci	b->last_set_unwritten = 0;
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	bset_alloc_tree(b, t);
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	t->size = min_t(unsigned int,
7018c2ecf20Sopenharmony_ci			bkey_to_cacheline(t, bset_bkey_last(t->data)),
7028c2ecf20Sopenharmony_ci			b->set->tree + btree_keys_cachelines(b) - t->tree);
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_ci	if (t->size < 2) {
7058c2ecf20Sopenharmony_ci		t->size = 0;
7068c2ecf20Sopenharmony_ci		return;
7078c2ecf20Sopenharmony_ci	}
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci	t->extra = (t->size - rounddown_pow_of_two(t->size - 1)) << 1;
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_ci	/* First we figure out where the first key in each cacheline is */
7128c2ecf20Sopenharmony_ci	for (j = inorder_next(0, t->size);
7138c2ecf20Sopenharmony_ci	     j;
7148c2ecf20Sopenharmony_ci	     j = inorder_next(j, t->size)) {
7158c2ecf20Sopenharmony_ci		while (bkey_to_cacheline(t, k) < cacheline)
7168c2ecf20Sopenharmony_ci			prev = k, k = bkey_next(k);
7178c2ecf20Sopenharmony_ci
7188c2ecf20Sopenharmony_ci		t->prev[j] = bkey_u64s(prev);
7198c2ecf20Sopenharmony_ci		t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
7208c2ecf20Sopenharmony_ci	}
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	while (bkey_next(k) != bset_bkey_last(t->data))
7238c2ecf20Sopenharmony_ci		k = bkey_next(k);
7248c2ecf20Sopenharmony_ci
7258c2ecf20Sopenharmony_ci	t->end = *k;
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci	/* Then we build the tree */
7288c2ecf20Sopenharmony_ci	for (j = inorder_next(0, t->size);
7298c2ecf20Sopenharmony_ci	     j;
7308c2ecf20Sopenharmony_ci	     j = inorder_next(j, t->size))
7318c2ecf20Sopenharmony_ci		make_bfloat(t, j);
7328c2ecf20Sopenharmony_ci}
7338c2ecf20Sopenharmony_ci
7348c2ecf20Sopenharmony_ci/* Insert */
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_civoid bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
7378c2ecf20Sopenharmony_ci{
7388c2ecf20Sopenharmony_ci	struct bset_tree *t;
7398c2ecf20Sopenharmony_ci	unsigned int inorder, j = 1;
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_ci	for (t = b->set; t <= bset_tree_last(b); t++)
7428c2ecf20Sopenharmony_ci		if (k < bset_bkey_last(t->data))
7438c2ecf20Sopenharmony_ci			goto found_set;
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci	BUG();
7468c2ecf20Sopenharmony_cifound_set:
7478c2ecf20Sopenharmony_ci	if (!t->size || !bset_written(b, t))
7488c2ecf20Sopenharmony_ci		return;
7498c2ecf20Sopenharmony_ci
7508c2ecf20Sopenharmony_ci	inorder = bkey_to_cacheline(t, k);
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci	if (k == t->data->start)
7538c2ecf20Sopenharmony_ci		goto fix_left;
7548c2ecf20Sopenharmony_ci
7558c2ecf20Sopenharmony_ci	if (bkey_next(k) == bset_bkey_last(t->data)) {
7568c2ecf20Sopenharmony_ci		t->end = *k;
7578c2ecf20Sopenharmony_ci		goto fix_right;
7588c2ecf20Sopenharmony_ci	}
7598c2ecf20Sopenharmony_ci
7608c2ecf20Sopenharmony_ci	j = inorder_to_tree(inorder, t);
7618c2ecf20Sopenharmony_ci
7628c2ecf20Sopenharmony_ci	if (j &&
7638c2ecf20Sopenharmony_ci	    j < t->size &&
7648c2ecf20Sopenharmony_ci	    k == tree_to_bkey(t, j))
7658c2ecf20Sopenharmony_cifix_left:	do {
7668c2ecf20Sopenharmony_ci			make_bfloat(t, j);
7678c2ecf20Sopenharmony_ci			j = j * 2;
7688c2ecf20Sopenharmony_ci		} while (j < t->size);
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci	j = inorder_to_tree(inorder + 1, t);
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	if (j &&
7738c2ecf20Sopenharmony_ci	    j < t->size &&
7748c2ecf20Sopenharmony_ci	    k == tree_to_prev_bkey(t, j))
7758c2ecf20Sopenharmony_cifix_right:	do {
7768c2ecf20Sopenharmony_ci			make_bfloat(t, j);
7778c2ecf20Sopenharmony_ci			j = j * 2 + 1;
7788c2ecf20Sopenharmony_ci		} while (j < t->size);
7798c2ecf20Sopenharmony_ci}
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_cistatic void bch_bset_fix_lookup_table(struct btree_keys *b,
7828c2ecf20Sopenharmony_ci				      struct bset_tree *t,
7838c2ecf20Sopenharmony_ci				      struct bkey *k)
7848c2ecf20Sopenharmony_ci{
7858c2ecf20Sopenharmony_ci	unsigned int shift = bkey_u64s(k);
7868c2ecf20Sopenharmony_ci	unsigned int j = bkey_to_cacheline(t, k);
7878c2ecf20Sopenharmony_ci
7888c2ecf20Sopenharmony_ci	/* We're getting called from btree_split() or btree_gc, just bail out */
7898c2ecf20Sopenharmony_ci	if (!t->size)
7908c2ecf20Sopenharmony_ci		return;
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci	/*
7938c2ecf20Sopenharmony_ci	 * k is the key we just inserted; we need to find the entry in the
7948c2ecf20Sopenharmony_ci	 * lookup table for the first key that is strictly greater than k:
7958c2ecf20Sopenharmony_ci	 * it's either k's cacheline or the next one
7968c2ecf20Sopenharmony_ci	 */
7978c2ecf20Sopenharmony_ci	while (j < t->size &&
7988c2ecf20Sopenharmony_ci	       table_to_bkey(t, j) <= k)
7998c2ecf20Sopenharmony_ci		j++;
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci	/*
8028c2ecf20Sopenharmony_ci	 * Adjust all the lookup table entries, and find a new key for any that
8038c2ecf20Sopenharmony_ci	 * have gotten too big
8048c2ecf20Sopenharmony_ci	 */
8058c2ecf20Sopenharmony_ci	for (; j < t->size; j++) {
8068c2ecf20Sopenharmony_ci		t->prev[j] += shift;
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ci		if (t->prev[j] > 7) {
8098c2ecf20Sopenharmony_ci			k = table_to_bkey(t, j - 1);
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci			while (k < cacheline_to_bkey(t, j, 0))
8128c2ecf20Sopenharmony_ci				k = bkey_next(k);
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_ci			t->prev[j] = bkey_to_cacheline_offset(t, j, k);
8158c2ecf20Sopenharmony_ci		}
8168c2ecf20Sopenharmony_ci	}
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	if (t->size == b->set->tree + btree_keys_cachelines(b) - t->tree)
8198c2ecf20Sopenharmony_ci		return;
8208c2ecf20Sopenharmony_ci
8218c2ecf20Sopenharmony_ci	/* Possibly add a new entry to the end of the lookup table */
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_ci	for (k = table_to_bkey(t, t->size - 1);
8248c2ecf20Sopenharmony_ci	     k != bset_bkey_last(t->data);
8258c2ecf20Sopenharmony_ci	     k = bkey_next(k))
8268c2ecf20Sopenharmony_ci		if (t->size == bkey_to_cacheline(t, k)) {
8278c2ecf20Sopenharmony_ci			t->prev[t->size] =
8288c2ecf20Sopenharmony_ci				bkey_to_cacheline_offset(t, t->size, k);
8298c2ecf20Sopenharmony_ci			t->size++;
8308c2ecf20Sopenharmony_ci		}
8318c2ecf20Sopenharmony_ci}
8328c2ecf20Sopenharmony_ci
8338c2ecf20Sopenharmony_ci/*
8348c2ecf20Sopenharmony_ci * Tries to merge l and r: l should be lower than r
8358c2ecf20Sopenharmony_ci * Returns true if we were able to merge. If we did merge, l will be the merged
8368c2ecf20Sopenharmony_ci * key, r will be untouched.
8378c2ecf20Sopenharmony_ci */
8388c2ecf20Sopenharmony_cibool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
8398c2ecf20Sopenharmony_ci{
8408c2ecf20Sopenharmony_ci	if (!b->ops->key_merge)
8418c2ecf20Sopenharmony_ci		return false;
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_ci	/*
8448c2ecf20Sopenharmony_ci	 * Generic header checks
8458c2ecf20Sopenharmony_ci	 * Assumes left and right are in order
8468c2ecf20Sopenharmony_ci	 * Left and right must be exactly aligned
8478c2ecf20Sopenharmony_ci	 */
8488c2ecf20Sopenharmony_ci	if (!bch_bkey_equal_header(l, r) ||
8498c2ecf20Sopenharmony_ci	     bkey_cmp(l, &START_KEY(r)))
8508c2ecf20Sopenharmony_ci		return false;
8518c2ecf20Sopenharmony_ci
8528c2ecf20Sopenharmony_ci	return b->ops->key_merge(b, l, r);
8538c2ecf20Sopenharmony_ci}
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_civoid bch_bset_insert(struct btree_keys *b, struct bkey *where,
8568c2ecf20Sopenharmony_ci		     struct bkey *insert)
8578c2ecf20Sopenharmony_ci{
8588c2ecf20Sopenharmony_ci	struct bset_tree *t = bset_tree_last(b);
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci	BUG_ON(!b->last_set_unwritten);
8618c2ecf20Sopenharmony_ci	BUG_ON(bset_byte_offset(b, t->data) +
8628c2ecf20Sopenharmony_ci	       __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
8638c2ecf20Sopenharmony_ci	       PAGE_SIZE << b->page_order);
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_ci	memmove((uint64_t *) where + bkey_u64s(insert),
8668c2ecf20Sopenharmony_ci		where,
8678c2ecf20Sopenharmony_ci		(void *) bset_bkey_last(t->data) - (void *) where);
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci	t->data->keys += bkey_u64s(insert);
8708c2ecf20Sopenharmony_ci	bkey_copy(where, insert);
8718c2ecf20Sopenharmony_ci	bch_bset_fix_lookup_table(b, t, where);
8728c2ecf20Sopenharmony_ci}
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ciunsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
8758c2ecf20Sopenharmony_ci			      struct bkey *replace_key)
8768c2ecf20Sopenharmony_ci{
8778c2ecf20Sopenharmony_ci	unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
8788c2ecf20Sopenharmony_ci	struct bset *i = bset_tree_last(b)->data;
8798c2ecf20Sopenharmony_ci	struct bkey *m, *prev = NULL;
8808c2ecf20Sopenharmony_ci	struct btree_iter iter;
8818c2ecf20Sopenharmony_ci	struct bkey preceding_key_on_stack = ZERO_KEY;
8828c2ecf20Sopenharmony_ci	struct bkey *preceding_key_p = &preceding_key_on_stack;
8838c2ecf20Sopenharmony_ci
8848c2ecf20Sopenharmony_ci	BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci	/*
8878c2ecf20Sopenharmony_ci	 * If k has preceding key, preceding_key_p will be set to address
8888c2ecf20Sopenharmony_ci	 *  of k's preceding key; otherwise preceding_key_p will be set
8898c2ecf20Sopenharmony_ci	 * to NULL inside preceding_key().
8908c2ecf20Sopenharmony_ci	 */
8918c2ecf20Sopenharmony_ci	if (b->ops->is_extents)
8928c2ecf20Sopenharmony_ci		preceding_key(&START_KEY(k), &preceding_key_p);
8938c2ecf20Sopenharmony_ci	else
8948c2ecf20Sopenharmony_ci		preceding_key(k, &preceding_key_p);
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci	m = bch_btree_iter_init(b, &iter, preceding_key_p);
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	if (b->ops->insert_fixup(b, k, &iter, replace_key))
8998c2ecf20Sopenharmony_ci		return status;
9008c2ecf20Sopenharmony_ci
9018c2ecf20Sopenharmony_ci	status = BTREE_INSERT_STATUS_INSERT;
9028c2ecf20Sopenharmony_ci
9038c2ecf20Sopenharmony_ci	while (m != bset_bkey_last(i) &&
9048c2ecf20Sopenharmony_ci	       bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
9058c2ecf20Sopenharmony_ci		prev = m, m = bkey_next(m);
9068c2ecf20Sopenharmony_ci
9078c2ecf20Sopenharmony_ci	/* prev is in the tree, if we merge we're done */
9088c2ecf20Sopenharmony_ci	status = BTREE_INSERT_STATUS_BACK_MERGE;
9098c2ecf20Sopenharmony_ci	if (prev &&
9108c2ecf20Sopenharmony_ci	    bch_bkey_try_merge(b, prev, k))
9118c2ecf20Sopenharmony_ci		goto merged;
9128c2ecf20Sopenharmony_ci#if 0
9138c2ecf20Sopenharmony_ci	status = BTREE_INSERT_STATUS_OVERWROTE;
9148c2ecf20Sopenharmony_ci	if (m != bset_bkey_last(i) &&
9158c2ecf20Sopenharmony_ci	    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
9168c2ecf20Sopenharmony_ci		goto copy;
9178c2ecf20Sopenharmony_ci#endif
9188c2ecf20Sopenharmony_ci	status = BTREE_INSERT_STATUS_FRONT_MERGE;
9198c2ecf20Sopenharmony_ci	if (m != bset_bkey_last(i) &&
9208c2ecf20Sopenharmony_ci	    bch_bkey_try_merge(b, k, m))
9218c2ecf20Sopenharmony_ci		goto copy;
9228c2ecf20Sopenharmony_ci
9238c2ecf20Sopenharmony_ci	bch_bset_insert(b, m, k);
9248c2ecf20Sopenharmony_cicopy:	bkey_copy(m, k);
9258c2ecf20Sopenharmony_cimerged:
9268c2ecf20Sopenharmony_ci	return status;
9278c2ecf20Sopenharmony_ci}
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci/* Lookup */
9308c2ecf20Sopenharmony_ci
9318c2ecf20Sopenharmony_cistruct bset_search_iter {
9328c2ecf20Sopenharmony_ci	struct bkey *l, *r;
9338c2ecf20Sopenharmony_ci};
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_cistatic struct bset_search_iter bset_search_write_set(struct bset_tree *t,
9368c2ecf20Sopenharmony_ci						     const struct bkey *search)
9378c2ecf20Sopenharmony_ci{
9388c2ecf20Sopenharmony_ci	unsigned int li = 0, ri = t->size;
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_ci	while (li + 1 != ri) {
9418c2ecf20Sopenharmony_ci		unsigned int m = (li + ri) >> 1;
9428c2ecf20Sopenharmony_ci
9438c2ecf20Sopenharmony_ci		if (bkey_cmp(table_to_bkey(t, m), search) > 0)
9448c2ecf20Sopenharmony_ci			ri = m;
9458c2ecf20Sopenharmony_ci		else
9468c2ecf20Sopenharmony_ci			li = m;
9478c2ecf20Sopenharmony_ci	}
9488c2ecf20Sopenharmony_ci
9498c2ecf20Sopenharmony_ci	return (struct bset_search_iter) {
9508c2ecf20Sopenharmony_ci		table_to_bkey(t, li),
9518c2ecf20Sopenharmony_ci		ri < t->size ? table_to_bkey(t, ri) : bset_bkey_last(t->data)
9528c2ecf20Sopenharmony_ci	};
9538c2ecf20Sopenharmony_ci}
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_cistatic struct bset_search_iter bset_search_tree(struct bset_tree *t,
9568c2ecf20Sopenharmony_ci						const struct bkey *search)
9578c2ecf20Sopenharmony_ci{
9588c2ecf20Sopenharmony_ci	struct bkey *l, *r;
9598c2ecf20Sopenharmony_ci	struct bkey_float *f;
9608c2ecf20Sopenharmony_ci	unsigned int inorder, j, n = 1;
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ci	do {
9638c2ecf20Sopenharmony_ci		unsigned int p = n << 4;
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_ci		if (p < t->size)
9668c2ecf20Sopenharmony_ci			prefetch(&t->tree[p]);
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_ci		j = n;
9698c2ecf20Sopenharmony_ci		f = &t->tree[j];
9708c2ecf20Sopenharmony_ci
9718c2ecf20Sopenharmony_ci		if (likely(f->exponent != 127)) {
9728c2ecf20Sopenharmony_ci			if (f->mantissa >= bfloat_mantissa(search, f))
9738c2ecf20Sopenharmony_ci				n = j * 2;
9748c2ecf20Sopenharmony_ci			else
9758c2ecf20Sopenharmony_ci				n = j * 2 + 1;
9768c2ecf20Sopenharmony_ci		} else {
9778c2ecf20Sopenharmony_ci			if (bkey_cmp(tree_to_bkey(t, j), search) > 0)
9788c2ecf20Sopenharmony_ci				n = j * 2;
9798c2ecf20Sopenharmony_ci			else
9808c2ecf20Sopenharmony_ci				n = j * 2 + 1;
9818c2ecf20Sopenharmony_ci		}
9828c2ecf20Sopenharmony_ci	} while (n < t->size);
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_ci	inorder = to_inorder(j, t);
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_ci	/*
9878c2ecf20Sopenharmony_ci	 * n would have been the node we recursed to - the low bit tells us if
9888c2ecf20Sopenharmony_ci	 * we recursed left or recursed right.
9898c2ecf20Sopenharmony_ci	 */
9908c2ecf20Sopenharmony_ci	if (n & 1) {
9918c2ecf20Sopenharmony_ci		l = cacheline_to_bkey(t, inorder, f->m);
9928c2ecf20Sopenharmony_ci
9938c2ecf20Sopenharmony_ci		if (++inorder != t->size) {
9948c2ecf20Sopenharmony_ci			f = &t->tree[inorder_next(j, t->size)];
9958c2ecf20Sopenharmony_ci			r = cacheline_to_bkey(t, inorder, f->m);
9968c2ecf20Sopenharmony_ci		} else
9978c2ecf20Sopenharmony_ci			r = bset_bkey_last(t->data);
9988c2ecf20Sopenharmony_ci	} else {
9998c2ecf20Sopenharmony_ci		r = cacheline_to_bkey(t, inorder, f->m);
10008c2ecf20Sopenharmony_ci
10018c2ecf20Sopenharmony_ci		if (--inorder) {
10028c2ecf20Sopenharmony_ci			f = &t->tree[inorder_prev(j, t->size)];
10038c2ecf20Sopenharmony_ci			l = cacheline_to_bkey(t, inorder, f->m);
10048c2ecf20Sopenharmony_ci		} else
10058c2ecf20Sopenharmony_ci			l = t->data->start;
10068c2ecf20Sopenharmony_ci	}
10078c2ecf20Sopenharmony_ci
10088c2ecf20Sopenharmony_ci	return (struct bset_search_iter) {l, r};
10098c2ecf20Sopenharmony_ci}
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_cistruct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
10128c2ecf20Sopenharmony_ci			       const struct bkey *search)
10138c2ecf20Sopenharmony_ci{
10148c2ecf20Sopenharmony_ci	struct bset_search_iter i;
10158c2ecf20Sopenharmony_ci
10168c2ecf20Sopenharmony_ci	/*
10178c2ecf20Sopenharmony_ci	 * First, we search for a cacheline, then lastly we do a linear search
10188c2ecf20Sopenharmony_ci	 * within that cacheline.
10198c2ecf20Sopenharmony_ci	 *
10208c2ecf20Sopenharmony_ci	 * To search for the cacheline, there's three different possibilities:
10218c2ecf20Sopenharmony_ci	 *  * The set is too small to have a search tree, so we just do a linear
10228c2ecf20Sopenharmony_ci	 *    search over the whole set.
10238c2ecf20Sopenharmony_ci	 *  * The set is the one we're currently inserting into; keeping a full
10248c2ecf20Sopenharmony_ci	 *    auxiliary search tree up to date would be too expensive, so we
10258c2ecf20Sopenharmony_ci	 *    use a much simpler lookup table to do a binary search -
10268c2ecf20Sopenharmony_ci	 *    bset_search_write_set().
10278c2ecf20Sopenharmony_ci	 *  * Or we use the auxiliary search tree we constructed earlier -
10288c2ecf20Sopenharmony_ci	 *    bset_search_tree()
10298c2ecf20Sopenharmony_ci	 */
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci	if (unlikely(!t->size)) {
10328c2ecf20Sopenharmony_ci		i.l = t->data->start;
10338c2ecf20Sopenharmony_ci		i.r = bset_bkey_last(t->data);
10348c2ecf20Sopenharmony_ci	} else if (bset_written(b, t)) {
10358c2ecf20Sopenharmony_ci		/*
10368c2ecf20Sopenharmony_ci		 * Each node in the auxiliary search tree covers a certain range
10378c2ecf20Sopenharmony_ci		 * of bits, and keys above and below the set it covers might
10388c2ecf20Sopenharmony_ci		 * differ outside those bits - so we have to special case the
10398c2ecf20Sopenharmony_ci		 * start and end - handle that here:
10408c2ecf20Sopenharmony_ci		 */
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_ci		if (unlikely(bkey_cmp(search, &t->end) >= 0))
10438c2ecf20Sopenharmony_ci			return bset_bkey_last(t->data);
10448c2ecf20Sopenharmony_ci
10458c2ecf20Sopenharmony_ci		if (unlikely(bkey_cmp(search, t->data->start) < 0))
10468c2ecf20Sopenharmony_ci			return t->data->start;
10478c2ecf20Sopenharmony_ci
10488c2ecf20Sopenharmony_ci		i = bset_search_tree(t, search);
10498c2ecf20Sopenharmony_ci	} else {
10508c2ecf20Sopenharmony_ci		BUG_ON(!b->nsets &&
10518c2ecf20Sopenharmony_ci		       t->size < bkey_to_cacheline(t, bset_bkey_last(t->data)));
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci		i = bset_search_write_set(t, search);
10548c2ecf20Sopenharmony_ci	}
10558c2ecf20Sopenharmony_ci
10568c2ecf20Sopenharmony_ci	if (btree_keys_expensive_checks(b)) {
10578c2ecf20Sopenharmony_ci		BUG_ON(bset_written(b, t) &&
10588c2ecf20Sopenharmony_ci		       i.l != t->data->start &&
10598c2ecf20Sopenharmony_ci		       bkey_cmp(tree_to_prev_bkey(t,
10608c2ecf20Sopenharmony_ci			  inorder_to_tree(bkey_to_cacheline(t, i.l), t)),
10618c2ecf20Sopenharmony_ci				search) > 0);
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci		BUG_ON(i.r != bset_bkey_last(t->data) &&
10648c2ecf20Sopenharmony_ci		       bkey_cmp(i.r, search) <= 0);
10658c2ecf20Sopenharmony_ci	}
10668c2ecf20Sopenharmony_ci
10678c2ecf20Sopenharmony_ci	while (likely(i.l != i.r) &&
10688c2ecf20Sopenharmony_ci	       bkey_cmp(i.l, search) <= 0)
10698c2ecf20Sopenharmony_ci		i.l = bkey_next(i.l);
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	return i.l;
10728c2ecf20Sopenharmony_ci}
10738c2ecf20Sopenharmony_ci
10748c2ecf20Sopenharmony_ci/* Btree iterator */
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_citypedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
10778c2ecf20Sopenharmony_ci				 struct btree_iter_set);
10788c2ecf20Sopenharmony_ci
10798c2ecf20Sopenharmony_cistatic inline bool btree_iter_cmp(struct btree_iter_set l,
10808c2ecf20Sopenharmony_ci				  struct btree_iter_set r)
10818c2ecf20Sopenharmony_ci{
10828c2ecf20Sopenharmony_ci	return bkey_cmp(l.k, r.k) > 0;
10838c2ecf20Sopenharmony_ci}
10848c2ecf20Sopenharmony_ci
10858c2ecf20Sopenharmony_cistatic inline bool btree_iter_end(struct btree_iter *iter)
10868c2ecf20Sopenharmony_ci{
10878c2ecf20Sopenharmony_ci	return !iter->used;
10888c2ecf20Sopenharmony_ci}
10898c2ecf20Sopenharmony_ci
10908c2ecf20Sopenharmony_civoid bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
10918c2ecf20Sopenharmony_ci			 struct bkey *end)
10928c2ecf20Sopenharmony_ci{
10938c2ecf20Sopenharmony_ci	if (k != end)
10948c2ecf20Sopenharmony_ci		BUG_ON(!heap_add(iter,
10958c2ecf20Sopenharmony_ci				 ((struct btree_iter_set) { k, end }),
10968c2ecf20Sopenharmony_ci				 btree_iter_cmp));
10978c2ecf20Sopenharmony_ci}
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_cistatic struct bkey *__bch_btree_iter_init(struct btree_keys *b,
11008c2ecf20Sopenharmony_ci					  struct btree_iter *iter,
11018c2ecf20Sopenharmony_ci					  struct bkey *search,
11028c2ecf20Sopenharmony_ci					  struct bset_tree *start)
11038c2ecf20Sopenharmony_ci{
11048c2ecf20Sopenharmony_ci	struct bkey *ret = NULL;
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci	iter->size = ARRAY_SIZE(iter->data);
11078c2ecf20Sopenharmony_ci	iter->used = 0;
11088c2ecf20Sopenharmony_ci
11098c2ecf20Sopenharmony_ci#ifdef CONFIG_BCACHE_DEBUG
11108c2ecf20Sopenharmony_ci	iter->b = b;
11118c2ecf20Sopenharmony_ci#endif
11128c2ecf20Sopenharmony_ci
11138c2ecf20Sopenharmony_ci	for (; start <= bset_tree_last(b); start++) {
11148c2ecf20Sopenharmony_ci		ret = bch_bset_search(b, start, search);
11158c2ecf20Sopenharmony_ci		bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
11168c2ecf20Sopenharmony_ci	}
11178c2ecf20Sopenharmony_ci
11188c2ecf20Sopenharmony_ci	return ret;
11198c2ecf20Sopenharmony_ci}
11208c2ecf20Sopenharmony_ci
11218c2ecf20Sopenharmony_cistruct bkey *bch_btree_iter_init(struct btree_keys *b,
11228c2ecf20Sopenharmony_ci				 struct btree_iter *iter,
11238c2ecf20Sopenharmony_ci				 struct bkey *search)
11248c2ecf20Sopenharmony_ci{
11258c2ecf20Sopenharmony_ci	return __bch_btree_iter_init(b, iter, search, b->set);
11268c2ecf20Sopenharmony_ci}
11278c2ecf20Sopenharmony_ci
11288c2ecf20Sopenharmony_cistatic inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
11298c2ecf20Sopenharmony_ci						 btree_iter_cmp_fn *cmp)
11308c2ecf20Sopenharmony_ci{
11318c2ecf20Sopenharmony_ci	struct btree_iter_set b __maybe_unused;
11328c2ecf20Sopenharmony_ci	struct bkey *ret = NULL;
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_ci	if (!btree_iter_end(iter)) {
11358c2ecf20Sopenharmony_ci		bch_btree_iter_next_check(iter);
11368c2ecf20Sopenharmony_ci
11378c2ecf20Sopenharmony_ci		ret = iter->data->k;
11388c2ecf20Sopenharmony_ci		iter->data->k = bkey_next(iter->data->k);
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_ci		if (iter->data->k > iter->data->end) {
11418c2ecf20Sopenharmony_ci			WARN_ONCE(1, "bset was corrupt!\n");
11428c2ecf20Sopenharmony_ci			iter->data->k = iter->data->end;
11438c2ecf20Sopenharmony_ci		}
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_ci		if (iter->data->k == iter->data->end)
11468c2ecf20Sopenharmony_ci			heap_pop(iter, b, cmp);
11478c2ecf20Sopenharmony_ci		else
11488c2ecf20Sopenharmony_ci			heap_sift(iter, 0, cmp);
11498c2ecf20Sopenharmony_ci	}
11508c2ecf20Sopenharmony_ci
11518c2ecf20Sopenharmony_ci	return ret;
11528c2ecf20Sopenharmony_ci}
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_cistruct bkey *bch_btree_iter_next(struct btree_iter *iter)
11558c2ecf20Sopenharmony_ci{
11568c2ecf20Sopenharmony_ci	return __bch_btree_iter_next(iter, btree_iter_cmp);
11578c2ecf20Sopenharmony_ci
11588c2ecf20Sopenharmony_ci}
11598c2ecf20Sopenharmony_ci
11608c2ecf20Sopenharmony_cistruct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
11618c2ecf20Sopenharmony_ci					struct btree_keys *b, ptr_filter_fn fn)
11628c2ecf20Sopenharmony_ci{
11638c2ecf20Sopenharmony_ci	struct bkey *ret;
11648c2ecf20Sopenharmony_ci
11658c2ecf20Sopenharmony_ci	do {
11668c2ecf20Sopenharmony_ci		ret = bch_btree_iter_next(iter);
11678c2ecf20Sopenharmony_ci	} while (ret && fn(b, ret));
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ci	return ret;
11708c2ecf20Sopenharmony_ci}
11718c2ecf20Sopenharmony_ci
11728c2ecf20Sopenharmony_ci/* Mergesort */
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_civoid bch_bset_sort_state_free(struct bset_sort_state *state)
11758c2ecf20Sopenharmony_ci{
11768c2ecf20Sopenharmony_ci	mempool_exit(&state->pool);
11778c2ecf20Sopenharmony_ci}
11788c2ecf20Sopenharmony_ci
11798c2ecf20Sopenharmony_ciint bch_bset_sort_state_init(struct bset_sort_state *state,
11808c2ecf20Sopenharmony_ci			     unsigned int page_order)
11818c2ecf20Sopenharmony_ci{
11828c2ecf20Sopenharmony_ci	spin_lock_init(&state->time.lock);
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_ci	state->page_order = page_order;
11858c2ecf20Sopenharmony_ci	state->crit_factor = int_sqrt(1 << page_order);
11868c2ecf20Sopenharmony_ci
11878c2ecf20Sopenharmony_ci	return mempool_init_page_pool(&state->pool, 1, page_order);
11888c2ecf20Sopenharmony_ci}
11898c2ecf20Sopenharmony_ci
11908c2ecf20Sopenharmony_cistatic void btree_mergesort(struct btree_keys *b, struct bset *out,
11918c2ecf20Sopenharmony_ci			    struct btree_iter *iter,
11928c2ecf20Sopenharmony_ci			    bool fixup, bool remove_stale)
11938c2ecf20Sopenharmony_ci{
11948c2ecf20Sopenharmony_ci	int i;
11958c2ecf20Sopenharmony_ci	struct bkey *k, *last = NULL;
11968c2ecf20Sopenharmony_ci	BKEY_PADDED(k) tmp;
11978c2ecf20Sopenharmony_ci	bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
11988c2ecf20Sopenharmony_ci		? bch_ptr_bad
11998c2ecf20Sopenharmony_ci		: bch_ptr_invalid;
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci	/* Heapify the iterator, using our comparison function */
12028c2ecf20Sopenharmony_ci	for (i = iter->used / 2 - 1; i >= 0; --i)
12038c2ecf20Sopenharmony_ci		heap_sift(iter, i, b->ops->sort_cmp);
12048c2ecf20Sopenharmony_ci
12058c2ecf20Sopenharmony_ci	while (!btree_iter_end(iter)) {
12068c2ecf20Sopenharmony_ci		if (b->ops->sort_fixup && fixup)
12078c2ecf20Sopenharmony_ci			k = b->ops->sort_fixup(iter, &tmp.k);
12088c2ecf20Sopenharmony_ci		else
12098c2ecf20Sopenharmony_ci			k = NULL;
12108c2ecf20Sopenharmony_ci
12118c2ecf20Sopenharmony_ci		if (!k)
12128c2ecf20Sopenharmony_ci			k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_ci		if (bad(b, k))
12158c2ecf20Sopenharmony_ci			continue;
12168c2ecf20Sopenharmony_ci
12178c2ecf20Sopenharmony_ci		if (!last) {
12188c2ecf20Sopenharmony_ci			last = out->start;
12198c2ecf20Sopenharmony_ci			bkey_copy(last, k);
12208c2ecf20Sopenharmony_ci		} else if (!bch_bkey_try_merge(b, last, k)) {
12218c2ecf20Sopenharmony_ci			last = bkey_next(last);
12228c2ecf20Sopenharmony_ci			bkey_copy(last, k);
12238c2ecf20Sopenharmony_ci		}
12248c2ecf20Sopenharmony_ci	}
12258c2ecf20Sopenharmony_ci
12268c2ecf20Sopenharmony_ci	out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
12278c2ecf20Sopenharmony_ci
12288c2ecf20Sopenharmony_ci	pr_debug("sorted %i keys\n", out->keys);
12298c2ecf20Sopenharmony_ci}
12308c2ecf20Sopenharmony_ci
12318c2ecf20Sopenharmony_cistatic void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
12328c2ecf20Sopenharmony_ci			 unsigned int start, unsigned int order, bool fixup,
12338c2ecf20Sopenharmony_ci			 struct bset_sort_state *state)
12348c2ecf20Sopenharmony_ci{
12358c2ecf20Sopenharmony_ci	uint64_t start_time;
12368c2ecf20Sopenharmony_ci	bool used_mempool = false;
12378c2ecf20Sopenharmony_ci	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
12388c2ecf20Sopenharmony_ci						     order);
12398c2ecf20Sopenharmony_ci	if (!out) {
12408c2ecf20Sopenharmony_ci		struct page *outp;
12418c2ecf20Sopenharmony_ci
12428c2ecf20Sopenharmony_ci		BUG_ON(order > state->page_order);
12438c2ecf20Sopenharmony_ci
12448c2ecf20Sopenharmony_ci		outp = mempool_alloc(&state->pool, GFP_NOIO);
12458c2ecf20Sopenharmony_ci		out = page_address(outp);
12468c2ecf20Sopenharmony_ci		used_mempool = true;
12478c2ecf20Sopenharmony_ci		order = state->page_order;
12488c2ecf20Sopenharmony_ci	}
12498c2ecf20Sopenharmony_ci
12508c2ecf20Sopenharmony_ci	start_time = local_clock();
12518c2ecf20Sopenharmony_ci
12528c2ecf20Sopenharmony_ci	btree_mergesort(b, out, iter, fixup, false);
12538c2ecf20Sopenharmony_ci	b->nsets = start;
12548c2ecf20Sopenharmony_ci
12558c2ecf20Sopenharmony_ci	if (!start && order == b->page_order) {
12568c2ecf20Sopenharmony_ci		/*
12578c2ecf20Sopenharmony_ci		 * Our temporary buffer is the same size as the btree node's
12588c2ecf20Sopenharmony_ci		 * buffer, we can just swap buffers instead of doing a big
12598c2ecf20Sopenharmony_ci		 * memcpy()
12608c2ecf20Sopenharmony_ci		 *
12618c2ecf20Sopenharmony_ci		 * Don't worry event 'out' is allocated from mempool, it can
12628c2ecf20Sopenharmony_ci		 * still be swapped here. Because state->pool is a page mempool
12638c2ecf20Sopenharmony_ci		 * creaated by by mempool_init_page_pool(), which allocates
12648c2ecf20Sopenharmony_ci		 * pages by alloc_pages() indeed.
12658c2ecf20Sopenharmony_ci		 */
12668c2ecf20Sopenharmony_ci
12678c2ecf20Sopenharmony_ci		out->magic	= b->set->data->magic;
12688c2ecf20Sopenharmony_ci		out->seq	= b->set->data->seq;
12698c2ecf20Sopenharmony_ci		out->version	= b->set->data->version;
12708c2ecf20Sopenharmony_ci		swap(out, b->set->data);
12718c2ecf20Sopenharmony_ci	} else {
12728c2ecf20Sopenharmony_ci		b->set[start].data->keys = out->keys;
12738c2ecf20Sopenharmony_ci		memcpy(b->set[start].data->start, out->start,
12748c2ecf20Sopenharmony_ci		       (void *) bset_bkey_last(out) - (void *) out->start);
12758c2ecf20Sopenharmony_ci	}
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci	if (used_mempool)
12788c2ecf20Sopenharmony_ci		mempool_free(virt_to_page(out), &state->pool);
12798c2ecf20Sopenharmony_ci	else
12808c2ecf20Sopenharmony_ci		free_pages((unsigned long) out, order);
12818c2ecf20Sopenharmony_ci
12828c2ecf20Sopenharmony_ci	bch_bset_build_written_tree(b);
12838c2ecf20Sopenharmony_ci
12848c2ecf20Sopenharmony_ci	if (!start)
12858c2ecf20Sopenharmony_ci		bch_time_stats_update(&state->time, start_time);
12868c2ecf20Sopenharmony_ci}
12878c2ecf20Sopenharmony_ci
12888c2ecf20Sopenharmony_civoid bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
12898c2ecf20Sopenharmony_ci			    struct bset_sort_state *state)
12908c2ecf20Sopenharmony_ci{
12918c2ecf20Sopenharmony_ci	size_t order = b->page_order, keys = 0;
12928c2ecf20Sopenharmony_ci	struct btree_iter iter;
12938c2ecf20Sopenharmony_ci	int oldsize = bch_count_data(b);
12948c2ecf20Sopenharmony_ci
12958c2ecf20Sopenharmony_ci	__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci	if (start) {
12988c2ecf20Sopenharmony_ci		unsigned int i;
12998c2ecf20Sopenharmony_ci
13008c2ecf20Sopenharmony_ci		for (i = start; i <= b->nsets; i++)
13018c2ecf20Sopenharmony_ci			keys += b->set[i].data->keys;
13028c2ecf20Sopenharmony_ci
13038c2ecf20Sopenharmony_ci		order = get_order(__set_bytes(b->set->data, keys));
13048c2ecf20Sopenharmony_ci	}
13058c2ecf20Sopenharmony_ci
13068c2ecf20Sopenharmony_ci	__btree_sort(b, &iter, start, order, false, state);
13078c2ecf20Sopenharmony_ci
13088c2ecf20Sopenharmony_ci	EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
13098c2ecf20Sopenharmony_ci}
13108c2ecf20Sopenharmony_ci
13118c2ecf20Sopenharmony_civoid bch_btree_sort_and_fix_extents(struct btree_keys *b,
13128c2ecf20Sopenharmony_ci				    struct btree_iter *iter,
13138c2ecf20Sopenharmony_ci				    struct bset_sort_state *state)
13148c2ecf20Sopenharmony_ci{
13158c2ecf20Sopenharmony_ci	__btree_sort(b, iter, 0, b->page_order, true, state);
13168c2ecf20Sopenharmony_ci}
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_civoid bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
13198c2ecf20Sopenharmony_ci			 struct bset_sort_state *state)
13208c2ecf20Sopenharmony_ci{
13218c2ecf20Sopenharmony_ci	uint64_t start_time = local_clock();
13228c2ecf20Sopenharmony_ci	struct btree_iter iter;
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_ci	bch_btree_iter_init(b, &iter, NULL);
13258c2ecf20Sopenharmony_ci
13268c2ecf20Sopenharmony_ci	btree_mergesort(b, new->set->data, &iter, false, true);
13278c2ecf20Sopenharmony_ci
13288c2ecf20Sopenharmony_ci	bch_time_stats_update(&state->time, start_time);
13298c2ecf20Sopenharmony_ci
13308c2ecf20Sopenharmony_ci	new->set->size = 0; // XXX: why?
13318c2ecf20Sopenharmony_ci}
13328c2ecf20Sopenharmony_ci
13338c2ecf20Sopenharmony_ci#define SORT_CRIT	(4096 / sizeof(uint64_t))
13348c2ecf20Sopenharmony_ci
13358c2ecf20Sopenharmony_civoid bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
13368c2ecf20Sopenharmony_ci{
13378c2ecf20Sopenharmony_ci	unsigned int crit = SORT_CRIT;
13388c2ecf20Sopenharmony_ci	int i;
13398c2ecf20Sopenharmony_ci
13408c2ecf20Sopenharmony_ci	/* Don't sort if nothing to do */
13418c2ecf20Sopenharmony_ci	if (!b->nsets)
13428c2ecf20Sopenharmony_ci		goto out;
13438c2ecf20Sopenharmony_ci
13448c2ecf20Sopenharmony_ci	for (i = b->nsets - 1; i >= 0; --i) {
13458c2ecf20Sopenharmony_ci		crit *= state->crit_factor;
13468c2ecf20Sopenharmony_ci
13478c2ecf20Sopenharmony_ci		if (b->set[i].data->keys < crit) {
13488c2ecf20Sopenharmony_ci			bch_btree_sort_partial(b, i, state);
13498c2ecf20Sopenharmony_ci			return;
13508c2ecf20Sopenharmony_ci		}
13518c2ecf20Sopenharmony_ci	}
13528c2ecf20Sopenharmony_ci
13538c2ecf20Sopenharmony_ci	/* Sort if we'd overflow */
13548c2ecf20Sopenharmony_ci	if (b->nsets + 1 == MAX_BSETS) {
13558c2ecf20Sopenharmony_ci		bch_btree_sort(b, state);
13568c2ecf20Sopenharmony_ci		return;
13578c2ecf20Sopenharmony_ci	}
13588c2ecf20Sopenharmony_ci
13598c2ecf20Sopenharmony_ciout:
13608c2ecf20Sopenharmony_ci	bch_bset_build_written_tree(b);
13618c2ecf20Sopenharmony_ci}
13628c2ecf20Sopenharmony_ci
13638c2ecf20Sopenharmony_civoid bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
13648c2ecf20Sopenharmony_ci{
13658c2ecf20Sopenharmony_ci	unsigned int i;
13668c2ecf20Sopenharmony_ci
13678c2ecf20Sopenharmony_ci	for (i = 0; i <= b->nsets; i++) {
13688c2ecf20Sopenharmony_ci		struct bset_tree *t = &b->set[i];
13698c2ecf20Sopenharmony_ci		size_t bytes = t->data->keys * sizeof(uint64_t);
13708c2ecf20Sopenharmony_ci		size_t j;
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_ci		if (bset_written(b, t)) {
13738c2ecf20Sopenharmony_ci			stats->sets_written++;
13748c2ecf20Sopenharmony_ci			stats->bytes_written += bytes;
13758c2ecf20Sopenharmony_ci
13768c2ecf20Sopenharmony_ci			stats->floats += t->size - 1;
13778c2ecf20Sopenharmony_ci
13788c2ecf20Sopenharmony_ci			for (j = 1; j < t->size; j++)
13798c2ecf20Sopenharmony_ci				if (t->tree[j].exponent == 127)
13808c2ecf20Sopenharmony_ci					stats->failed++;
13818c2ecf20Sopenharmony_ci		} else {
13828c2ecf20Sopenharmony_ci			stats->sets_unwritten++;
13838c2ecf20Sopenharmony_ci			stats->bytes_unwritten += bytes;
13848c2ecf20Sopenharmony_ci		}
13858c2ecf20Sopenharmony_ci	}
13868c2ecf20Sopenharmony_ci}
1387