18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci
38c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_debug.h>
48c2ecf20Sopenharmony_ci
58c2ecf20Sopenharmony_ci#include <linux/module.h>
68c2ecf20Sopenharmony_ci#include <linux/slab.h>
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/ceph/libceph.h>
98c2ecf20Sopenharmony_ci#include <linux/ceph/osdmap.h>
108c2ecf20Sopenharmony_ci#include <linux/ceph/decode.h>
118c2ecf20Sopenharmony_ci#include <linux/crush/hash.h>
128c2ecf20Sopenharmony_ci#include <linux/crush/mapper.h>
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_cichar *ceph_osdmap_state_str(char *str, int len, u32 state)
158c2ecf20Sopenharmony_ci{
168c2ecf20Sopenharmony_ci	if (!len)
178c2ecf20Sopenharmony_ci		return str;
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci	if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
208c2ecf20Sopenharmony_ci		snprintf(str, len, "exists, up");
218c2ecf20Sopenharmony_ci	else if (state & CEPH_OSD_EXISTS)
228c2ecf20Sopenharmony_ci		snprintf(str, len, "exists");
238c2ecf20Sopenharmony_ci	else if (state & CEPH_OSD_UP)
248c2ecf20Sopenharmony_ci		snprintf(str, len, "up");
258c2ecf20Sopenharmony_ci	else
268c2ecf20Sopenharmony_ci		snprintf(str, len, "doesn't exist");
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci	return str;
298c2ecf20Sopenharmony_ci}
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci/* maps */
328c2ecf20Sopenharmony_ci
/*
 * Count how many bits are needed to represent @t: the 1-based position
 * of its highest set bit, or 0 when @t is 0.
 */
static int calc_bits_of(unsigned int t)
{
	int bits;

	for (bits = 0; t != 0; t >>= 1)
		bits++;

	return bits;
}
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci/*
448c2ecf20Sopenharmony_ci * the foo_mask is the smallest value 2^n-1 that is >= foo.
458c2ecf20Sopenharmony_ci */
468c2ecf20Sopenharmony_cistatic void calc_pg_masks(struct ceph_pg_pool_info *pi)
478c2ecf20Sopenharmony_ci{
488c2ecf20Sopenharmony_ci	pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
498c2ecf20Sopenharmony_ci	pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
508c2ecf20Sopenharmony_ci}
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci/*
538c2ecf20Sopenharmony_ci * decode crush map
548c2ecf20Sopenharmony_ci */
558c2ecf20Sopenharmony_cistatic int crush_decode_uniform_bucket(void **p, void *end,
568c2ecf20Sopenharmony_ci				       struct crush_bucket_uniform *b)
578c2ecf20Sopenharmony_ci{
588c2ecf20Sopenharmony_ci	dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
598c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
608c2ecf20Sopenharmony_ci	b->item_weight = ceph_decode_32(p);
618c2ecf20Sopenharmony_ci	return 0;
628c2ecf20Sopenharmony_cibad:
638c2ecf20Sopenharmony_ci	return -EINVAL;
648c2ecf20Sopenharmony_ci}
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_cistatic int crush_decode_list_bucket(void **p, void *end,
678c2ecf20Sopenharmony_ci				    struct crush_bucket_list *b)
688c2ecf20Sopenharmony_ci{
698c2ecf20Sopenharmony_ci	int j;
708c2ecf20Sopenharmony_ci	dout("crush_decode_list_bucket %p to %p\n", *p, end);
718c2ecf20Sopenharmony_ci	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
728c2ecf20Sopenharmony_ci	if (b->item_weights == NULL)
738c2ecf20Sopenharmony_ci		return -ENOMEM;
748c2ecf20Sopenharmony_ci	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
758c2ecf20Sopenharmony_ci	if (b->sum_weights == NULL)
768c2ecf20Sopenharmony_ci		return -ENOMEM;
778c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
788c2ecf20Sopenharmony_ci	for (j = 0; j < b->h.size; j++) {
798c2ecf20Sopenharmony_ci		b->item_weights[j] = ceph_decode_32(p);
808c2ecf20Sopenharmony_ci		b->sum_weights[j] = ceph_decode_32(p);
818c2ecf20Sopenharmony_ci	}
828c2ecf20Sopenharmony_ci	return 0;
838c2ecf20Sopenharmony_cibad:
848c2ecf20Sopenharmony_ci	return -EINVAL;
858c2ecf20Sopenharmony_ci}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_cistatic int crush_decode_tree_bucket(void **p, void *end,
888c2ecf20Sopenharmony_ci				    struct crush_bucket_tree *b)
898c2ecf20Sopenharmony_ci{
908c2ecf20Sopenharmony_ci	int j;
918c2ecf20Sopenharmony_ci	dout("crush_decode_tree_bucket %p to %p\n", *p, end);
928c2ecf20Sopenharmony_ci	ceph_decode_8_safe(p, end, b->num_nodes, bad);
938c2ecf20Sopenharmony_ci	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
948c2ecf20Sopenharmony_ci	if (b->node_weights == NULL)
958c2ecf20Sopenharmony_ci		return -ENOMEM;
968c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
978c2ecf20Sopenharmony_ci	for (j = 0; j < b->num_nodes; j++)
988c2ecf20Sopenharmony_ci		b->node_weights[j] = ceph_decode_32(p);
998c2ecf20Sopenharmony_ci	return 0;
1008c2ecf20Sopenharmony_cibad:
1018c2ecf20Sopenharmony_ci	return -EINVAL;
1028c2ecf20Sopenharmony_ci}
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_cistatic int crush_decode_straw_bucket(void **p, void *end,
1058c2ecf20Sopenharmony_ci				     struct crush_bucket_straw *b)
1068c2ecf20Sopenharmony_ci{
1078c2ecf20Sopenharmony_ci	int j;
1088c2ecf20Sopenharmony_ci	dout("crush_decode_straw_bucket %p to %p\n", *p, end);
1098c2ecf20Sopenharmony_ci	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
1108c2ecf20Sopenharmony_ci	if (b->item_weights == NULL)
1118c2ecf20Sopenharmony_ci		return -ENOMEM;
1128c2ecf20Sopenharmony_ci	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
1138c2ecf20Sopenharmony_ci	if (b->straws == NULL)
1148c2ecf20Sopenharmony_ci		return -ENOMEM;
1158c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
1168c2ecf20Sopenharmony_ci	for (j = 0; j < b->h.size; j++) {
1178c2ecf20Sopenharmony_ci		b->item_weights[j] = ceph_decode_32(p);
1188c2ecf20Sopenharmony_ci		b->straws[j] = ceph_decode_32(p);
1198c2ecf20Sopenharmony_ci	}
1208c2ecf20Sopenharmony_ci	return 0;
1218c2ecf20Sopenharmony_cibad:
1228c2ecf20Sopenharmony_ci	return -EINVAL;
1238c2ecf20Sopenharmony_ci}
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_cistatic int crush_decode_straw2_bucket(void **p, void *end,
1268c2ecf20Sopenharmony_ci				      struct crush_bucket_straw2 *b)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	int j;
1298c2ecf20Sopenharmony_ci	dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
1308c2ecf20Sopenharmony_ci	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
1318c2ecf20Sopenharmony_ci	if (b->item_weights == NULL)
1328c2ecf20Sopenharmony_ci		return -ENOMEM;
1338c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
1348c2ecf20Sopenharmony_ci	for (j = 0; j < b->h.size; j++)
1358c2ecf20Sopenharmony_ci		b->item_weights[j] = ceph_decode_32(p);
1368c2ecf20Sopenharmony_ci	return 0;
1378c2ecf20Sopenharmony_cibad:
1388c2ecf20Sopenharmony_ci	return -EINVAL;
1398c2ecf20Sopenharmony_ci}
1408c2ecf20Sopenharmony_ci
/*
 * One decoded id -> name entry.  Entries are linked into an rb-tree
 * through cn_node and keyed by cn_id; the name is stored inline after
 * the fixed part of the struct.
 */
struct crush_name_node {
	struct rb_node cn_node;	/* rb-tree linkage */
	int cn_id;		/* id this name belongs to (tree key) */
	char cn_name[];		/* NUL-terminated name */
};
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_cistatic struct crush_name_node *alloc_crush_name(size_t name_len)
1488c2ecf20Sopenharmony_ci{
1498c2ecf20Sopenharmony_ci	struct crush_name_node *cn;
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	cn = kmalloc(sizeof(*cn) + name_len + 1, GFP_NOIO);
1528c2ecf20Sopenharmony_ci	if (!cn)
1538c2ecf20Sopenharmony_ci		return NULL;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	RB_CLEAR_NODE(&cn->cn_node);
1568c2ecf20Sopenharmony_ci	return cn;
1578c2ecf20Sopenharmony_ci}
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_cistatic void free_crush_name(struct crush_name_node *cn)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	WARN_ON(!RB_EMPTY_NODE(&cn->cn_node));
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	kfree(cn);
1648c2ecf20Sopenharmony_ci}
1658c2ecf20Sopenharmony_ci
/*
 * Instantiate the rb-tree helpers for struct crush_name_node, keyed by
 * cn_id and linked through cn_node.  __insert_crush_name() and
 * erase_crush_name(), both used in this file, are presumably among the
 * helpers this expands to - the macro itself is defined elsewhere.
 */
DEFINE_RB_FUNCS(crush_name, struct crush_name_node, cn_id, cn_node)
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_cistatic int decode_crush_names(void **p, void *end, struct rb_root *root)
1698c2ecf20Sopenharmony_ci{
1708c2ecf20Sopenharmony_ci	u32 n;
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, n, e_inval);
1738c2ecf20Sopenharmony_ci	while (n--) {
1748c2ecf20Sopenharmony_ci		struct crush_name_node *cn;
1758c2ecf20Sopenharmony_ci		int id;
1768c2ecf20Sopenharmony_ci		u32 name_len;
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, id, e_inval);
1798c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, name_len, e_inval);
1808c2ecf20Sopenharmony_ci		ceph_decode_need(p, end, name_len, e_inval);
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci		cn = alloc_crush_name(name_len);
1838c2ecf20Sopenharmony_ci		if (!cn)
1848c2ecf20Sopenharmony_ci			return -ENOMEM;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci		cn->cn_id = id;
1878c2ecf20Sopenharmony_ci		memcpy(cn->cn_name, *p, name_len);
1888c2ecf20Sopenharmony_ci		cn->cn_name[name_len] = '\0';
1898c2ecf20Sopenharmony_ci		*p += name_len;
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci		if (!__insert_crush_name(root, cn)) {
1928c2ecf20Sopenharmony_ci			free_crush_name(cn);
1938c2ecf20Sopenharmony_ci			return -EEXIST;
1948c2ecf20Sopenharmony_ci		}
1958c2ecf20Sopenharmony_ci	}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	return 0;
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_cie_inval:
2008c2ecf20Sopenharmony_ci	return -EINVAL;
2018c2ecf20Sopenharmony_ci}
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_civoid clear_crush_names(struct rb_root *root)
2048c2ecf20Sopenharmony_ci{
2058c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(root)) {
2068c2ecf20Sopenharmony_ci		struct crush_name_node *cn =
2078c2ecf20Sopenharmony_ci		    rb_entry(rb_first(root), struct crush_name_node, cn_node);
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci		erase_crush_name(root, cn);
2108c2ecf20Sopenharmony_ci		free_crush_name(cn);
2118c2ecf20Sopenharmony_ci	}
2128c2ecf20Sopenharmony_ci}
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_cistatic struct crush_choose_arg_map *alloc_choose_arg_map(void)
2158c2ecf20Sopenharmony_ci{
2168c2ecf20Sopenharmony_ci	struct crush_choose_arg_map *arg_map;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO);
2198c2ecf20Sopenharmony_ci	if (!arg_map)
2208c2ecf20Sopenharmony_ci		return NULL;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	RB_CLEAR_NODE(&arg_map->node);
2238c2ecf20Sopenharmony_ci	return arg_map;
2248c2ecf20Sopenharmony_ci}
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_cistatic void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
2278c2ecf20Sopenharmony_ci{
2288c2ecf20Sopenharmony_ci	if (arg_map) {
2298c2ecf20Sopenharmony_ci		int i, j;
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci		WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci		for (i = 0; i < arg_map->size; i++) {
2348c2ecf20Sopenharmony_ci			struct crush_choose_arg *arg = &arg_map->args[i];
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci			for (j = 0; j < arg->weight_set_size; j++)
2378c2ecf20Sopenharmony_ci				kfree(arg->weight_set[j].weights);
2388c2ecf20Sopenharmony_ci			kfree(arg->weight_set);
2398c2ecf20Sopenharmony_ci			kfree(arg->ids);
2408c2ecf20Sopenharmony_ci		}
2418c2ecf20Sopenharmony_ci		kfree(arg_map->args);
2428c2ecf20Sopenharmony_ci		kfree(arg_map);
2438c2ecf20Sopenharmony_ci	}
2448c2ecf20Sopenharmony_ci}
2458c2ecf20Sopenharmony_ci
/*
 * Instantiate the rb-tree helpers for struct crush_choose_arg_map,
 * keyed by choose_args_index and linked through node.
 * insert_choose_arg_map() and erase_choose_arg_map(), both used in this
 * file, are presumably among the helpers this expands to - the macro
 * itself is defined elsewhere.
 */
DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
		node);
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_civoid clear_choose_args(struct crush_map *c)
2508c2ecf20Sopenharmony_ci{
2518c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(&c->choose_args)) {
2528c2ecf20Sopenharmony_ci		struct crush_choose_arg_map *arg_map =
2538c2ecf20Sopenharmony_ci		    rb_entry(rb_first(&c->choose_args),
2548c2ecf20Sopenharmony_ci			     struct crush_choose_arg_map, node);
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci		erase_choose_arg_map(&c->choose_args, arg_map);
2578c2ecf20Sopenharmony_ci		free_choose_arg_map(arg_map);
2588c2ecf20Sopenharmony_ci	}
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_cistatic u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
2628c2ecf20Sopenharmony_ci{
2638c2ecf20Sopenharmony_ci	u32 *a = NULL;
2648c2ecf20Sopenharmony_ci	u32 len;
2658c2ecf20Sopenharmony_ci	int ret;
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
2688c2ecf20Sopenharmony_ci	if (len) {
2698c2ecf20Sopenharmony_ci		u32 i;
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci		a = kmalloc_array(len, sizeof(u32), GFP_NOIO);
2728c2ecf20Sopenharmony_ci		if (!a) {
2738c2ecf20Sopenharmony_ci			ret = -ENOMEM;
2748c2ecf20Sopenharmony_ci			goto fail;
2758c2ecf20Sopenharmony_ci		}
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci		ceph_decode_need(p, end, len * sizeof(u32), e_inval);
2788c2ecf20Sopenharmony_ci		for (i = 0; i < len; i++)
2798c2ecf20Sopenharmony_ci			a[i] = ceph_decode_32(p);
2808c2ecf20Sopenharmony_ci	}
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	*plen = len;
2838c2ecf20Sopenharmony_ci	return a;
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_cie_inval:
2868c2ecf20Sopenharmony_ci	ret = -EINVAL;
2878c2ecf20Sopenharmony_cifail:
2888c2ecf20Sopenharmony_ci	kfree(a);
2898c2ecf20Sopenharmony_ci	return ERR_PTR(ret);
2908c2ecf20Sopenharmony_ci}
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci/*
2938c2ecf20Sopenharmony_ci * Assumes @arg is zero-initialized.
2948c2ecf20Sopenharmony_ci */
2958c2ecf20Sopenharmony_cistatic int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
2968c2ecf20Sopenharmony_ci{
2978c2ecf20Sopenharmony_ci	int ret;
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval);
3008c2ecf20Sopenharmony_ci	if (arg->weight_set_size) {
3018c2ecf20Sopenharmony_ci		u32 i;
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci		arg->weight_set = kmalloc_array(arg->weight_set_size,
3048c2ecf20Sopenharmony_ci						sizeof(*arg->weight_set),
3058c2ecf20Sopenharmony_ci						GFP_NOIO);
3068c2ecf20Sopenharmony_ci		if (!arg->weight_set)
3078c2ecf20Sopenharmony_ci			return -ENOMEM;
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci		for (i = 0; i < arg->weight_set_size; i++) {
3108c2ecf20Sopenharmony_ci			struct crush_weight_set *w = &arg->weight_set[i];
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci			w->weights = decode_array_32_alloc(p, end, &w->size);
3138c2ecf20Sopenharmony_ci			if (IS_ERR(w->weights)) {
3148c2ecf20Sopenharmony_ci				ret = PTR_ERR(w->weights);
3158c2ecf20Sopenharmony_ci				w->weights = NULL;
3168c2ecf20Sopenharmony_ci				return ret;
3178c2ecf20Sopenharmony_ci			}
3188c2ecf20Sopenharmony_ci		}
3198c2ecf20Sopenharmony_ci	}
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	arg->ids = decode_array_32_alloc(p, end, &arg->ids_size);
3228c2ecf20Sopenharmony_ci	if (IS_ERR(arg->ids)) {
3238c2ecf20Sopenharmony_ci		ret = PTR_ERR(arg->ids);
3248c2ecf20Sopenharmony_ci		arg->ids = NULL;
3258c2ecf20Sopenharmony_ci		return ret;
3268c2ecf20Sopenharmony_ci	}
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	return 0;
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_cie_inval:
3318c2ecf20Sopenharmony_ci	return -EINVAL;
3328c2ecf20Sopenharmony_ci}
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_cistatic int decode_choose_args(void **p, void *end, struct crush_map *c)
3358c2ecf20Sopenharmony_ci{
3368c2ecf20Sopenharmony_ci	struct crush_choose_arg_map *arg_map = NULL;
3378c2ecf20Sopenharmony_ci	u32 num_choose_arg_maps, num_buckets;
3388c2ecf20Sopenharmony_ci	int ret;
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval);
3418c2ecf20Sopenharmony_ci	while (num_choose_arg_maps--) {
3428c2ecf20Sopenharmony_ci		arg_map = alloc_choose_arg_map();
3438c2ecf20Sopenharmony_ci		if (!arg_map) {
3448c2ecf20Sopenharmony_ci			ret = -ENOMEM;
3458c2ecf20Sopenharmony_ci			goto fail;
3468c2ecf20Sopenharmony_ci		}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci		ceph_decode_64_safe(p, end, arg_map->choose_args_index,
3498c2ecf20Sopenharmony_ci				    e_inval);
3508c2ecf20Sopenharmony_ci		arg_map->size = c->max_buckets;
3518c2ecf20Sopenharmony_ci		arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args),
3528c2ecf20Sopenharmony_ci					GFP_NOIO);
3538c2ecf20Sopenharmony_ci		if (!arg_map->args) {
3548c2ecf20Sopenharmony_ci			ret = -ENOMEM;
3558c2ecf20Sopenharmony_ci			goto fail;
3568c2ecf20Sopenharmony_ci		}
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, num_buckets, e_inval);
3598c2ecf20Sopenharmony_ci		while (num_buckets--) {
3608c2ecf20Sopenharmony_ci			struct crush_choose_arg *arg;
3618c2ecf20Sopenharmony_ci			u32 bucket_index;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci			ceph_decode_32_safe(p, end, bucket_index, e_inval);
3648c2ecf20Sopenharmony_ci			if (bucket_index >= arg_map->size)
3658c2ecf20Sopenharmony_ci				goto e_inval;
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci			arg = &arg_map->args[bucket_index];
3688c2ecf20Sopenharmony_ci			ret = decode_choose_arg(p, end, arg);
3698c2ecf20Sopenharmony_ci			if (ret)
3708c2ecf20Sopenharmony_ci				goto fail;
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci			if (arg->ids_size &&
3738c2ecf20Sopenharmony_ci			    arg->ids_size != c->buckets[bucket_index]->size)
3748c2ecf20Sopenharmony_ci				goto e_inval;
3758c2ecf20Sopenharmony_ci		}
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci		insert_choose_arg_map(&c->choose_args, arg_map);
3788c2ecf20Sopenharmony_ci	}
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	return 0;
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_cie_inval:
3838c2ecf20Sopenharmony_ci	ret = -EINVAL;
3848c2ecf20Sopenharmony_cifail:
3858c2ecf20Sopenharmony_ci	free_choose_arg_map(arg_map);
3868c2ecf20Sopenharmony_ci	return ret;
3878c2ecf20Sopenharmony_ci}
3888c2ecf20Sopenharmony_ci
3898c2ecf20Sopenharmony_cistatic void crush_finalize(struct crush_map *c)
3908c2ecf20Sopenharmony_ci{
3918c2ecf20Sopenharmony_ci	__s32 b;
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_ci	/* Space for the array of pointers to per-bucket workspace */
3948c2ecf20Sopenharmony_ci	c->working_size = sizeof(struct crush_work) +
3958c2ecf20Sopenharmony_ci	    c->max_buckets * sizeof(struct crush_work_bucket *);
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	for (b = 0; b < c->max_buckets; b++) {
3988c2ecf20Sopenharmony_ci		if (!c->buckets[b])
3998c2ecf20Sopenharmony_ci			continue;
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci		switch (c->buckets[b]->alg) {
4028c2ecf20Sopenharmony_ci		default:
4038c2ecf20Sopenharmony_ci			/*
4048c2ecf20Sopenharmony_ci			 * The base case, permutation variables and
4058c2ecf20Sopenharmony_ci			 * the pointer to the permutation array.
4068c2ecf20Sopenharmony_ci			 */
4078c2ecf20Sopenharmony_ci			c->working_size += sizeof(struct crush_work_bucket);
4088c2ecf20Sopenharmony_ci			break;
4098c2ecf20Sopenharmony_ci		}
4108c2ecf20Sopenharmony_ci		/* Every bucket has a permutation array. */
4118c2ecf20Sopenharmony_ci		c->working_size += c->buckets[b]->size * sizeof(__u32);
4128c2ecf20Sopenharmony_ci	}
4138c2ecf20Sopenharmony_ci}
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_cistatic struct crush_map *crush_decode(void *pbyval, void *end)
4168c2ecf20Sopenharmony_ci{
4178c2ecf20Sopenharmony_ci	struct crush_map *c;
4188c2ecf20Sopenharmony_ci	int err;
4198c2ecf20Sopenharmony_ci	int i, j;
4208c2ecf20Sopenharmony_ci	void **p = &pbyval;
4218c2ecf20Sopenharmony_ci	void *start = pbyval;
4228c2ecf20Sopenharmony_ci	u32 magic;
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	c = kzalloc(sizeof(*c), GFP_NOFS);
4278c2ecf20Sopenharmony_ci	if (c == NULL)
4288c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	c->type_names = RB_ROOT;
4318c2ecf20Sopenharmony_ci	c->names = RB_ROOT;
4328c2ecf20Sopenharmony_ci	c->choose_args = RB_ROOT;
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci        /* set tunables to default values */
4358c2ecf20Sopenharmony_ci        c->choose_local_tries = 2;
4368c2ecf20Sopenharmony_ci        c->choose_local_fallback_tries = 5;
4378c2ecf20Sopenharmony_ci        c->choose_total_tries = 19;
4388c2ecf20Sopenharmony_ci	c->chooseleaf_descend_once = 0;
4398c2ecf20Sopenharmony_ci
4408c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, 4*sizeof(u32), bad);
4418c2ecf20Sopenharmony_ci	magic = ceph_decode_32(p);
4428c2ecf20Sopenharmony_ci	if (magic != CRUSH_MAGIC) {
4438c2ecf20Sopenharmony_ci		pr_err("crush_decode magic %x != current %x\n",
4448c2ecf20Sopenharmony_ci		       (unsigned int)magic, (unsigned int)CRUSH_MAGIC);
4458c2ecf20Sopenharmony_ci		goto bad;
4468c2ecf20Sopenharmony_ci	}
4478c2ecf20Sopenharmony_ci	c->max_buckets = ceph_decode_32(p);
4488c2ecf20Sopenharmony_ci	c->max_rules = ceph_decode_32(p);
4498c2ecf20Sopenharmony_ci	c->max_devices = ceph_decode_32(p);
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
4528c2ecf20Sopenharmony_ci	if (c->buckets == NULL)
4538c2ecf20Sopenharmony_ci		goto badmem;
4548c2ecf20Sopenharmony_ci	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
4558c2ecf20Sopenharmony_ci	if (c->rules == NULL)
4568c2ecf20Sopenharmony_ci		goto badmem;
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci	/* buckets */
4598c2ecf20Sopenharmony_ci	for (i = 0; i < c->max_buckets; i++) {
4608c2ecf20Sopenharmony_ci		int size = 0;
4618c2ecf20Sopenharmony_ci		u32 alg;
4628c2ecf20Sopenharmony_ci		struct crush_bucket *b;
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, alg, bad);
4658c2ecf20Sopenharmony_ci		if (alg == 0) {
4668c2ecf20Sopenharmony_ci			c->buckets[i] = NULL;
4678c2ecf20Sopenharmony_ci			continue;
4688c2ecf20Sopenharmony_ci		}
4698c2ecf20Sopenharmony_ci		dout("crush_decode bucket %d off %x %p to %p\n",
4708c2ecf20Sopenharmony_ci		     i, (int)(*p-start), *p, end);
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_ci		switch (alg) {
4738c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_UNIFORM:
4748c2ecf20Sopenharmony_ci			size = sizeof(struct crush_bucket_uniform);
4758c2ecf20Sopenharmony_ci			break;
4768c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_LIST:
4778c2ecf20Sopenharmony_ci			size = sizeof(struct crush_bucket_list);
4788c2ecf20Sopenharmony_ci			break;
4798c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_TREE:
4808c2ecf20Sopenharmony_ci			size = sizeof(struct crush_bucket_tree);
4818c2ecf20Sopenharmony_ci			break;
4828c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_STRAW:
4838c2ecf20Sopenharmony_ci			size = sizeof(struct crush_bucket_straw);
4848c2ecf20Sopenharmony_ci			break;
4858c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_STRAW2:
4868c2ecf20Sopenharmony_ci			size = sizeof(struct crush_bucket_straw2);
4878c2ecf20Sopenharmony_ci			break;
4888c2ecf20Sopenharmony_ci		default:
4898c2ecf20Sopenharmony_ci			goto bad;
4908c2ecf20Sopenharmony_ci		}
4918c2ecf20Sopenharmony_ci		BUG_ON(size == 0);
4928c2ecf20Sopenharmony_ci		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
4938c2ecf20Sopenharmony_ci		if (b == NULL)
4948c2ecf20Sopenharmony_ci			goto badmem;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci		ceph_decode_need(p, end, 4*sizeof(u32), bad);
4978c2ecf20Sopenharmony_ci		b->id = ceph_decode_32(p);
4988c2ecf20Sopenharmony_ci		b->type = ceph_decode_16(p);
4998c2ecf20Sopenharmony_ci		b->alg = ceph_decode_8(p);
5008c2ecf20Sopenharmony_ci		b->hash = ceph_decode_8(p);
5018c2ecf20Sopenharmony_ci		b->weight = ceph_decode_32(p);
5028c2ecf20Sopenharmony_ci		b->size = ceph_decode_32(p);
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci		dout("crush_decode bucket size %d off %x %p to %p\n",
5058c2ecf20Sopenharmony_ci		     b->size, (int)(*p-start), *p, end);
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
5088c2ecf20Sopenharmony_ci		if (b->items == NULL)
5098c2ecf20Sopenharmony_ci			goto badmem;
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
5128c2ecf20Sopenharmony_ci		for (j = 0; j < b->size; j++)
5138c2ecf20Sopenharmony_ci			b->items[j] = ceph_decode_32(p);
5148c2ecf20Sopenharmony_ci
5158c2ecf20Sopenharmony_ci		switch (b->alg) {
5168c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_UNIFORM:
5178c2ecf20Sopenharmony_ci			err = crush_decode_uniform_bucket(p, end,
5188c2ecf20Sopenharmony_ci				  (struct crush_bucket_uniform *)b);
5198c2ecf20Sopenharmony_ci			if (err < 0)
5208c2ecf20Sopenharmony_ci				goto fail;
5218c2ecf20Sopenharmony_ci			break;
5228c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_LIST:
5238c2ecf20Sopenharmony_ci			err = crush_decode_list_bucket(p, end,
5248c2ecf20Sopenharmony_ci			       (struct crush_bucket_list *)b);
5258c2ecf20Sopenharmony_ci			if (err < 0)
5268c2ecf20Sopenharmony_ci				goto fail;
5278c2ecf20Sopenharmony_ci			break;
5288c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_TREE:
5298c2ecf20Sopenharmony_ci			err = crush_decode_tree_bucket(p, end,
5308c2ecf20Sopenharmony_ci				(struct crush_bucket_tree *)b);
5318c2ecf20Sopenharmony_ci			if (err < 0)
5328c2ecf20Sopenharmony_ci				goto fail;
5338c2ecf20Sopenharmony_ci			break;
5348c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_STRAW:
5358c2ecf20Sopenharmony_ci			err = crush_decode_straw_bucket(p, end,
5368c2ecf20Sopenharmony_ci				(struct crush_bucket_straw *)b);
5378c2ecf20Sopenharmony_ci			if (err < 0)
5388c2ecf20Sopenharmony_ci				goto fail;
5398c2ecf20Sopenharmony_ci			break;
5408c2ecf20Sopenharmony_ci		case CRUSH_BUCKET_STRAW2:
5418c2ecf20Sopenharmony_ci			err = crush_decode_straw2_bucket(p, end,
5428c2ecf20Sopenharmony_ci				(struct crush_bucket_straw2 *)b);
5438c2ecf20Sopenharmony_ci			if (err < 0)
5448c2ecf20Sopenharmony_ci				goto fail;
5458c2ecf20Sopenharmony_ci			break;
5468c2ecf20Sopenharmony_ci		}
5478c2ecf20Sopenharmony_ci	}
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci	/* rules */
5508c2ecf20Sopenharmony_ci	dout("rule vec is %p\n", c->rules);
5518c2ecf20Sopenharmony_ci	for (i = 0; i < c->max_rules; i++) {
5528c2ecf20Sopenharmony_ci		u32 yes;
5538c2ecf20Sopenharmony_ci		struct crush_rule *r;
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, yes, bad);
5568c2ecf20Sopenharmony_ci		if (!yes) {
5578c2ecf20Sopenharmony_ci			dout("crush_decode NO rule %d off %x %p to %p\n",
5588c2ecf20Sopenharmony_ci			     i, (int)(*p-start), *p, end);
5598c2ecf20Sopenharmony_ci			c->rules[i] = NULL;
5608c2ecf20Sopenharmony_ci			continue;
5618c2ecf20Sopenharmony_ci		}
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ci		dout("crush_decode rule %d off %x %p to %p\n",
5648c2ecf20Sopenharmony_ci		     i, (int)(*p-start), *p, end);
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci		/* len */
5678c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, yes, bad);
5688c2ecf20Sopenharmony_ci#if BITS_PER_LONG == 32
5698c2ecf20Sopenharmony_ci		if (yes > (ULONG_MAX - sizeof(*r))
5708c2ecf20Sopenharmony_ci			  / sizeof(struct crush_rule_step))
5718c2ecf20Sopenharmony_ci			goto bad;
5728c2ecf20Sopenharmony_ci#endif
5738c2ecf20Sopenharmony_ci		r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
5748c2ecf20Sopenharmony_ci		c->rules[i] = r;
5758c2ecf20Sopenharmony_ci		if (r == NULL)
5768c2ecf20Sopenharmony_ci			goto badmem;
5778c2ecf20Sopenharmony_ci		dout(" rule %d is at %p\n", i, r);
5788c2ecf20Sopenharmony_ci		r->len = yes;
5798c2ecf20Sopenharmony_ci		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
5808c2ecf20Sopenharmony_ci		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
5818c2ecf20Sopenharmony_ci		for (j = 0; j < r->len; j++) {
5828c2ecf20Sopenharmony_ci			r->steps[j].op = ceph_decode_32(p);
5838c2ecf20Sopenharmony_ci			r->steps[j].arg1 = ceph_decode_32(p);
5848c2ecf20Sopenharmony_ci			r->steps[j].arg2 = ceph_decode_32(p);
5858c2ecf20Sopenharmony_ci		}
5868c2ecf20Sopenharmony_ci	}
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ci	err = decode_crush_names(p, end, &c->type_names);
5898c2ecf20Sopenharmony_ci	if (err)
5908c2ecf20Sopenharmony_ci		goto fail;
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci	err = decode_crush_names(p, end, &c->names);
5938c2ecf20Sopenharmony_ci	if (err)
5948c2ecf20Sopenharmony_ci		goto fail;
5958c2ecf20Sopenharmony_ci
5968c2ecf20Sopenharmony_ci	ceph_decode_skip_map(p, end, 32, string, bad); /* rule_name_map */
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci        /* tunables */
5998c2ecf20Sopenharmony_ci        ceph_decode_need(p, end, 3*sizeof(u32), done);
6008c2ecf20Sopenharmony_ci        c->choose_local_tries = ceph_decode_32(p);
6018c2ecf20Sopenharmony_ci        c->choose_local_fallback_tries =  ceph_decode_32(p);
6028c2ecf20Sopenharmony_ci        c->choose_total_tries = ceph_decode_32(p);
6038c2ecf20Sopenharmony_ci        dout("crush decode tunable choose_local_tries = %d\n",
6048c2ecf20Sopenharmony_ci             c->choose_local_tries);
6058c2ecf20Sopenharmony_ci        dout("crush decode tunable choose_local_fallback_tries = %d\n",
6068c2ecf20Sopenharmony_ci             c->choose_local_fallback_tries);
6078c2ecf20Sopenharmony_ci        dout("crush decode tunable choose_total_tries = %d\n",
6088c2ecf20Sopenharmony_ci             c->choose_total_tries);
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, sizeof(u32), done);
6118c2ecf20Sopenharmony_ci	c->chooseleaf_descend_once = ceph_decode_32(p);
6128c2ecf20Sopenharmony_ci	dout("crush decode tunable chooseleaf_descend_once = %d\n",
6138c2ecf20Sopenharmony_ci	     c->chooseleaf_descend_once);
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, sizeof(u8), done);
6168c2ecf20Sopenharmony_ci	c->chooseleaf_vary_r = ceph_decode_8(p);
6178c2ecf20Sopenharmony_ci	dout("crush decode tunable chooseleaf_vary_r = %d\n",
6188c2ecf20Sopenharmony_ci	     c->chooseleaf_vary_r);
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	/* skip straw_calc_version, allowed_bucket_algs */
6218c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
6228c2ecf20Sopenharmony_ci	*p += sizeof(u8) + sizeof(u32);
6238c2ecf20Sopenharmony_ci
6248c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, sizeof(u8), done);
6258c2ecf20Sopenharmony_ci	c->chooseleaf_stable = ceph_decode_8(p);
6268c2ecf20Sopenharmony_ci	dout("crush decode tunable chooseleaf_stable = %d\n",
6278c2ecf20Sopenharmony_ci	     c->chooseleaf_stable);
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	if (*p != end) {
6308c2ecf20Sopenharmony_ci		/* class_map */
6318c2ecf20Sopenharmony_ci		ceph_decode_skip_map(p, end, 32, 32, bad);
6328c2ecf20Sopenharmony_ci		/* class_name */
6338c2ecf20Sopenharmony_ci		ceph_decode_skip_map(p, end, 32, string, bad);
6348c2ecf20Sopenharmony_ci		/* class_bucket */
6358c2ecf20Sopenharmony_ci		ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad);
6368c2ecf20Sopenharmony_ci	}
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_ci	if (*p != end) {
6398c2ecf20Sopenharmony_ci		err = decode_choose_args(p, end, c);
6408c2ecf20Sopenharmony_ci		if (err)
6418c2ecf20Sopenharmony_ci			goto fail;
6428c2ecf20Sopenharmony_ci	}
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_cidone:
6458c2ecf20Sopenharmony_ci	crush_finalize(c);
6468c2ecf20Sopenharmony_ci	dout("crush_decode success\n");
6478c2ecf20Sopenharmony_ci	return c;
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_cibadmem:
6508c2ecf20Sopenharmony_ci	err = -ENOMEM;
6518c2ecf20Sopenharmony_cifail:
6528c2ecf20Sopenharmony_ci	dout("crush_decode fail %d\n", err);
6538c2ecf20Sopenharmony_ci	crush_destroy(c);
6548c2ecf20Sopenharmony_ci	return ERR_PTR(err);
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_cibad:
6578c2ecf20Sopenharmony_ci	err = -EINVAL;
6588c2ecf20Sopenharmony_ci	goto fail;
6598c2ecf20Sopenharmony_ci}
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_ciint ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
6628c2ecf20Sopenharmony_ci{
6638c2ecf20Sopenharmony_ci	if (lhs->pool < rhs->pool)
6648c2ecf20Sopenharmony_ci		return -1;
6658c2ecf20Sopenharmony_ci	if (lhs->pool > rhs->pool)
6668c2ecf20Sopenharmony_ci		return 1;
6678c2ecf20Sopenharmony_ci	if (lhs->seed < rhs->seed)
6688c2ecf20Sopenharmony_ci		return -1;
6698c2ecf20Sopenharmony_ci	if (lhs->seed > rhs->seed)
6708c2ecf20Sopenharmony_ci		return 1;
6718c2ecf20Sopenharmony_ci
6728c2ecf20Sopenharmony_ci	return 0;
6738c2ecf20Sopenharmony_ci}
6748c2ecf20Sopenharmony_ci
6758c2ecf20Sopenharmony_ciint ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
6768c2ecf20Sopenharmony_ci{
6778c2ecf20Sopenharmony_ci	int ret;
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci	ret = ceph_pg_compare(&lhs->pgid, &rhs->pgid);
6808c2ecf20Sopenharmony_ci	if (ret)
6818c2ecf20Sopenharmony_ci		return ret;
6828c2ecf20Sopenharmony_ci
6838c2ecf20Sopenharmony_ci	if (lhs->shard < rhs->shard)
6848c2ecf20Sopenharmony_ci		return -1;
6858c2ecf20Sopenharmony_ci	if (lhs->shard > rhs->shard)
6868c2ecf20Sopenharmony_ci		return 1;
6878c2ecf20Sopenharmony_ci
6888c2ecf20Sopenharmony_ci	return 0;
6898c2ecf20Sopenharmony_ci}
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_cistatic struct ceph_pg_mapping *alloc_pg_mapping(size_t payload_len)
6928c2ecf20Sopenharmony_ci{
6938c2ecf20Sopenharmony_ci	struct ceph_pg_mapping *pg;
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci	pg = kmalloc(sizeof(*pg) + payload_len, GFP_NOIO);
6968c2ecf20Sopenharmony_ci	if (!pg)
6978c2ecf20Sopenharmony_ci		return NULL;
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci	RB_CLEAR_NODE(&pg->node);
7008c2ecf20Sopenharmony_ci	return pg;
7018c2ecf20Sopenharmony_ci}
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_cistatic void free_pg_mapping(struct ceph_pg_mapping *pg)
7048c2ecf20Sopenharmony_ci{
7058c2ecf20Sopenharmony_ci	WARN_ON(!RB_EMPTY_NODE(&pg->node));
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_ci	kfree(pg);
7088c2ecf20Sopenharmony_ci}
7098c2ecf20Sopenharmony_ci
/*
 * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
 * to a set of osds) and primary_temp (explicit primary setting)
 *
 * Entries are keyed by the embedded pgid field and ordered with
 * ceph_pg_compare().  erase_pg_mapping(), used later in this file, is
 * one of the helpers this expansion provides; presumably the matching
 * insert/lookup helpers are generated alongside it (confirm against
 * the DEFINE_RB_FUNCS2 definition).
 */
DEFINE_RB_FUNCS2(pg_mapping, struct ceph_pg_mapping, pgid, ceph_pg_compare,
		 RB_BYPTR, const struct ceph_pg *, node)
7168c2ecf20Sopenharmony_ci
/*
 * rbtree of pg pool info
 *
 * Entries are keyed by the id field.  The lookup_pg_pool(),
 * erase_pg_pool() and __insert_pg_pool() helpers used in this file
 * presumably come from this expansion (the macro itself is defined
 * elsewhere -- confirm there).
 */
DEFINE_RB_FUNCS(pg_pool, struct ceph_pg_pool_info, id, node)
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_cistruct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
7238c2ecf20Sopenharmony_ci{
7248c2ecf20Sopenharmony_ci	return lookup_pg_pool(&map->pg_pools, id);
7258c2ecf20Sopenharmony_ci}
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ciconst char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
7288c2ecf20Sopenharmony_ci{
7298c2ecf20Sopenharmony_ci	struct ceph_pg_pool_info *pi;
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci	if (id == CEPH_NOPOOL)
7328c2ecf20Sopenharmony_ci		return NULL;
7338c2ecf20Sopenharmony_ci
7348c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(id > (u64) INT_MAX))
7358c2ecf20Sopenharmony_ci		return NULL;
7368c2ecf20Sopenharmony_ci
7378c2ecf20Sopenharmony_ci	pi = lookup_pg_pool(&map->pg_pools, id);
7388c2ecf20Sopenharmony_ci	return pi ? pi->name : NULL;
7398c2ecf20Sopenharmony_ci}
7408c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_pg_pool_name_by_id);
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ciint ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
7438c2ecf20Sopenharmony_ci{
7448c2ecf20Sopenharmony_ci	struct rb_node *rbp;
7458c2ecf20Sopenharmony_ci
7468c2ecf20Sopenharmony_ci	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
7478c2ecf20Sopenharmony_ci		struct ceph_pg_pool_info *pi =
7488c2ecf20Sopenharmony_ci			rb_entry(rbp, struct ceph_pg_pool_info, node);
7498c2ecf20Sopenharmony_ci		if (pi->name && strcmp(pi->name, name) == 0)
7508c2ecf20Sopenharmony_ci			return pi->id;
7518c2ecf20Sopenharmony_ci	}
7528c2ecf20Sopenharmony_ci	return -ENOENT;
7538c2ecf20Sopenharmony_ci}
7548c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_pg_poolid_by_name);
7558c2ecf20Sopenharmony_ci
7568c2ecf20Sopenharmony_ciu64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
7578c2ecf20Sopenharmony_ci{
7588c2ecf20Sopenharmony_ci	struct ceph_pg_pool_info *pi;
7598c2ecf20Sopenharmony_ci
7608c2ecf20Sopenharmony_ci	pi = lookup_pg_pool(&map->pg_pools, id);
7618c2ecf20Sopenharmony_ci	return pi ? pi->flags : 0;
7628c2ecf20Sopenharmony_ci}
7638c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_pg_pool_flags);
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_cistatic void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
7668c2ecf20Sopenharmony_ci{
7678c2ecf20Sopenharmony_ci	erase_pg_pool(root, pi);
7688c2ecf20Sopenharmony_ci	kfree(pi->name);
7698c2ecf20Sopenharmony_ci	kfree(pi);
7708c2ecf20Sopenharmony_ci}
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_cistatic int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
7738c2ecf20Sopenharmony_ci{
7748c2ecf20Sopenharmony_ci	u8 ev, cv;
7758c2ecf20Sopenharmony_ci	unsigned len, num;
7768c2ecf20Sopenharmony_ci	void *pool_end;
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, 2 + 4, bad);
7798c2ecf20Sopenharmony_ci	ev = ceph_decode_8(p);  /* encoding version */
7808c2ecf20Sopenharmony_ci	cv = ceph_decode_8(p); /* compat version */
7818c2ecf20Sopenharmony_ci	if (ev < 5) {
7828c2ecf20Sopenharmony_ci		pr_warn("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
7838c2ecf20Sopenharmony_ci		return -EINVAL;
7848c2ecf20Sopenharmony_ci	}
7858c2ecf20Sopenharmony_ci	if (cv > 9) {
7868c2ecf20Sopenharmony_ci		pr_warn("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
7878c2ecf20Sopenharmony_ci		return -EINVAL;
7888c2ecf20Sopenharmony_ci	}
7898c2ecf20Sopenharmony_ci	len = ceph_decode_32(p);
7908c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, len, bad);
7918c2ecf20Sopenharmony_ci	pool_end = *p + len;
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci	pi->type = ceph_decode_8(p);
7948c2ecf20Sopenharmony_ci	pi->size = ceph_decode_8(p);
7958c2ecf20Sopenharmony_ci	pi->crush_ruleset = ceph_decode_8(p);
7968c2ecf20Sopenharmony_ci	pi->object_hash = ceph_decode_8(p);
7978c2ecf20Sopenharmony_ci
7988c2ecf20Sopenharmony_ci	pi->pg_num = ceph_decode_32(p);
7998c2ecf20Sopenharmony_ci	pi->pgp_num = ceph_decode_32(p);
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci	*p += 4 + 4;  /* skip lpg* */
8028c2ecf20Sopenharmony_ci	*p += 4;      /* skip last_change */
8038c2ecf20Sopenharmony_ci	*p += 8 + 4;  /* skip snap_seq, snap_epoch */
8048c2ecf20Sopenharmony_ci
8058c2ecf20Sopenharmony_ci	/* skip snaps */
8068c2ecf20Sopenharmony_ci	num = ceph_decode_32(p);
8078c2ecf20Sopenharmony_ci	while (num--) {
8088c2ecf20Sopenharmony_ci		*p += 8;  /* snapid key */
8098c2ecf20Sopenharmony_ci		*p += 1 + 1; /* versions */
8108c2ecf20Sopenharmony_ci		len = ceph_decode_32(p);
8118c2ecf20Sopenharmony_ci		*p += len;
8128c2ecf20Sopenharmony_ci	}
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_ci	/* skip removed_snaps */
8158c2ecf20Sopenharmony_ci	num = ceph_decode_32(p);
8168c2ecf20Sopenharmony_ci	*p += num * (8 + 8);
8178c2ecf20Sopenharmony_ci
8188c2ecf20Sopenharmony_ci	*p += 8;  /* skip auid */
8198c2ecf20Sopenharmony_ci	pi->flags = ceph_decode_64(p);
8208c2ecf20Sopenharmony_ci	*p += 4;  /* skip crash_replay_interval */
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci	if (ev >= 7)
8238c2ecf20Sopenharmony_ci		pi->min_size = ceph_decode_8(p);
8248c2ecf20Sopenharmony_ci	else
8258c2ecf20Sopenharmony_ci		pi->min_size = pi->size - pi->size / 2;
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_ci	if (ev >= 8)
8288c2ecf20Sopenharmony_ci		*p += 8 + 8;  /* skip quota_max_* */
8298c2ecf20Sopenharmony_ci
8308c2ecf20Sopenharmony_ci	if (ev >= 9) {
8318c2ecf20Sopenharmony_ci		/* skip tiers */
8328c2ecf20Sopenharmony_ci		num = ceph_decode_32(p);
8338c2ecf20Sopenharmony_ci		*p += num * 8;
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci		*p += 8;  /* skip tier_of */
8368c2ecf20Sopenharmony_ci		*p += 1;  /* skip cache_mode */
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci		pi->read_tier = ceph_decode_64(p);
8398c2ecf20Sopenharmony_ci		pi->write_tier = ceph_decode_64(p);
8408c2ecf20Sopenharmony_ci	} else {
8418c2ecf20Sopenharmony_ci		pi->read_tier = -1;
8428c2ecf20Sopenharmony_ci		pi->write_tier = -1;
8438c2ecf20Sopenharmony_ci	}
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci	if (ev >= 10) {
8468c2ecf20Sopenharmony_ci		/* skip properties */
8478c2ecf20Sopenharmony_ci		num = ceph_decode_32(p);
8488c2ecf20Sopenharmony_ci		while (num--) {
8498c2ecf20Sopenharmony_ci			len = ceph_decode_32(p);
8508c2ecf20Sopenharmony_ci			*p += len; /* key */
8518c2ecf20Sopenharmony_ci			len = ceph_decode_32(p);
8528c2ecf20Sopenharmony_ci			*p += len; /* val */
8538c2ecf20Sopenharmony_ci		}
8548c2ecf20Sopenharmony_ci	}
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_ci	if (ev >= 11) {
8578c2ecf20Sopenharmony_ci		/* skip hit_set_params */
8588c2ecf20Sopenharmony_ci		*p += 1 + 1; /* versions */
8598c2ecf20Sopenharmony_ci		len = ceph_decode_32(p);
8608c2ecf20Sopenharmony_ci		*p += len;
8618c2ecf20Sopenharmony_ci
8628c2ecf20Sopenharmony_ci		*p += 4; /* skip hit_set_period */
8638c2ecf20Sopenharmony_ci		*p += 4; /* skip hit_set_count */
8648c2ecf20Sopenharmony_ci	}
8658c2ecf20Sopenharmony_ci
8668c2ecf20Sopenharmony_ci	if (ev >= 12)
8678c2ecf20Sopenharmony_ci		*p += 4; /* skip stripe_width */
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci	if (ev >= 13) {
8708c2ecf20Sopenharmony_ci		*p += 8; /* skip target_max_bytes */
8718c2ecf20Sopenharmony_ci		*p += 8; /* skip target_max_objects */
8728c2ecf20Sopenharmony_ci		*p += 4; /* skip cache_target_dirty_ratio_micro */
8738c2ecf20Sopenharmony_ci		*p += 4; /* skip cache_target_full_ratio_micro */
8748c2ecf20Sopenharmony_ci		*p += 4; /* skip cache_min_flush_age */
8758c2ecf20Sopenharmony_ci		*p += 4; /* skip cache_min_evict_age */
8768c2ecf20Sopenharmony_ci	}
8778c2ecf20Sopenharmony_ci
8788c2ecf20Sopenharmony_ci	if (ev >=  14) {
8798c2ecf20Sopenharmony_ci		/* skip erasure_code_profile */
8808c2ecf20Sopenharmony_ci		len = ceph_decode_32(p);
8818c2ecf20Sopenharmony_ci		*p += len;
8828c2ecf20Sopenharmony_ci	}
8838c2ecf20Sopenharmony_ci
8848c2ecf20Sopenharmony_ci	/*
8858c2ecf20Sopenharmony_ci	 * last_force_op_resend_preluminous, will be overridden if the
8868c2ecf20Sopenharmony_ci	 * map was encoded with RESEND_ON_SPLIT
8878c2ecf20Sopenharmony_ci	 */
8888c2ecf20Sopenharmony_ci	if (ev >= 15)
8898c2ecf20Sopenharmony_ci		pi->last_force_request_resend = ceph_decode_32(p);
8908c2ecf20Sopenharmony_ci	else
8918c2ecf20Sopenharmony_ci		pi->last_force_request_resend = 0;
8928c2ecf20Sopenharmony_ci
8938c2ecf20Sopenharmony_ci	if (ev >= 16)
8948c2ecf20Sopenharmony_ci		*p += 4; /* skip min_read_recency_for_promote */
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci	if (ev >= 17)
8978c2ecf20Sopenharmony_ci		*p += 8; /* skip expected_num_objects */
8988c2ecf20Sopenharmony_ci
8998c2ecf20Sopenharmony_ci	if (ev >= 19)
9008c2ecf20Sopenharmony_ci		*p += 4; /* skip cache_target_dirty_high_ratio_micro */
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci	if (ev >= 20)
9038c2ecf20Sopenharmony_ci		*p += 4; /* skip min_write_recency_for_promote */
9048c2ecf20Sopenharmony_ci
9058c2ecf20Sopenharmony_ci	if (ev >= 21)
9068c2ecf20Sopenharmony_ci		*p += 1; /* skip use_gmt_hitset */
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ci	if (ev >= 22)
9098c2ecf20Sopenharmony_ci		*p += 1; /* skip fast_read */
9108c2ecf20Sopenharmony_ci
9118c2ecf20Sopenharmony_ci	if (ev >= 23) {
9128c2ecf20Sopenharmony_ci		*p += 4; /* skip hit_set_grade_decay_rate */
9138c2ecf20Sopenharmony_ci		*p += 4; /* skip hit_set_search_last_n */
9148c2ecf20Sopenharmony_ci	}
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_ci	if (ev >= 24) {
9178c2ecf20Sopenharmony_ci		/* skip opts */
9188c2ecf20Sopenharmony_ci		*p += 1 + 1; /* versions */
9198c2ecf20Sopenharmony_ci		len = ceph_decode_32(p);
9208c2ecf20Sopenharmony_ci		*p += len;
9218c2ecf20Sopenharmony_ci	}
9228c2ecf20Sopenharmony_ci
9238c2ecf20Sopenharmony_ci	if (ev >= 25)
9248c2ecf20Sopenharmony_ci		pi->last_force_request_resend = ceph_decode_32(p);
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci	/* ignore the rest */
9278c2ecf20Sopenharmony_ci
9288c2ecf20Sopenharmony_ci	*p = pool_end;
9298c2ecf20Sopenharmony_ci	calc_pg_masks(pi);
9308c2ecf20Sopenharmony_ci	return 0;
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_cibad:
9338c2ecf20Sopenharmony_ci	return -EINVAL;
9348c2ecf20Sopenharmony_ci}
9358c2ecf20Sopenharmony_ci
9368c2ecf20Sopenharmony_cistatic int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
9378c2ecf20Sopenharmony_ci{
9388c2ecf20Sopenharmony_ci	struct ceph_pg_pool_info *pi;
9398c2ecf20Sopenharmony_ci	u32 num, len;
9408c2ecf20Sopenharmony_ci	u64 pool;
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, num, bad);
9438c2ecf20Sopenharmony_ci	dout(" %d pool names\n", num);
9448c2ecf20Sopenharmony_ci	while (num--) {
9458c2ecf20Sopenharmony_ci		ceph_decode_64_safe(p, end, pool, bad);
9468c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, len, bad);
9478c2ecf20Sopenharmony_ci		dout("  pool %llu len %d\n", pool, len);
9488c2ecf20Sopenharmony_ci		ceph_decode_need(p, end, len, bad);
9498c2ecf20Sopenharmony_ci		pi = lookup_pg_pool(&map->pg_pools, pool);
9508c2ecf20Sopenharmony_ci		if (pi) {
9518c2ecf20Sopenharmony_ci			char *name = kstrndup(*p, len, GFP_NOFS);
9528c2ecf20Sopenharmony_ci
9538c2ecf20Sopenharmony_ci			if (!name)
9548c2ecf20Sopenharmony_ci				return -ENOMEM;
9558c2ecf20Sopenharmony_ci			kfree(pi->name);
9568c2ecf20Sopenharmony_ci			pi->name = name;
9578c2ecf20Sopenharmony_ci			dout("  name is %s\n", pi->name);
9588c2ecf20Sopenharmony_ci		}
9598c2ecf20Sopenharmony_ci		*p += len;
9608c2ecf20Sopenharmony_ci	}
9618c2ecf20Sopenharmony_ci	return 0;
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_cibad:
9648c2ecf20Sopenharmony_ci	return -EINVAL;
9658c2ecf20Sopenharmony_ci}
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_ci/*
9688c2ecf20Sopenharmony_ci * CRUSH workspaces
9698c2ecf20Sopenharmony_ci *
9708c2ecf20Sopenharmony_ci * workspace_manager framework borrowed from fs/btrfs/compression.c.
9718c2ecf20Sopenharmony_ci * Two simplifications: there is only one type of workspace and there
9728c2ecf20Sopenharmony_ci * is always at least one workspace.
9738c2ecf20Sopenharmony_ci */
9748c2ecf20Sopenharmony_cistatic struct crush_work *alloc_workspace(const struct crush_map *c)
9758c2ecf20Sopenharmony_ci{
9768c2ecf20Sopenharmony_ci	struct crush_work *work;
9778c2ecf20Sopenharmony_ci	size_t work_size;
9788c2ecf20Sopenharmony_ci
9798c2ecf20Sopenharmony_ci	WARN_ON(!c->working_size);
9808c2ecf20Sopenharmony_ci	work_size = crush_work_size(c, CEPH_PG_MAX_SIZE);
9818c2ecf20Sopenharmony_ci	dout("%s work_size %zu bytes\n", __func__, work_size);
9828c2ecf20Sopenharmony_ci
9838c2ecf20Sopenharmony_ci	work = ceph_kvmalloc(work_size, GFP_NOIO);
9848c2ecf20Sopenharmony_ci	if (!work)
9858c2ecf20Sopenharmony_ci		return NULL;
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&work->item);
9888c2ecf20Sopenharmony_ci	crush_init_workspace(c, work);
9898c2ecf20Sopenharmony_ci	return work;
9908c2ecf20Sopenharmony_ci}
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_cistatic void free_workspace(struct crush_work *work)
9938c2ecf20Sopenharmony_ci{
9948c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&work->item));
9958c2ecf20Sopenharmony_ci	kvfree(work);
9968c2ecf20Sopenharmony_ci}
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_cistatic void init_workspace_manager(struct workspace_manager *wsm)
9998c2ecf20Sopenharmony_ci{
10008c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&wsm->idle_ws);
10018c2ecf20Sopenharmony_ci	spin_lock_init(&wsm->ws_lock);
10028c2ecf20Sopenharmony_ci	atomic_set(&wsm->total_ws, 0);
10038c2ecf20Sopenharmony_ci	wsm->free_ws = 0;
10048c2ecf20Sopenharmony_ci	init_waitqueue_head(&wsm->ws_wait);
10058c2ecf20Sopenharmony_ci}
10068c2ecf20Sopenharmony_ci
10078c2ecf20Sopenharmony_cistatic void add_initial_workspace(struct workspace_manager *wsm,
10088c2ecf20Sopenharmony_ci				  struct crush_work *work)
10098c2ecf20Sopenharmony_ci{
10108c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&wsm->idle_ws));
10118c2ecf20Sopenharmony_ci
10128c2ecf20Sopenharmony_ci	list_add(&work->item, &wsm->idle_ws);
10138c2ecf20Sopenharmony_ci	atomic_set(&wsm->total_ws, 1);
10148c2ecf20Sopenharmony_ci	wsm->free_ws = 1;
10158c2ecf20Sopenharmony_ci}
10168c2ecf20Sopenharmony_ci
10178c2ecf20Sopenharmony_cistatic void cleanup_workspace_manager(struct workspace_manager *wsm)
10188c2ecf20Sopenharmony_ci{
10198c2ecf20Sopenharmony_ci	struct crush_work *work;
10208c2ecf20Sopenharmony_ci
10218c2ecf20Sopenharmony_ci	while (!list_empty(&wsm->idle_ws)) {
10228c2ecf20Sopenharmony_ci		work = list_first_entry(&wsm->idle_ws, struct crush_work,
10238c2ecf20Sopenharmony_ci					item);
10248c2ecf20Sopenharmony_ci		list_del_init(&work->item);
10258c2ecf20Sopenharmony_ci		free_workspace(work);
10268c2ecf20Sopenharmony_ci	}
10278c2ecf20Sopenharmony_ci	atomic_set(&wsm->total_ws, 0);
10288c2ecf20Sopenharmony_ci	wsm->free_ws = 0;
10298c2ecf20Sopenharmony_ci}
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci/*
10328c2ecf20Sopenharmony_ci * Finds an available workspace or allocates a new one.  If it's not
10338c2ecf20Sopenharmony_ci * possible to allocate a new one, waits until there is one.
10348c2ecf20Sopenharmony_ci */
10358c2ecf20Sopenharmony_cistatic struct crush_work *get_workspace(struct workspace_manager *wsm,
10368c2ecf20Sopenharmony_ci					const struct crush_map *c)
10378c2ecf20Sopenharmony_ci{
10388c2ecf20Sopenharmony_ci	struct crush_work *work;
10398c2ecf20Sopenharmony_ci	int cpus = num_online_cpus();
10408c2ecf20Sopenharmony_ci
10418c2ecf20Sopenharmony_ciagain:
10428c2ecf20Sopenharmony_ci	spin_lock(&wsm->ws_lock);
10438c2ecf20Sopenharmony_ci	if (!list_empty(&wsm->idle_ws)) {
10448c2ecf20Sopenharmony_ci		work = list_first_entry(&wsm->idle_ws, struct crush_work,
10458c2ecf20Sopenharmony_ci					item);
10468c2ecf20Sopenharmony_ci		list_del_init(&work->item);
10478c2ecf20Sopenharmony_ci		wsm->free_ws--;
10488c2ecf20Sopenharmony_ci		spin_unlock(&wsm->ws_lock);
10498c2ecf20Sopenharmony_ci		return work;
10508c2ecf20Sopenharmony_ci
10518c2ecf20Sopenharmony_ci	}
10528c2ecf20Sopenharmony_ci	if (atomic_read(&wsm->total_ws) > cpus) {
10538c2ecf20Sopenharmony_ci		DEFINE_WAIT(wait);
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci		spin_unlock(&wsm->ws_lock);
10568c2ecf20Sopenharmony_ci		prepare_to_wait(&wsm->ws_wait, &wait, TASK_UNINTERRUPTIBLE);
10578c2ecf20Sopenharmony_ci		if (atomic_read(&wsm->total_ws) > cpus && !wsm->free_ws)
10588c2ecf20Sopenharmony_ci			schedule();
10598c2ecf20Sopenharmony_ci		finish_wait(&wsm->ws_wait, &wait);
10608c2ecf20Sopenharmony_ci		goto again;
10618c2ecf20Sopenharmony_ci	}
10628c2ecf20Sopenharmony_ci	atomic_inc(&wsm->total_ws);
10638c2ecf20Sopenharmony_ci	spin_unlock(&wsm->ws_lock);
10648c2ecf20Sopenharmony_ci
10658c2ecf20Sopenharmony_ci	work = alloc_workspace(c);
10668c2ecf20Sopenharmony_ci	if (!work) {
10678c2ecf20Sopenharmony_ci		atomic_dec(&wsm->total_ws);
10688c2ecf20Sopenharmony_ci		wake_up(&wsm->ws_wait);
10698c2ecf20Sopenharmony_ci
10708c2ecf20Sopenharmony_ci		/*
10718c2ecf20Sopenharmony_ci		 * Do not return the error but go back to waiting.  We
10728c2ecf20Sopenharmony_ci		 * have the inital workspace and the CRUSH computation
10738c2ecf20Sopenharmony_ci		 * time is bounded so we will get it eventually.
10748c2ecf20Sopenharmony_ci		 */
10758c2ecf20Sopenharmony_ci		WARN_ON(atomic_read(&wsm->total_ws) < 1);
10768c2ecf20Sopenharmony_ci		goto again;
10778c2ecf20Sopenharmony_ci	}
10788c2ecf20Sopenharmony_ci	return work;
10798c2ecf20Sopenharmony_ci}
10808c2ecf20Sopenharmony_ci
10818c2ecf20Sopenharmony_ci/*
10828c2ecf20Sopenharmony_ci * Puts a workspace back on the list or frees it if we have enough
10838c2ecf20Sopenharmony_ci * idle ones sitting around.
10848c2ecf20Sopenharmony_ci */
10858c2ecf20Sopenharmony_cistatic void put_workspace(struct workspace_manager *wsm,
10868c2ecf20Sopenharmony_ci			  struct crush_work *work)
10878c2ecf20Sopenharmony_ci{
10888c2ecf20Sopenharmony_ci	spin_lock(&wsm->ws_lock);
10898c2ecf20Sopenharmony_ci	if (wsm->free_ws <= num_online_cpus()) {
10908c2ecf20Sopenharmony_ci		list_add(&work->item, &wsm->idle_ws);
10918c2ecf20Sopenharmony_ci		wsm->free_ws++;
10928c2ecf20Sopenharmony_ci		spin_unlock(&wsm->ws_lock);
10938c2ecf20Sopenharmony_ci		goto wake;
10948c2ecf20Sopenharmony_ci	}
10958c2ecf20Sopenharmony_ci	spin_unlock(&wsm->ws_lock);
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci	free_workspace(work);
10988c2ecf20Sopenharmony_ci	atomic_dec(&wsm->total_ws);
10998c2ecf20Sopenharmony_ciwake:
11008c2ecf20Sopenharmony_ci	if (wq_has_sleeper(&wsm->ws_wait))
11018c2ecf20Sopenharmony_ci		wake_up(&wsm->ws_wait);
11028c2ecf20Sopenharmony_ci}
11038c2ecf20Sopenharmony_ci
11048c2ecf20Sopenharmony_ci/*
11058c2ecf20Sopenharmony_ci * osd map
11068c2ecf20Sopenharmony_ci */
11078c2ecf20Sopenharmony_cistruct ceph_osdmap *ceph_osdmap_alloc(void)
11088c2ecf20Sopenharmony_ci{
11098c2ecf20Sopenharmony_ci	struct ceph_osdmap *map;
11108c2ecf20Sopenharmony_ci
11118c2ecf20Sopenharmony_ci	map = kzalloc(sizeof(*map), GFP_NOIO);
11128c2ecf20Sopenharmony_ci	if (!map)
11138c2ecf20Sopenharmony_ci		return NULL;
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_ci	map->pg_pools = RB_ROOT;
11168c2ecf20Sopenharmony_ci	map->pool_max = -1;
11178c2ecf20Sopenharmony_ci	map->pg_temp = RB_ROOT;
11188c2ecf20Sopenharmony_ci	map->primary_temp = RB_ROOT;
11198c2ecf20Sopenharmony_ci	map->pg_upmap = RB_ROOT;
11208c2ecf20Sopenharmony_ci	map->pg_upmap_items = RB_ROOT;
11218c2ecf20Sopenharmony_ci
11228c2ecf20Sopenharmony_ci	init_workspace_manager(&map->crush_wsm);
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci	return map;
11258c2ecf20Sopenharmony_ci}
11268c2ecf20Sopenharmony_ci
11278c2ecf20Sopenharmony_civoid ceph_osdmap_destroy(struct ceph_osdmap *map)
11288c2ecf20Sopenharmony_ci{
11298c2ecf20Sopenharmony_ci	dout("osdmap_destroy %p\n", map);
11308c2ecf20Sopenharmony_ci
11318c2ecf20Sopenharmony_ci	if (map->crush)
11328c2ecf20Sopenharmony_ci		crush_destroy(map->crush);
11338c2ecf20Sopenharmony_ci	cleanup_workspace_manager(&map->crush_wsm);
11348c2ecf20Sopenharmony_ci
11358c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
11368c2ecf20Sopenharmony_ci		struct ceph_pg_mapping *pg =
11378c2ecf20Sopenharmony_ci			rb_entry(rb_first(&map->pg_temp),
11388c2ecf20Sopenharmony_ci				 struct ceph_pg_mapping, node);
11398c2ecf20Sopenharmony_ci		erase_pg_mapping(&map->pg_temp, pg);
11408c2ecf20Sopenharmony_ci		free_pg_mapping(pg);
11418c2ecf20Sopenharmony_ci	}
11428c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(&map->primary_temp)) {
11438c2ecf20Sopenharmony_ci		struct ceph_pg_mapping *pg =
11448c2ecf20Sopenharmony_ci			rb_entry(rb_first(&map->primary_temp),
11458c2ecf20Sopenharmony_ci				 struct ceph_pg_mapping, node);
11468c2ecf20Sopenharmony_ci		erase_pg_mapping(&map->primary_temp, pg);
11478c2ecf20Sopenharmony_ci		free_pg_mapping(pg);
11488c2ecf20Sopenharmony_ci	}
11498c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
11508c2ecf20Sopenharmony_ci		struct ceph_pg_mapping *pg =
11518c2ecf20Sopenharmony_ci			rb_entry(rb_first(&map->pg_upmap),
11528c2ecf20Sopenharmony_ci				 struct ceph_pg_mapping, node);
11538c2ecf20Sopenharmony_ci		rb_erase(&pg->node, &map->pg_upmap);
11548c2ecf20Sopenharmony_ci		kfree(pg);
11558c2ecf20Sopenharmony_ci	}
11568c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
11578c2ecf20Sopenharmony_ci		struct ceph_pg_mapping *pg =
11588c2ecf20Sopenharmony_ci			rb_entry(rb_first(&map->pg_upmap_items),
11598c2ecf20Sopenharmony_ci				 struct ceph_pg_mapping, node);
11608c2ecf20Sopenharmony_ci		rb_erase(&pg->node, &map->pg_upmap_items);
11618c2ecf20Sopenharmony_ci		kfree(pg);
11628c2ecf20Sopenharmony_ci	}
11638c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
11648c2ecf20Sopenharmony_ci		struct ceph_pg_pool_info *pi =
11658c2ecf20Sopenharmony_ci			rb_entry(rb_first(&map->pg_pools),
11668c2ecf20Sopenharmony_ci				 struct ceph_pg_pool_info, node);
11678c2ecf20Sopenharmony_ci		__remove_pg_pool(&map->pg_pools, pi);
11688c2ecf20Sopenharmony_ci	}
11698c2ecf20Sopenharmony_ci	kvfree(map->osd_state);
11708c2ecf20Sopenharmony_ci	kvfree(map->osd_weight);
11718c2ecf20Sopenharmony_ci	kvfree(map->osd_addr);
11728c2ecf20Sopenharmony_ci	kvfree(map->osd_primary_affinity);
11738c2ecf20Sopenharmony_ci	kfree(map);
11748c2ecf20Sopenharmony_ci}
11758c2ecf20Sopenharmony_ci
11768c2ecf20Sopenharmony_ci/*
11778c2ecf20Sopenharmony_ci * Adjust max_osd value, (re)allocate arrays.
11788c2ecf20Sopenharmony_ci *
11798c2ecf20Sopenharmony_ci * The new elements are properly initialized.
11808c2ecf20Sopenharmony_ci */
11818c2ecf20Sopenharmony_cistatic int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
11828c2ecf20Sopenharmony_ci{
11838c2ecf20Sopenharmony_ci	u32 *state;
11848c2ecf20Sopenharmony_ci	u32 *weight;
11858c2ecf20Sopenharmony_ci	struct ceph_entity_addr *addr;
11868c2ecf20Sopenharmony_ci	u32 to_copy;
11878c2ecf20Sopenharmony_ci	int i;
11888c2ecf20Sopenharmony_ci
11898c2ecf20Sopenharmony_ci	dout("%s old %u new %u\n", __func__, map->max_osd, max);
11908c2ecf20Sopenharmony_ci	if (max == map->max_osd)
11918c2ecf20Sopenharmony_ci		return 0;
11928c2ecf20Sopenharmony_ci
11938c2ecf20Sopenharmony_ci	state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
11948c2ecf20Sopenharmony_ci	weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
11958c2ecf20Sopenharmony_ci	addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
11968c2ecf20Sopenharmony_ci	if (!state || !weight || !addr) {
11978c2ecf20Sopenharmony_ci		kvfree(state);
11988c2ecf20Sopenharmony_ci		kvfree(weight);
11998c2ecf20Sopenharmony_ci		kvfree(addr);
12008c2ecf20Sopenharmony_ci		return -ENOMEM;
12018c2ecf20Sopenharmony_ci	}
12028c2ecf20Sopenharmony_ci
12038c2ecf20Sopenharmony_ci	to_copy = min(map->max_osd, max);
12048c2ecf20Sopenharmony_ci	if (map->osd_state) {
12058c2ecf20Sopenharmony_ci		memcpy(state, map->osd_state, to_copy * sizeof(*state));
12068c2ecf20Sopenharmony_ci		memcpy(weight, map->osd_weight, to_copy * sizeof(*weight));
12078c2ecf20Sopenharmony_ci		memcpy(addr, map->osd_addr, to_copy * sizeof(*addr));
12088c2ecf20Sopenharmony_ci		kvfree(map->osd_state);
12098c2ecf20Sopenharmony_ci		kvfree(map->osd_weight);
12108c2ecf20Sopenharmony_ci		kvfree(map->osd_addr);
12118c2ecf20Sopenharmony_ci	}
12128c2ecf20Sopenharmony_ci
12138c2ecf20Sopenharmony_ci	map->osd_state = state;
12148c2ecf20Sopenharmony_ci	map->osd_weight = weight;
12158c2ecf20Sopenharmony_ci	map->osd_addr = addr;
12168c2ecf20Sopenharmony_ci	for (i = map->max_osd; i < max; i++) {
12178c2ecf20Sopenharmony_ci		map->osd_state[i] = 0;
12188c2ecf20Sopenharmony_ci		map->osd_weight[i] = CEPH_OSD_OUT;
12198c2ecf20Sopenharmony_ci		memset(map->osd_addr + i, 0, sizeof(*map->osd_addr));
12208c2ecf20Sopenharmony_ci	}
12218c2ecf20Sopenharmony_ci
12228c2ecf20Sopenharmony_ci	if (map->osd_primary_affinity) {
12238c2ecf20Sopenharmony_ci		u32 *affinity;
12248c2ecf20Sopenharmony_ci
12258c2ecf20Sopenharmony_ci		affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
12268c2ecf20Sopenharmony_ci					 GFP_NOFS);
12278c2ecf20Sopenharmony_ci		if (!affinity)
12288c2ecf20Sopenharmony_ci			return -ENOMEM;
12298c2ecf20Sopenharmony_ci
12308c2ecf20Sopenharmony_ci		memcpy(affinity, map->osd_primary_affinity,
12318c2ecf20Sopenharmony_ci		       to_copy * sizeof(*affinity));
12328c2ecf20Sopenharmony_ci		kvfree(map->osd_primary_affinity);
12338c2ecf20Sopenharmony_ci
12348c2ecf20Sopenharmony_ci		map->osd_primary_affinity = affinity;
12358c2ecf20Sopenharmony_ci		for (i = map->max_osd; i < max; i++)
12368c2ecf20Sopenharmony_ci			map->osd_primary_affinity[i] =
12378c2ecf20Sopenharmony_ci			    CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
12388c2ecf20Sopenharmony_ci	}
12398c2ecf20Sopenharmony_ci
12408c2ecf20Sopenharmony_ci	map->max_osd = max;
12418c2ecf20Sopenharmony_ci
12428c2ecf20Sopenharmony_ci	return 0;
12438c2ecf20Sopenharmony_ci}
12448c2ecf20Sopenharmony_ci
12458c2ecf20Sopenharmony_cistatic int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
12468c2ecf20Sopenharmony_ci{
12478c2ecf20Sopenharmony_ci	struct crush_work *work;
12488c2ecf20Sopenharmony_ci
12498c2ecf20Sopenharmony_ci	if (IS_ERR(crush))
12508c2ecf20Sopenharmony_ci		return PTR_ERR(crush);
12518c2ecf20Sopenharmony_ci
12528c2ecf20Sopenharmony_ci	work = alloc_workspace(crush);
12538c2ecf20Sopenharmony_ci	if (!work) {
12548c2ecf20Sopenharmony_ci		crush_destroy(crush);
12558c2ecf20Sopenharmony_ci		return -ENOMEM;
12568c2ecf20Sopenharmony_ci	}
12578c2ecf20Sopenharmony_ci
12588c2ecf20Sopenharmony_ci	if (map->crush)
12598c2ecf20Sopenharmony_ci		crush_destroy(map->crush);
12608c2ecf20Sopenharmony_ci	cleanup_workspace_manager(&map->crush_wsm);
12618c2ecf20Sopenharmony_ci	map->crush = crush;
12628c2ecf20Sopenharmony_ci	add_initial_workspace(&map->crush_wsm, work);
12638c2ecf20Sopenharmony_ci	return 0;
12648c2ecf20Sopenharmony_ci}
12658c2ecf20Sopenharmony_ci
12668c2ecf20Sopenharmony_ci#define OSDMAP_WRAPPER_COMPAT_VER	7
12678c2ecf20Sopenharmony_ci#define OSDMAP_CLIENT_DATA_COMPAT_VER	1
12688c2ecf20Sopenharmony_ci
12698c2ecf20Sopenharmony_ci/*
12708c2ecf20Sopenharmony_ci * Return 0 or error.  On success, *v is set to 0 for old (v6) osdmaps,
12718c2ecf20Sopenharmony_ci * to struct_v of the client_data section for new (v7 and above)
12728c2ecf20Sopenharmony_ci * osdmaps.
12738c2ecf20Sopenharmony_ci */
12748c2ecf20Sopenharmony_cistatic int get_osdmap_client_data_v(void **p, void *end,
12758c2ecf20Sopenharmony_ci				    const char *prefix, u8 *v)
12768c2ecf20Sopenharmony_ci{
12778c2ecf20Sopenharmony_ci	u8 struct_v;
12788c2ecf20Sopenharmony_ci
12798c2ecf20Sopenharmony_ci	ceph_decode_8_safe(p, end, struct_v, e_inval);
12808c2ecf20Sopenharmony_ci	if (struct_v >= 7) {
12818c2ecf20Sopenharmony_ci		u8 struct_compat;
12828c2ecf20Sopenharmony_ci
12838c2ecf20Sopenharmony_ci		ceph_decode_8_safe(p, end, struct_compat, e_inval);
12848c2ecf20Sopenharmony_ci		if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) {
12858c2ecf20Sopenharmony_ci			pr_warn("got v %d cv %d > %d of %s ceph_osdmap\n",
12868c2ecf20Sopenharmony_ci				struct_v, struct_compat,
12878c2ecf20Sopenharmony_ci				OSDMAP_WRAPPER_COMPAT_VER, prefix);
12888c2ecf20Sopenharmony_ci			return -EINVAL;
12898c2ecf20Sopenharmony_ci		}
12908c2ecf20Sopenharmony_ci		*p += 4; /* ignore wrapper struct_len */
12918c2ecf20Sopenharmony_ci
12928c2ecf20Sopenharmony_ci		ceph_decode_8_safe(p, end, struct_v, e_inval);
12938c2ecf20Sopenharmony_ci		ceph_decode_8_safe(p, end, struct_compat, e_inval);
12948c2ecf20Sopenharmony_ci		if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) {
12958c2ecf20Sopenharmony_ci			pr_warn("got v %d cv %d > %d of %s ceph_osdmap client data\n",
12968c2ecf20Sopenharmony_ci				struct_v, struct_compat,
12978c2ecf20Sopenharmony_ci				OSDMAP_CLIENT_DATA_COMPAT_VER, prefix);
12988c2ecf20Sopenharmony_ci			return -EINVAL;
12998c2ecf20Sopenharmony_ci		}
13008c2ecf20Sopenharmony_ci		*p += 4; /* ignore client data struct_len */
13018c2ecf20Sopenharmony_ci	} else {
13028c2ecf20Sopenharmony_ci		u16 version;
13038c2ecf20Sopenharmony_ci
13048c2ecf20Sopenharmony_ci		*p -= 1;
13058c2ecf20Sopenharmony_ci		ceph_decode_16_safe(p, end, version, e_inval);
13068c2ecf20Sopenharmony_ci		if (version < 6) {
13078c2ecf20Sopenharmony_ci			pr_warn("got v %d < 6 of %s ceph_osdmap\n",
13088c2ecf20Sopenharmony_ci				version, prefix);
13098c2ecf20Sopenharmony_ci			return -EINVAL;
13108c2ecf20Sopenharmony_ci		}
13118c2ecf20Sopenharmony_ci
13128c2ecf20Sopenharmony_ci		/* old osdmap enconding */
13138c2ecf20Sopenharmony_ci		struct_v = 0;
13148c2ecf20Sopenharmony_ci	}
13158c2ecf20Sopenharmony_ci
13168c2ecf20Sopenharmony_ci	*v = struct_v;
13178c2ecf20Sopenharmony_ci	return 0;
13188c2ecf20Sopenharmony_ci
13198c2ecf20Sopenharmony_cie_inval:
13208c2ecf20Sopenharmony_ci	return -EINVAL;
13218c2ecf20Sopenharmony_ci}
13228c2ecf20Sopenharmony_ci
13238c2ecf20Sopenharmony_cistatic int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
13248c2ecf20Sopenharmony_ci			  bool incremental)
13258c2ecf20Sopenharmony_ci{
13268c2ecf20Sopenharmony_ci	u32 n;
13278c2ecf20Sopenharmony_ci
13288c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, n, e_inval);
13298c2ecf20Sopenharmony_ci	while (n--) {
13308c2ecf20Sopenharmony_ci		struct ceph_pg_pool_info *pi;
13318c2ecf20Sopenharmony_ci		u64 pool;
13328c2ecf20Sopenharmony_ci		int ret;
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci		ceph_decode_64_safe(p, end, pool, e_inval);
13358c2ecf20Sopenharmony_ci
13368c2ecf20Sopenharmony_ci		pi = lookup_pg_pool(&map->pg_pools, pool);
13378c2ecf20Sopenharmony_ci		if (!incremental || !pi) {
13388c2ecf20Sopenharmony_ci			pi = kzalloc(sizeof(*pi), GFP_NOFS);
13398c2ecf20Sopenharmony_ci			if (!pi)
13408c2ecf20Sopenharmony_ci				return -ENOMEM;
13418c2ecf20Sopenharmony_ci
13428c2ecf20Sopenharmony_ci			RB_CLEAR_NODE(&pi->node);
13438c2ecf20Sopenharmony_ci			pi->id = pool;
13448c2ecf20Sopenharmony_ci
13458c2ecf20Sopenharmony_ci			if (!__insert_pg_pool(&map->pg_pools, pi)) {
13468c2ecf20Sopenharmony_ci				kfree(pi);
13478c2ecf20Sopenharmony_ci				return -EEXIST;
13488c2ecf20Sopenharmony_ci			}
13498c2ecf20Sopenharmony_ci		}
13508c2ecf20Sopenharmony_ci
13518c2ecf20Sopenharmony_ci		ret = decode_pool(p, end, pi);
13528c2ecf20Sopenharmony_ci		if (ret)
13538c2ecf20Sopenharmony_ci			return ret;
13548c2ecf20Sopenharmony_ci	}
13558c2ecf20Sopenharmony_ci
13568c2ecf20Sopenharmony_ci	return 0;
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_cie_inval:
13598c2ecf20Sopenharmony_ci	return -EINVAL;
13608c2ecf20Sopenharmony_ci}
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_cistatic int decode_pools(void **p, void *end, struct ceph_osdmap *map)
13638c2ecf20Sopenharmony_ci{
13648c2ecf20Sopenharmony_ci	return __decode_pools(p, end, map, false);
13658c2ecf20Sopenharmony_ci}
13668c2ecf20Sopenharmony_ci
13678c2ecf20Sopenharmony_cistatic int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
13688c2ecf20Sopenharmony_ci{
13698c2ecf20Sopenharmony_ci	return __decode_pools(p, end, map, true);
13708c2ecf20Sopenharmony_ci}
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_citypedef struct ceph_pg_mapping *(*decode_mapping_fn_t)(void **, void *, bool);
13738c2ecf20Sopenharmony_ci
13748c2ecf20Sopenharmony_cistatic int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
13758c2ecf20Sopenharmony_ci			     decode_mapping_fn_t fn, bool incremental)
13768c2ecf20Sopenharmony_ci{
13778c2ecf20Sopenharmony_ci	u32 n;
13788c2ecf20Sopenharmony_ci
13798c2ecf20Sopenharmony_ci	WARN_ON(!incremental && !fn);
13808c2ecf20Sopenharmony_ci
13818c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, n, e_inval);
13828c2ecf20Sopenharmony_ci	while (n--) {
13838c2ecf20Sopenharmony_ci		struct ceph_pg_mapping *pg;
13848c2ecf20Sopenharmony_ci		struct ceph_pg pgid;
13858c2ecf20Sopenharmony_ci		int ret;
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_ci		ret = ceph_decode_pgid(p, end, &pgid);
13888c2ecf20Sopenharmony_ci		if (ret)
13898c2ecf20Sopenharmony_ci			return ret;
13908c2ecf20Sopenharmony_ci
13918c2ecf20Sopenharmony_ci		pg = lookup_pg_mapping(mapping_root, &pgid);
13928c2ecf20Sopenharmony_ci		if (pg) {
13938c2ecf20Sopenharmony_ci			WARN_ON(!incremental);
13948c2ecf20Sopenharmony_ci			erase_pg_mapping(mapping_root, pg);
13958c2ecf20Sopenharmony_ci			free_pg_mapping(pg);
13968c2ecf20Sopenharmony_ci		}
13978c2ecf20Sopenharmony_ci
13988c2ecf20Sopenharmony_ci		if (fn) {
13998c2ecf20Sopenharmony_ci			pg = fn(p, end, incremental);
14008c2ecf20Sopenharmony_ci			if (IS_ERR(pg))
14018c2ecf20Sopenharmony_ci				return PTR_ERR(pg);
14028c2ecf20Sopenharmony_ci
14038c2ecf20Sopenharmony_ci			if (pg) {
14048c2ecf20Sopenharmony_ci				pg->pgid = pgid; /* struct */
14058c2ecf20Sopenharmony_ci				insert_pg_mapping(mapping_root, pg);
14068c2ecf20Sopenharmony_ci			}
14078c2ecf20Sopenharmony_ci		}
14088c2ecf20Sopenharmony_ci	}
14098c2ecf20Sopenharmony_ci
14108c2ecf20Sopenharmony_ci	return 0;
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_cie_inval:
14138c2ecf20Sopenharmony_ci	return -EINVAL;
14148c2ecf20Sopenharmony_ci}
14158c2ecf20Sopenharmony_ci
14168c2ecf20Sopenharmony_cistatic struct ceph_pg_mapping *__decode_pg_temp(void **p, void *end,
14178c2ecf20Sopenharmony_ci						bool incremental)
14188c2ecf20Sopenharmony_ci{
14198c2ecf20Sopenharmony_ci	struct ceph_pg_mapping *pg;
14208c2ecf20Sopenharmony_ci	u32 len, i;
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
14238c2ecf20Sopenharmony_ci	if (len == 0 && incremental)
14248c2ecf20Sopenharmony_ci		return NULL;	/* new_pg_temp: [] to remove */
14258c2ecf20Sopenharmony_ci	if (len > (SIZE_MAX - sizeof(*pg)) / sizeof(u32))
14268c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
14278c2ecf20Sopenharmony_ci
14288c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, len * sizeof(u32), e_inval);
14298c2ecf20Sopenharmony_ci	pg = alloc_pg_mapping(len * sizeof(u32));
14308c2ecf20Sopenharmony_ci	if (!pg)
14318c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
14328c2ecf20Sopenharmony_ci
14338c2ecf20Sopenharmony_ci	pg->pg_temp.len = len;
14348c2ecf20Sopenharmony_ci	for (i = 0; i < len; i++)
14358c2ecf20Sopenharmony_ci		pg->pg_temp.osds[i] = ceph_decode_32(p);
14368c2ecf20Sopenharmony_ci
14378c2ecf20Sopenharmony_ci	return pg;
14388c2ecf20Sopenharmony_ci
14398c2ecf20Sopenharmony_cie_inval:
14408c2ecf20Sopenharmony_ci	return ERR_PTR(-EINVAL);
14418c2ecf20Sopenharmony_ci}
14428c2ecf20Sopenharmony_ci
14438c2ecf20Sopenharmony_cistatic int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
14448c2ecf20Sopenharmony_ci{
14458c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
14468c2ecf20Sopenharmony_ci				 false);
14478c2ecf20Sopenharmony_ci}
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_cistatic int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
14508c2ecf20Sopenharmony_ci{
14518c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
14528c2ecf20Sopenharmony_ci				 true);
14538c2ecf20Sopenharmony_ci}
14548c2ecf20Sopenharmony_ci
14558c2ecf20Sopenharmony_cistatic struct ceph_pg_mapping *__decode_primary_temp(void **p, void *end,
14568c2ecf20Sopenharmony_ci						     bool incremental)
14578c2ecf20Sopenharmony_ci{
14588c2ecf20Sopenharmony_ci	struct ceph_pg_mapping *pg;
14598c2ecf20Sopenharmony_ci	u32 osd;
14608c2ecf20Sopenharmony_ci
14618c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, osd, e_inval);
14628c2ecf20Sopenharmony_ci	if (osd == (u32)-1 && incremental)
14638c2ecf20Sopenharmony_ci		return NULL;	/* new_primary_temp: -1 to remove */
14648c2ecf20Sopenharmony_ci
14658c2ecf20Sopenharmony_ci	pg = alloc_pg_mapping(0);
14668c2ecf20Sopenharmony_ci	if (!pg)
14678c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
14688c2ecf20Sopenharmony_ci
14698c2ecf20Sopenharmony_ci	pg->primary_temp.osd = osd;
14708c2ecf20Sopenharmony_ci	return pg;
14718c2ecf20Sopenharmony_ci
14728c2ecf20Sopenharmony_cie_inval:
14738c2ecf20Sopenharmony_ci	return ERR_PTR(-EINVAL);
14748c2ecf20Sopenharmony_ci}
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_cistatic int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
14778c2ecf20Sopenharmony_ci{
14788c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->primary_temp,
14798c2ecf20Sopenharmony_ci				 __decode_primary_temp, false);
14808c2ecf20Sopenharmony_ci}
14818c2ecf20Sopenharmony_ci
14828c2ecf20Sopenharmony_cistatic int decode_new_primary_temp(void **p, void *end,
14838c2ecf20Sopenharmony_ci				   struct ceph_osdmap *map)
14848c2ecf20Sopenharmony_ci{
14858c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->primary_temp,
14868c2ecf20Sopenharmony_ci				 __decode_primary_temp, true);
14878c2ecf20Sopenharmony_ci}
14888c2ecf20Sopenharmony_ci
14898c2ecf20Sopenharmony_ciu32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
14908c2ecf20Sopenharmony_ci{
14918c2ecf20Sopenharmony_ci	BUG_ON(osd >= map->max_osd);
14928c2ecf20Sopenharmony_ci
14938c2ecf20Sopenharmony_ci	if (!map->osd_primary_affinity)
14948c2ecf20Sopenharmony_ci		return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
14958c2ecf20Sopenharmony_ci
14968c2ecf20Sopenharmony_ci	return map->osd_primary_affinity[osd];
14978c2ecf20Sopenharmony_ci}
14988c2ecf20Sopenharmony_ci
14998c2ecf20Sopenharmony_cistatic int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
15008c2ecf20Sopenharmony_ci{
15018c2ecf20Sopenharmony_ci	BUG_ON(osd >= map->max_osd);
15028c2ecf20Sopenharmony_ci
15038c2ecf20Sopenharmony_ci	if (!map->osd_primary_affinity) {
15048c2ecf20Sopenharmony_ci		int i;
15058c2ecf20Sopenharmony_ci
15068c2ecf20Sopenharmony_ci		map->osd_primary_affinity = ceph_kvmalloc(
15078c2ecf20Sopenharmony_ci		    array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
15088c2ecf20Sopenharmony_ci		    GFP_NOFS);
15098c2ecf20Sopenharmony_ci		if (!map->osd_primary_affinity)
15108c2ecf20Sopenharmony_ci			return -ENOMEM;
15118c2ecf20Sopenharmony_ci
15128c2ecf20Sopenharmony_ci		for (i = 0; i < map->max_osd; i++)
15138c2ecf20Sopenharmony_ci			map->osd_primary_affinity[i] =
15148c2ecf20Sopenharmony_ci			    CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
15158c2ecf20Sopenharmony_ci	}
15168c2ecf20Sopenharmony_ci
15178c2ecf20Sopenharmony_ci	map->osd_primary_affinity[osd] = aff;
15188c2ecf20Sopenharmony_ci
15198c2ecf20Sopenharmony_ci	return 0;
15208c2ecf20Sopenharmony_ci}
15218c2ecf20Sopenharmony_ci
15228c2ecf20Sopenharmony_cistatic int decode_primary_affinity(void **p, void *end,
15238c2ecf20Sopenharmony_ci				   struct ceph_osdmap *map)
15248c2ecf20Sopenharmony_ci{
15258c2ecf20Sopenharmony_ci	u32 len, i;
15268c2ecf20Sopenharmony_ci
15278c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
15288c2ecf20Sopenharmony_ci	if (len == 0) {
15298c2ecf20Sopenharmony_ci		kvfree(map->osd_primary_affinity);
15308c2ecf20Sopenharmony_ci		map->osd_primary_affinity = NULL;
15318c2ecf20Sopenharmony_ci		return 0;
15328c2ecf20Sopenharmony_ci	}
15338c2ecf20Sopenharmony_ci	if (len != map->max_osd)
15348c2ecf20Sopenharmony_ci		goto e_inval;
15358c2ecf20Sopenharmony_ci
15368c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval);
15378c2ecf20Sopenharmony_ci
15388c2ecf20Sopenharmony_ci	for (i = 0; i < map->max_osd; i++) {
15398c2ecf20Sopenharmony_ci		int ret;
15408c2ecf20Sopenharmony_ci
15418c2ecf20Sopenharmony_ci		ret = set_primary_affinity(map, i, ceph_decode_32(p));
15428c2ecf20Sopenharmony_ci		if (ret)
15438c2ecf20Sopenharmony_ci			return ret;
15448c2ecf20Sopenharmony_ci	}
15458c2ecf20Sopenharmony_ci
15468c2ecf20Sopenharmony_ci	return 0;
15478c2ecf20Sopenharmony_ci
15488c2ecf20Sopenharmony_cie_inval:
15498c2ecf20Sopenharmony_ci	return -EINVAL;
15508c2ecf20Sopenharmony_ci}
15518c2ecf20Sopenharmony_ci
15528c2ecf20Sopenharmony_cistatic int decode_new_primary_affinity(void **p, void *end,
15538c2ecf20Sopenharmony_ci				       struct ceph_osdmap *map)
15548c2ecf20Sopenharmony_ci{
15558c2ecf20Sopenharmony_ci	u32 n;
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, n, e_inval);
15588c2ecf20Sopenharmony_ci	while (n--) {
15598c2ecf20Sopenharmony_ci		u32 osd, aff;
15608c2ecf20Sopenharmony_ci		int ret;
15618c2ecf20Sopenharmony_ci
15628c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, osd, e_inval);
15638c2ecf20Sopenharmony_ci		ceph_decode_32_safe(p, end, aff, e_inval);
15648c2ecf20Sopenharmony_ci
15658c2ecf20Sopenharmony_ci		ret = set_primary_affinity(map, osd, aff);
15668c2ecf20Sopenharmony_ci		if (ret)
15678c2ecf20Sopenharmony_ci			return ret;
15688c2ecf20Sopenharmony_ci
15698c2ecf20Sopenharmony_ci		pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
15708c2ecf20Sopenharmony_ci	}
15718c2ecf20Sopenharmony_ci
15728c2ecf20Sopenharmony_ci	return 0;
15738c2ecf20Sopenharmony_ci
15748c2ecf20Sopenharmony_cie_inval:
15758c2ecf20Sopenharmony_ci	return -EINVAL;
15768c2ecf20Sopenharmony_ci}
15778c2ecf20Sopenharmony_ci
15788c2ecf20Sopenharmony_cistatic struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
15798c2ecf20Sopenharmony_ci						 bool __unused)
15808c2ecf20Sopenharmony_ci{
15818c2ecf20Sopenharmony_ci	return __decode_pg_temp(p, end, false);
15828c2ecf20Sopenharmony_ci}
15838c2ecf20Sopenharmony_ci
15848c2ecf20Sopenharmony_cistatic int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
15858c2ecf20Sopenharmony_ci{
15868c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
15878c2ecf20Sopenharmony_ci				 false);
15888c2ecf20Sopenharmony_ci}
15898c2ecf20Sopenharmony_ci
15908c2ecf20Sopenharmony_cistatic int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
15918c2ecf20Sopenharmony_ci{
15928c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
15938c2ecf20Sopenharmony_ci				 true);
15948c2ecf20Sopenharmony_ci}
15958c2ecf20Sopenharmony_ci
15968c2ecf20Sopenharmony_cistatic int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
15978c2ecf20Sopenharmony_ci{
15988c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
15998c2ecf20Sopenharmony_ci}
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_cistatic struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
16028c2ecf20Sopenharmony_ci						       bool __unused)
16038c2ecf20Sopenharmony_ci{
16048c2ecf20Sopenharmony_ci	struct ceph_pg_mapping *pg;
16058c2ecf20Sopenharmony_ci	u32 len, i;
16068c2ecf20Sopenharmony_ci
16078c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
16088c2ecf20Sopenharmony_ci	if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
16098c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
16108c2ecf20Sopenharmony_ci
16118c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
16128c2ecf20Sopenharmony_ci	pg = alloc_pg_mapping(2 * len * sizeof(u32));
16138c2ecf20Sopenharmony_ci	if (!pg)
16148c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
16158c2ecf20Sopenharmony_ci
16168c2ecf20Sopenharmony_ci	pg->pg_upmap_items.len = len;
16178c2ecf20Sopenharmony_ci	for (i = 0; i < len; i++) {
16188c2ecf20Sopenharmony_ci		pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
16198c2ecf20Sopenharmony_ci		pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
16208c2ecf20Sopenharmony_ci	}
16218c2ecf20Sopenharmony_ci
16228c2ecf20Sopenharmony_ci	return pg;
16238c2ecf20Sopenharmony_ci
16248c2ecf20Sopenharmony_cie_inval:
16258c2ecf20Sopenharmony_ci	return ERR_PTR(-EINVAL);
16268c2ecf20Sopenharmony_ci}
16278c2ecf20Sopenharmony_ci
16288c2ecf20Sopenharmony_cistatic int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
16298c2ecf20Sopenharmony_ci{
16308c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_upmap_items,
16318c2ecf20Sopenharmony_ci				 __decode_pg_upmap_items, false);
16328c2ecf20Sopenharmony_ci}
16338c2ecf20Sopenharmony_ci
16348c2ecf20Sopenharmony_cistatic int decode_new_pg_upmap_items(void **p, void *end,
16358c2ecf20Sopenharmony_ci				     struct ceph_osdmap *map)
16368c2ecf20Sopenharmony_ci{
16378c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_upmap_items,
16388c2ecf20Sopenharmony_ci				 __decode_pg_upmap_items, true);
16398c2ecf20Sopenharmony_ci}
16408c2ecf20Sopenharmony_ci
16418c2ecf20Sopenharmony_cistatic int decode_old_pg_upmap_items(void **p, void *end,
16428c2ecf20Sopenharmony_ci				     struct ceph_osdmap *map)
16438c2ecf20Sopenharmony_ci{
16448c2ecf20Sopenharmony_ci	return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
16458c2ecf20Sopenharmony_ci}
16468c2ecf20Sopenharmony_ci
16478c2ecf20Sopenharmony_ci/*
16488c2ecf20Sopenharmony_ci * decode a full map.
16498c2ecf20Sopenharmony_ci */
16508c2ecf20Sopenharmony_cistatic int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
16518c2ecf20Sopenharmony_ci{
16528c2ecf20Sopenharmony_ci	u8 struct_v;
16538c2ecf20Sopenharmony_ci	u32 epoch = 0;
16548c2ecf20Sopenharmony_ci	void *start = *p;
16558c2ecf20Sopenharmony_ci	u32 max;
16568c2ecf20Sopenharmony_ci	u32 len, i;
16578c2ecf20Sopenharmony_ci	int err;
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_ci	dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
16608c2ecf20Sopenharmony_ci
16618c2ecf20Sopenharmony_ci	err = get_osdmap_client_data_v(p, end, "full", &struct_v);
16628c2ecf20Sopenharmony_ci	if (err)
16638c2ecf20Sopenharmony_ci		goto bad;
16648c2ecf20Sopenharmony_ci
16658c2ecf20Sopenharmony_ci	/* fsid, epoch, created, modified */
16668c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
16678c2ecf20Sopenharmony_ci			 sizeof(map->created) + sizeof(map->modified), e_inval);
16688c2ecf20Sopenharmony_ci	ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
16698c2ecf20Sopenharmony_ci	epoch = map->epoch = ceph_decode_32(p);
16708c2ecf20Sopenharmony_ci	ceph_decode_copy(p, &map->created, sizeof(map->created));
16718c2ecf20Sopenharmony_ci	ceph_decode_copy(p, &map->modified, sizeof(map->modified));
16728c2ecf20Sopenharmony_ci
16738c2ecf20Sopenharmony_ci	/* pools */
16748c2ecf20Sopenharmony_ci	err = decode_pools(p, end, map);
16758c2ecf20Sopenharmony_ci	if (err)
16768c2ecf20Sopenharmony_ci		goto bad;
16778c2ecf20Sopenharmony_ci
16788c2ecf20Sopenharmony_ci	/* pool_name */
16798c2ecf20Sopenharmony_ci	err = decode_pool_names(p, end, map);
16808c2ecf20Sopenharmony_ci	if (err)
16818c2ecf20Sopenharmony_ci		goto bad;
16828c2ecf20Sopenharmony_ci
16838c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, map->pool_max, e_inval);
16848c2ecf20Sopenharmony_ci
16858c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, map->flags, e_inval);
16868c2ecf20Sopenharmony_ci
16878c2ecf20Sopenharmony_ci	/* max_osd */
16888c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, max, e_inval);
16898c2ecf20Sopenharmony_ci
16908c2ecf20Sopenharmony_ci	/* (re)alloc osd arrays */
16918c2ecf20Sopenharmony_ci	err = osdmap_set_max_osd(map, max);
16928c2ecf20Sopenharmony_ci	if (err)
16938c2ecf20Sopenharmony_ci		goto bad;
16948c2ecf20Sopenharmony_ci
16958c2ecf20Sopenharmony_ci	/* osd_state, osd_weight, osd_addrs->client_addr */
16968c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, 3*sizeof(u32) +
16978c2ecf20Sopenharmony_ci			 map->max_osd*(struct_v >= 5 ? sizeof(u32) :
16988c2ecf20Sopenharmony_ci						       sizeof(u8)) +
16998c2ecf20Sopenharmony_ci				       sizeof(*map->osd_weight), e_inval);
17008c2ecf20Sopenharmony_ci	if (ceph_decode_32(p) != map->max_osd)
17018c2ecf20Sopenharmony_ci		goto e_inval;
17028c2ecf20Sopenharmony_ci
17038c2ecf20Sopenharmony_ci	if (struct_v >= 5) {
17048c2ecf20Sopenharmony_ci		for (i = 0; i < map->max_osd; i++)
17058c2ecf20Sopenharmony_ci			map->osd_state[i] = ceph_decode_32(p);
17068c2ecf20Sopenharmony_ci	} else {
17078c2ecf20Sopenharmony_ci		for (i = 0; i < map->max_osd; i++)
17088c2ecf20Sopenharmony_ci			map->osd_state[i] = ceph_decode_8(p);
17098c2ecf20Sopenharmony_ci	}
17108c2ecf20Sopenharmony_ci
17118c2ecf20Sopenharmony_ci	if (ceph_decode_32(p) != map->max_osd)
17128c2ecf20Sopenharmony_ci		goto e_inval;
17138c2ecf20Sopenharmony_ci
17148c2ecf20Sopenharmony_ci	for (i = 0; i < map->max_osd; i++)
17158c2ecf20Sopenharmony_ci		map->osd_weight[i] = ceph_decode_32(p);
17168c2ecf20Sopenharmony_ci
17178c2ecf20Sopenharmony_ci	if (ceph_decode_32(p) != map->max_osd)
17188c2ecf20Sopenharmony_ci		goto e_inval;
17198c2ecf20Sopenharmony_ci
17208c2ecf20Sopenharmony_ci	for (i = 0; i < map->max_osd; i++) {
17218c2ecf20Sopenharmony_ci		err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
17228c2ecf20Sopenharmony_ci		if (err)
17238c2ecf20Sopenharmony_ci			goto bad;
17248c2ecf20Sopenharmony_ci	}
17258c2ecf20Sopenharmony_ci
17268c2ecf20Sopenharmony_ci	/* pg_temp */
17278c2ecf20Sopenharmony_ci	err = decode_pg_temp(p, end, map);
17288c2ecf20Sopenharmony_ci	if (err)
17298c2ecf20Sopenharmony_ci		goto bad;
17308c2ecf20Sopenharmony_ci
17318c2ecf20Sopenharmony_ci	/* primary_temp */
17328c2ecf20Sopenharmony_ci	if (struct_v >= 1) {
17338c2ecf20Sopenharmony_ci		err = decode_primary_temp(p, end, map);
17348c2ecf20Sopenharmony_ci		if (err)
17358c2ecf20Sopenharmony_ci			goto bad;
17368c2ecf20Sopenharmony_ci	}
17378c2ecf20Sopenharmony_ci
17388c2ecf20Sopenharmony_ci	/* primary_affinity */
17398c2ecf20Sopenharmony_ci	if (struct_v >= 2) {
17408c2ecf20Sopenharmony_ci		err = decode_primary_affinity(p, end, map);
17418c2ecf20Sopenharmony_ci		if (err)
17428c2ecf20Sopenharmony_ci			goto bad;
17438c2ecf20Sopenharmony_ci	} else {
17448c2ecf20Sopenharmony_ci		WARN_ON(map->osd_primary_affinity);
17458c2ecf20Sopenharmony_ci	}
17468c2ecf20Sopenharmony_ci
17478c2ecf20Sopenharmony_ci	/* crush */
17488c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
17498c2ecf20Sopenharmony_ci	err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end)));
17508c2ecf20Sopenharmony_ci	if (err)
17518c2ecf20Sopenharmony_ci		goto bad;
17528c2ecf20Sopenharmony_ci
17538c2ecf20Sopenharmony_ci	*p += len;
17548c2ecf20Sopenharmony_ci	if (struct_v >= 3) {
17558c2ecf20Sopenharmony_ci		/* erasure_code_profiles */
17568c2ecf20Sopenharmony_ci		ceph_decode_skip_map_of_map(p, end, string, string, string,
17578c2ecf20Sopenharmony_ci					    e_inval);
17588c2ecf20Sopenharmony_ci	}
17598c2ecf20Sopenharmony_ci
17608c2ecf20Sopenharmony_ci	if (struct_v >= 4) {
17618c2ecf20Sopenharmony_ci		err = decode_pg_upmap(p, end, map);
17628c2ecf20Sopenharmony_ci		if (err)
17638c2ecf20Sopenharmony_ci			goto bad;
17648c2ecf20Sopenharmony_ci
17658c2ecf20Sopenharmony_ci		err = decode_pg_upmap_items(p, end, map);
17668c2ecf20Sopenharmony_ci		if (err)
17678c2ecf20Sopenharmony_ci			goto bad;
17688c2ecf20Sopenharmony_ci	} else {
17698c2ecf20Sopenharmony_ci		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
17708c2ecf20Sopenharmony_ci		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
17718c2ecf20Sopenharmony_ci	}
17728c2ecf20Sopenharmony_ci
17738c2ecf20Sopenharmony_ci	/* ignore the rest */
17748c2ecf20Sopenharmony_ci	*p = end;
17758c2ecf20Sopenharmony_ci
17768c2ecf20Sopenharmony_ci	dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
17778c2ecf20Sopenharmony_ci	return 0;
17788c2ecf20Sopenharmony_ci
17798c2ecf20Sopenharmony_cie_inval:
17808c2ecf20Sopenharmony_ci	err = -EINVAL;
17818c2ecf20Sopenharmony_cibad:
17828c2ecf20Sopenharmony_ci	pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
17838c2ecf20Sopenharmony_ci	       err, epoch, (int)(*p - start), *p, start, end);
17848c2ecf20Sopenharmony_ci	print_hex_dump(KERN_DEBUG, "osdmap: ",
17858c2ecf20Sopenharmony_ci		       DUMP_PREFIX_OFFSET, 16, 1,
17868c2ecf20Sopenharmony_ci		       start, end - start, true);
17878c2ecf20Sopenharmony_ci	return err;
17888c2ecf20Sopenharmony_ci}
17898c2ecf20Sopenharmony_ci
17908c2ecf20Sopenharmony_ci/*
17918c2ecf20Sopenharmony_ci * Allocate and decode a full map.
17928c2ecf20Sopenharmony_ci */
17938c2ecf20Sopenharmony_cistruct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
17948c2ecf20Sopenharmony_ci{
17958c2ecf20Sopenharmony_ci	struct ceph_osdmap *map;
17968c2ecf20Sopenharmony_ci	int ret;
17978c2ecf20Sopenharmony_ci
17988c2ecf20Sopenharmony_ci	map = ceph_osdmap_alloc();
17998c2ecf20Sopenharmony_ci	if (!map)
18008c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
18018c2ecf20Sopenharmony_ci
18028c2ecf20Sopenharmony_ci	ret = osdmap_decode(p, end, map);
18038c2ecf20Sopenharmony_ci	if (ret) {
18048c2ecf20Sopenharmony_ci		ceph_osdmap_destroy(map);
18058c2ecf20Sopenharmony_ci		return ERR_PTR(ret);
18068c2ecf20Sopenharmony_ci	}
18078c2ecf20Sopenharmony_ci
18088c2ecf20Sopenharmony_ci	return map;
18098c2ecf20Sopenharmony_ci}
18108c2ecf20Sopenharmony_ci
18118c2ecf20Sopenharmony_ci/*
18128c2ecf20Sopenharmony_ci * Encoding order is (new_up_client, new_state, new_weight).  Need to
18138c2ecf20Sopenharmony_ci * apply in the (new_weight, new_state, new_up_client) order, because
18148c2ecf20Sopenharmony_ci * an incremental map may look like e.g.
18158c2ecf20Sopenharmony_ci *
18168c2ecf20Sopenharmony_ci *     new_up_client: { osd=6, addr=... } # set osd_state and addr
18178c2ecf20Sopenharmony_ci *     new_state: { osd=6, xorstate=EXISTS } # clear osd_state
18188c2ecf20Sopenharmony_ci */
18198c2ecf20Sopenharmony_cistatic int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
18208c2ecf20Sopenharmony_ci				      struct ceph_osdmap *map)
18218c2ecf20Sopenharmony_ci{
18228c2ecf20Sopenharmony_ci	void *new_up_client;
18238c2ecf20Sopenharmony_ci	void *new_state;
18248c2ecf20Sopenharmony_ci	void *new_weight_end;
18258c2ecf20Sopenharmony_ci	u32 len;
18268c2ecf20Sopenharmony_ci	int i;
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_ci	new_up_client = *p;
18298c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
18308c2ecf20Sopenharmony_ci	for (i = 0; i < len; ++i) {
18318c2ecf20Sopenharmony_ci		struct ceph_entity_addr addr;
18328c2ecf20Sopenharmony_ci
18338c2ecf20Sopenharmony_ci		ceph_decode_skip_32(p, end, e_inval);
18348c2ecf20Sopenharmony_ci		if (ceph_decode_entity_addr(p, end, &addr))
18358c2ecf20Sopenharmony_ci			goto e_inval;
18368c2ecf20Sopenharmony_ci	}
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci	new_state = *p;
18398c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
18408c2ecf20Sopenharmony_ci	len *= sizeof(u32) + (struct_v >= 5 ? sizeof(u32) : sizeof(u8));
18418c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, len, e_inval);
18428c2ecf20Sopenharmony_ci	*p += len;
18438c2ecf20Sopenharmony_ci
18448c2ecf20Sopenharmony_ci	/* new_weight */
18458c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
18468c2ecf20Sopenharmony_ci	while (len--) {
18478c2ecf20Sopenharmony_ci		s32 osd;
18488c2ecf20Sopenharmony_ci		u32 w;
18498c2ecf20Sopenharmony_ci
18508c2ecf20Sopenharmony_ci		ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
18518c2ecf20Sopenharmony_ci		osd = ceph_decode_32(p);
18528c2ecf20Sopenharmony_ci		w = ceph_decode_32(p);
18538c2ecf20Sopenharmony_ci		BUG_ON(osd >= map->max_osd);
18548c2ecf20Sopenharmony_ci		pr_info("osd%d weight 0x%x %s\n", osd, w,
18558c2ecf20Sopenharmony_ci		     w == CEPH_OSD_IN ? "(in)" :
18568c2ecf20Sopenharmony_ci		     (w == CEPH_OSD_OUT ? "(out)" : ""));
18578c2ecf20Sopenharmony_ci		map->osd_weight[osd] = w;
18588c2ecf20Sopenharmony_ci
18598c2ecf20Sopenharmony_ci		/*
18608c2ecf20Sopenharmony_ci		 * If we are marking in, set the EXISTS, and clear the
18618c2ecf20Sopenharmony_ci		 * AUTOOUT and NEW bits.
18628c2ecf20Sopenharmony_ci		 */
18638c2ecf20Sopenharmony_ci		if (w) {
18648c2ecf20Sopenharmony_ci			map->osd_state[osd] |= CEPH_OSD_EXISTS;
18658c2ecf20Sopenharmony_ci			map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
18668c2ecf20Sopenharmony_ci						 CEPH_OSD_NEW);
18678c2ecf20Sopenharmony_ci		}
18688c2ecf20Sopenharmony_ci	}
18698c2ecf20Sopenharmony_ci	new_weight_end = *p;
18708c2ecf20Sopenharmony_ci
18718c2ecf20Sopenharmony_ci	/* new_state (up/down) */
18728c2ecf20Sopenharmony_ci	*p = new_state;
18738c2ecf20Sopenharmony_ci	len = ceph_decode_32(p);
18748c2ecf20Sopenharmony_ci	while (len--) {
18758c2ecf20Sopenharmony_ci		s32 osd;
18768c2ecf20Sopenharmony_ci		u32 xorstate;
18778c2ecf20Sopenharmony_ci		int ret;
18788c2ecf20Sopenharmony_ci
18798c2ecf20Sopenharmony_ci		osd = ceph_decode_32(p);
18808c2ecf20Sopenharmony_ci		if (struct_v >= 5)
18818c2ecf20Sopenharmony_ci			xorstate = ceph_decode_32(p);
18828c2ecf20Sopenharmony_ci		else
18838c2ecf20Sopenharmony_ci			xorstate = ceph_decode_8(p);
18848c2ecf20Sopenharmony_ci		if (xorstate == 0)
18858c2ecf20Sopenharmony_ci			xorstate = CEPH_OSD_UP;
18868c2ecf20Sopenharmony_ci		BUG_ON(osd >= map->max_osd);
18878c2ecf20Sopenharmony_ci		if ((map->osd_state[osd] & CEPH_OSD_UP) &&
18888c2ecf20Sopenharmony_ci		    (xorstate & CEPH_OSD_UP))
18898c2ecf20Sopenharmony_ci			pr_info("osd%d down\n", osd);
18908c2ecf20Sopenharmony_ci		if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
18918c2ecf20Sopenharmony_ci		    (xorstate & CEPH_OSD_EXISTS)) {
18928c2ecf20Sopenharmony_ci			pr_info("osd%d does not exist\n", osd);
18938c2ecf20Sopenharmony_ci			ret = set_primary_affinity(map, osd,
18948c2ecf20Sopenharmony_ci						   CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
18958c2ecf20Sopenharmony_ci			if (ret)
18968c2ecf20Sopenharmony_ci				return ret;
18978c2ecf20Sopenharmony_ci			memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
18988c2ecf20Sopenharmony_ci			map->osd_state[osd] = 0;
18998c2ecf20Sopenharmony_ci		} else {
19008c2ecf20Sopenharmony_ci			map->osd_state[osd] ^= xorstate;
19018c2ecf20Sopenharmony_ci		}
19028c2ecf20Sopenharmony_ci	}
19038c2ecf20Sopenharmony_ci
19048c2ecf20Sopenharmony_ci	/* new_up_client */
19058c2ecf20Sopenharmony_ci	*p = new_up_client;
19068c2ecf20Sopenharmony_ci	len = ceph_decode_32(p);
19078c2ecf20Sopenharmony_ci	while (len--) {
19088c2ecf20Sopenharmony_ci		s32 osd;
19098c2ecf20Sopenharmony_ci		struct ceph_entity_addr addr;
19108c2ecf20Sopenharmony_ci
19118c2ecf20Sopenharmony_ci		osd = ceph_decode_32(p);
19128c2ecf20Sopenharmony_ci		BUG_ON(osd >= map->max_osd);
19138c2ecf20Sopenharmony_ci		if (ceph_decode_entity_addr(p, end, &addr))
19148c2ecf20Sopenharmony_ci			goto e_inval;
19158c2ecf20Sopenharmony_ci		pr_info("osd%d up\n", osd);
19168c2ecf20Sopenharmony_ci		map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
19178c2ecf20Sopenharmony_ci		map->osd_addr[osd] = addr;
19188c2ecf20Sopenharmony_ci	}
19198c2ecf20Sopenharmony_ci
19208c2ecf20Sopenharmony_ci	*p = new_weight_end;
19218c2ecf20Sopenharmony_ci	return 0;
19228c2ecf20Sopenharmony_ci
19238c2ecf20Sopenharmony_cie_inval:
19248c2ecf20Sopenharmony_ci	return -EINVAL;
19258c2ecf20Sopenharmony_ci}
19268c2ecf20Sopenharmony_ci
19278c2ecf20Sopenharmony_ci/*
19288c2ecf20Sopenharmony_ci * decode and apply an incremental map update.
19298c2ecf20Sopenharmony_ci */
19308c2ecf20Sopenharmony_cistruct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
19318c2ecf20Sopenharmony_ci					     struct ceph_osdmap *map)
19328c2ecf20Sopenharmony_ci{
19338c2ecf20Sopenharmony_ci	struct ceph_fsid fsid;
19348c2ecf20Sopenharmony_ci	u32 epoch = 0;
19358c2ecf20Sopenharmony_ci	struct ceph_timespec modified;
19368c2ecf20Sopenharmony_ci	s32 len;
19378c2ecf20Sopenharmony_ci	u64 pool;
19388c2ecf20Sopenharmony_ci	__s64 new_pool_max;
19398c2ecf20Sopenharmony_ci	__s32 new_flags, max;
19408c2ecf20Sopenharmony_ci	void *start = *p;
19418c2ecf20Sopenharmony_ci	int err;
19428c2ecf20Sopenharmony_ci	u8 struct_v;
19438c2ecf20Sopenharmony_ci
19448c2ecf20Sopenharmony_ci	dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
19458c2ecf20Sopenharmony_ci
19468c2ecf20Sopenharmony_ci	err = get_osdmap_client_data_v(p, end, "inc", &struct_v);
19478c2ecf20Sopenharmony_ci	if (err)
19488c2ecf20Sopenharmony_ci		goto bad;
19498c2ecf20Sopenharmony_ci
19508c2ecf20Sopenharmony_ci	/* fsid, epoch, modified, new_pool_max, new_flags */
19518c2ecf20Sopenharmony_ci	ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
19528c2ecf20Sopenharmony_ci			 sizeof(u64) + sizeof(u32), e_inval);
19538c2ecf20Sopenharmony_ci	ceph_decode_copy(p, &fsid, sizeof(fsid));
19548c2ecf20Sopenharmony_ci	epoch = ceph_decode_32(p);
19558c2ecf20Sopenharmony_ci	BUG_ON(epoch != map->epoch+1);
19568c2ecf20Sopenharmony_ci	ceph_decode_copy(p, &modified, sizeof(modified));
19578c2ecf20Sopenharmony_ci	new_pool_max = ceph_decode_64(p);
19588c2ecf20Sopenharmony_ci	new_flags = ceph_decode_32(p);
19598c2ecf20Sopenharmony_ci
19608c2ecf20Sopenharmony_ci	/* full map? */
19618c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
19628c2ecf20Sopenharmony_ci	if (len > 0) {
19638c2ecf20Sopenharmony_ci		dout("apply_incremental full map len %d, %p to %p\n",
19648c2ecf20Sopenharmony_ci		     len, *p, end);
19658c2ecf20Sopenharmony_ci		return ceph_osdmap_decode(p, min(*p+len, end));
19668c2ecf20Sopenharmony_ci	}
19678c2ecf20Sopenharmony_ci
19688c2ecf20Sopenharmony_ci	/* new crush? */
19698c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
19708c2ecf20Sopenharmony_ci	if (len > 0) {
19718c2ecf20Sopenharmony_ci		err = osdmap_set_crush(map,
19728c2ecf20Sopenharmony_ci				       crush_decode(*p, min(*p + len, end)));
19738c2ecf20Sopenharmony_ci		if (err)
19748c2ecf20Sopenharmony_ci			goto bad;
19758c2ecf20Sopenharmony_ci		*p += len;
19768c2ecf20Sopenharmony_ci	}
19778c2ecf20Sopenharmony_ci
19788c2ecf20Sopenharmony_ci	/* new flags? */
19798c2ecf20Sopenharmony_ci	if (new_flags >= 0)
19808c2ecf20Sopenharmony_ci		map->flags = new_flags;
19818c2ecf20Sopenharmony_ci	if (new_pool_max >= 0)
19828c2ecf20Sopenharmony_ci		map->pool_max = new_pool_max;
19838c2ecf20Sopenharmony_ci
19848c2ecf20Sopenharmony_ci	/* new max? */
19858c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, max, e_inval);
19868c2ecf20Sopenharmony_ci	if (max >= 0) {
19878c2ecf20Sopenharmony_ci		err = osdmap_set_max_osd(map, max);
19888c2ecf20Sopenharmony_ci		if (err)
19898c2ecf20Sopenharmony_ci			goto bad;
19908c2ecf20Sopenharmony_ci	}
19918c2ecf20Sopenharmony_ci
19928c2ecf20Sopenharmony_ci	map->epoch++;
19938c2ecf20Sopenharmony_ci	map->modified = modified;
19948c2ecf20Sopenharmony_ci
19958c2ecf20Sopenharmony_ci	/* new_pools */
19968c2ecf20Sopenharmony_ci	err = decode_new_pools(p, end, map);
19978c2ecf20Sopenharmony_ci	if (err)
19988c2ecf20Sopenharmony_ci		goto bad;
19998c2ecf20Sopenharmony_ci
20008c2ecf20Sopenharmony_ci	/* new_pool_names */
20018c2ecf20Sopenharmony_ci	err = decode_pool_names(p, end, map);
20028c2ecf20Sopenharmony_ci	if (err)
20038c2ecf20Sopenharmony_ci		goto bad;
20048c2ecf20Sopenharmony_ci
20058c2ecf20Sopenharmony_ci	/* old_pool */
20068c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, len, e_inval);
20078c2ecf20Sopenharmony_ci	while (len--) {
20088c2ecf20Sopenharmony_ci		struct ceph_pg_pool_info *pi;
20098c2ecf20Sopenharmony_ci
20108c2ecf20Sopenharmony_ci		ceph_decode_64_safe(p, end, pool, e_inval);
20118c2ecf20Sopenharmony_ci		pi = lookup_pg_pool(&map->pg_pools, pool);
20128c2ecf20Sopenharmony_ci		if (pi)
20138c2ecf20Sopenharmony_ci			__remove_pg_pool(&map->pg_pools, pi);
20148c2ecf20Sopenharmony_ci	}
20158c2ecf20Sopenharmony_ci
20168c2ecf20Sopenharmony_ci	/* new_up_client, new_state, new_weight */
20178c2ecf20Sopenharmony_ci	err = decode_new_up_state_weight(p, end, struct_v, map);
20188c2ecf20Sopenharmony_ci	if (err)
20198c2ecf20Sopenharmony_ci		goto bad;
20208c2ecf20Sopenharmony_ci
20218c2ecf20Sopenharmony_ci	/* new_pg_temp */
20228c2ecf20Sopenharmony_ci	err = decode_new_pg_temp(p, end, map);
20238c2ecf20Sopenharmony_ci	if (err)
20248c2ecf20Sopenharmony_ci		goto bad;
20258c2ecf20Sopenharmony_ci
20268c2ecf20Sopenharmony_ci	/* new_primary_temp */
20278c2ecf20Sopenharmony_ci	if (struct_v >= 1) {
20288c2ecf20Sopenharmony_ci		err = decode_new_primary_temp(p, end, map);
20298c2ecf20Sopenharmony_ci		if (err)
20308c2ecf20Sopenharmony_ci			goto bad;
20318c2ecf20Sopenharmony_ci	}
20328c2ecf20Sopenharmony_ci
20338c2ecf20Sopenharmony_ci	/* new_primary_affinity */
20348c2ecf20Sopenharmony_ci	if (struct_v >= 2) {
20358c2ecf20Sopenharmony_ci		err = decode_new_primary_affinity(p, end, map);
20368c2ecf20Sopenharmony_ci		if (err)
20378c2ecf20Sopenharmony_ci			goto bad;
20388c2ecf20Sopenharmony_ci	}
20398c2ecf20Sopenharmony_ci
20408c2ecf20Sopenharmony_ci	if (struct_v >= 3) {
20418c2ecf20Sopenharmony_ci		/* new_erasure_code_profiles */
20428c2ecf20Sopenharmony_ci		ceph_decode_skip_map_of_map(p, end, string, string, string,
20438c2ecf20Sopenharmony_ci					    e_inval);
20448c2ecf20Sopenharmony_ci		/* old_erasure_code_profiles */
20458c2ecf20Sopenharmony_ci		ceph_decode_skip_set(p, end, string, e_inval);
20468c2ecf20Sopenharmony_ci	}
20478c2ecf20Sopenharmony_ci
20488c2ecf20Sopenharmony_ci	if (struct_v >= 4) {
20498c2ecf20Sopenharmony_ci		err = decode_new_pg_upmap(p, end, map);
20508c2ecf20Sopenharmony_ci		if (err)
20518c2ecf20Sopenharmony_ci			goto bad;
20528c2ecf20Sopenharmony_ci
20538c2ecf20Sopenharmony_ci		err = decode_old_pg_upmap(p, end, map);
20548c2ecf20Sopenharmony_ci		if (err)
20558c2ecf20Sopenharmony_ci			goto bad;
20568c2ecf20Sopenharmony_ci
20578c2ecf20Sopenharmony_ci		err = decode_new_pg_upmap_items(p, end, map);
20588c2ecf20Sopenharmony_ci		if (err)
20598c2ecf20Sopenharmony_ci			goto bad;
20608c2ecf20Sopenharmony_ci
20618c2ecf20Sopenharmony_ci		err = decode_old_pg_upmap_items(p, end, map);
20628c2ecf20Sopenharmony_ci		if (err)
20638c2ecf20Sopenharmony_ci			goto bad;
20648c2ecf20Sopenharmony_ci	}
20658c2ecf20Sopenharmony_ci
20668c2ecf20Sopenharmony_ci	/* ignore the rest */
20678c2ecf20Sopenharmony_ci	*p = end;
20688c2ecf20Sopenharmony_ci
20698c2ecf20Sopenharmony_ci	dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
20708c2ecf20Sopenharmony_ci	return map;
20718c2ecf20Sopenharmony_ci
20728c2ecf20Sopenharmony_cie_inval:
20738c2ecf20Sopenharmony_ci	err = -EINVAL;
20748c2ecf20Sopenharmony_cibad:
20758c2ecf20Sopenharmony_ci	pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
20768c2ecf20Sopenharmony_ci	       err, epoch, (int)(*p - start), *p, start, end);
20778c2ecf20Sopenharmony_ci	print_hex_dump(KERN_DEBUG, "osdmap: ",
20788c2ecf20Sopenharmony_ci		       DUMP_PREFIX_OFFSET, 16, 1,
20798c2ecf20Sopenharmony_ci		       start, end - start, true);
20808c2ecf20Sopenharmony_ci	return ERR_PTR(err);
20818c2ecf20Sopenharmony_ci}
20828c2ecf20Sopenharmony_ci
20838c2ecf20Sopenharmony_civoid ceph_oloc_copy(struct ceph_object_locator *dest,
20848c2ecf20Sopenharmony_ci		    const struct ceph_object_locator *src)
20858c2ecf20Sopenharmony_ci{
20868c2ecf20Sopenharmony_ci	ceph_oloc_destroy(dest);
20878c2ecf20Sopenharmony_ci
20888c2ecf20Sopenharmony_ci	dest->pool = src->pool;
20898c2ecf20Sopenharmony_ci	if (src->pool_ns)
20908c2ecf20Sopenharmony_ci		dest->pool_ns = ceph_get_string(src->pool_ns);
20918c2ecf20Sopenharmony_ci	else
20928c2ecf20Sopenharmony_ci		dest->pool_ns = NULL;
20938c2ecf20Sopenharmony_ci}
20948c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_oloc_copy);
20958c2ecf20Sopenharmony_ci
20968c2ecf20Sopenharmony_civoid ceph_oloc_destroy(struct ceph_object_locator *oloc)
20978c2ecf20Sopenharmony_ci{
20988c2ecf20Sopenharmony_ci	ceph_put_string(oloc->pool_ns);
20998c2ecf20Sopenharmony_ci}
21008c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_oloc_destroy);
21018c2ecf20Sopenharmony_ci
21028c2ecf20Sopenharmony_civoid ceph_oid_copy(struct ceph_object_id *dest,
21038c2ecf20Sopenharmony_ci		   const struct ceph_object_id *src)
21048c2ecf20Sopenharmony_ci{
21058c2ecf20Sopenharmony_ci	ceph_oid_destroy(dest);
21068c2ecf20Sopenharmony_ci
21078c2ecf20Sopenharmony_ci	if (src->name != src->inline_name) {
21088c2ecf20Sopenharmony_ci		/* very rare, see ceph_object_id definition */
21098c2ecf20Sopenharmony_ci		dest->name = kmalloc(src->name_len + 1,
21108c2ecf20Sopenharmony_ci				     GFP_NOIO | __GFP_NOFAIL);
21118c2ecf20Sopenharmony_ci	} else {
21128c2ecf20Sopenharmony_ci		dest->name = dest->inline_name;
21138c2ecf20Sopenharmony_ci	}
21148c2ecf20Sopenharmony_ci	memcpy(dest->name, src->name, src->name_len + 1);
21158c2ecf20Sopenharmony_ci	dest->name_len = src->name_len;
21168c2ecf20Sopenharmony_ci}
21178c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_oid_copy);
21188c2ecf20Sopenharmony_ci
21198c2ecf20Sopenharmony_cistatic __printf(2, 0)
21208c2ecf20Sopenharmony_ciint oid_printf_vargs(struct ceph_object_id *oid, const char *fmt, va_list ap)
21218c2ecf20Sopenharmony_ci{
21228c2ecf20Sopenharmony_ci	int len;
21238c2ecf20Sopenharmony_ci
21248c2ecf20Sopenharmony_ci	WARN_ON(!ceph_oid_empty(oid));
21258c2ecf20Sopenharmony_ci
21268c2ecf20Sopenharmony_ci	len = vsnprintf(oid->inline_name, sizeof(oid->inline_name), fmt, ap);
21278c2ecf20Sopenharmony_ci	if (len >= sizeof(oid->inline_name))
21288c2ecf20Sopenharmony_ci		return len;
21298c2ecf20Sopenharmony_ci
21308c2ecf20Sopenharmony_ci	oid->name_len = len;
21318c2ecf20Sopenharmony_ci	return 0;
21328c2ecf20Sopenharmony_ci}
21338c2ecf20Sopenharmony_ci
21348c2ecf20Sopenharmony_ci/*
21358c2ecf20Sopenharmony_ci * If oid doesn't fit into inline buffer, BUG.
21368c2ecf20Sopenharmony_ci */
21378c2ecf20Sopenharmony_civoid ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...)
21388c2ecf20Sopenharmony_ci{
21398c2ecf20Sopenharmony_ci	va_list ap;
21408c2ecf20Sopenharmony_ci
21418c2ecf20Sopenharmony_ci	va_start(ap, fmt);
21428c2ecf20Sopenharmony_ci	BUG_ON(oid_printf_vargs(oid, fmt, ap));
21438c2ecf20Sopenharmony_ci	va_end(ap);
21448c2ecf20Sopenharmony_ci}
21458c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_oid_printf);
21468c2ecf20Sopenharmony_ci
21478c2ecf20Sopenharmony_cistatic __printf(3, 0)
21488c2ecf20Sopenharmony_ciint oid_aprintf_vargs(struct ceph_object_id *oid, gfp_t gfp,
21498c2ecf20Sopenharmony_ci		      const char *fmt, va_list ap)
21508c2ecf20Sopenharmony_ci{
21518c2ecf20Sopenharmony_ci	va_list aq;
21528c2ecf20Sopenharmony_ci	int len;
21538c2ecf20Sopenharmony_ci
21548c2ecf20Sopenharmony_ci	va_copy(aq, ap);
21558c2ecf20Sopenharmony_ci	len = oid_printf_vargs(oid, fmt, aq);
21568c2ecf20Sopenharmony_ci	va_end(aq);
21578c2ecf20Sopenharmony_ci
21588c2ecf20Sopenharmony_ci	if (len) {
21598c2ecf20Sopenharmony_ci		char *external_name;
21608c2ecf20Sopenharmony_ci
21618c2ecf20Sopenharmony_ci		external_name = kmalloc(len + 1, gfp);
21628c2ecf20Sopenharmony_ci		if (!external_name)
21638c2ecf20Sopenharmony_ci			return -ENOMEM;
21648c2ecf20Sopenharmony_ci
21658c2ecf20Sopenharmony_ci		oid->name = external_name;
21668c2ecf20Sopenharmony_ci		WARN_ON(vsnprintf(oid->name, len + 1, fmt, ap) != len);
21678c2ecf20Sopenharmony_ci		oid->name_len = len;
21688c2ecf20Sopenharmony_ci	}
21698c2ecf20Sopenharmony_ci
21708c2ecf20Sopenharmony_ci	return 0;
21718c2ecf20Sopenharmony_ci}
21728c2ecf20Sopenharmony_ci
21738c2ecf20Sopenharmony_ci/*
21748c2ecf20Sopenharmony_ci * If oid doesn't fit into inline buffer, allocate.
21758c2ecf20Sopenharmony_ci */
21768c2ecf20Sopenharmony_ciint ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp,
21778c2ecf20Sopenharmony_ci		     const char *fmt, ...)
21788c2ecf20Sopenharmony_ci{
21798c2ecf20Sopenharmony_ci	va_list ap;
21808c2ecf20Sopenharmony_ci	int ret;
21818c2ecf20Sopenharmony_ci
21828c2ecf20Sopenharmony_ci	va_start(ap, fmt);
21838c2ecf20Sopenharmony_ci	ret = oid_aprintf_vargs(oid, gfp, fmt, ap);
21848c2ecf20Sopenharmony_ci	va_end(ap);
21858c2ecf20Sopenharmony_ci
21868c2ecf20Sopenharmony_ci	return ret;
21878c2ecf20Sopenharmony_ci}
21888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_oid_aprintf);
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_civoid ceph_oid_destroy(struct ceph_object_id *oid)
21918c2ecf20Sopenharmony_ci{
21928c2ecf20Sopenharmony_ci	if (oid->name != oid->inline_name)
21938c2ecf20Sopenharmony_ci		kfree(oid->name);
21948c2ecf20Sopenharmony_ci}
21958c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_oid_destroy);
21968c2ecf20Sopenharmony_ci
21978c2ecf20Sopenharmony_ci/*
21988c2ecf20Sopenharmony_ci * osds only
21998c2ecf20Sopenharmony_ci */
22008c2ecf20Sopenharmony_cistatic bool __osds_equal(const struct ceph_osds *lhs,
22018c2ecf20Sopenharmony_ci			 const struct ceph_osds *rhs)
22028c2ecf20Sopenharmony_ci{
22038c2ecf20Sopenharmony_ci	if (lhs->size == rhs->size &&
22048c2ecf20Sopenharmony_ci	    !memcmp(lhs->osds, rhs->osds, rhs->size * sizeof(rhs->osds[0])))
22058c2ecf20Sopenharmony_ci		return true;
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_ci	return false;
22088c2ecf20Sopenharmony_ci}
22098c2ecf20Sopenharmony_ci
22108c2ecf20Sopenharmony_ci/*
22118c2ecf20Sopenharmony_ci * osds + primary
22128c2ecf20Sopenharmony_ci */
22138c2ecf20Sopenharmony_cistatic bool osds_equal(const struct ceph_osds *lhs,
22148c2ecf20Sopenharmony_ci		       const struct ceph_osds *rhs)
22158c2ecf20Sopenharmony_ci{
22168c2ecf20Sopenharmony_ci	if (__osds_equal(lhs, rhs) &&
22178c2ecf20Sopenharmony_ci	    lhs->primary == rhs->primary)
22188c2ecf20Sopenharmony_ci		return true;
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_ci	return false;
22218c2ecf20Sopenharmony_ci}
22228c2ecf20Sopenharmony_ci
22238c2ecf20Sopenharmony_cistatic bool osds_valid(const struct ceph_osds *set)
22248c2ecf20Sopenharmony_ci{
22258c2ecf20Sopenharmony_ci	/* non-empty set */
22268c2ecf20Sopenharmony_ci	if (set->size > 0 && set->primary >= 0)
22278c2ecf20Sopenharmony_ci		return true;
22288c2ecf20Sopenharmony_ci
22298c2ecf20Sopenharmony_ci	/* empty can_shift_osds set */
22308c2ecf20Sopenharmony_ci	if (!set->size && set->primary == -1)
22318c2ecf20Sopenharmony_ci		return true;
22328c2ecf20Sopenharmony_ci
22338c2ecf20Sopenharmony_ci	/* empty !can_shift_osds set - all NONE */
22348c2ecf20Sopenharmony_ci	if (set->size > 0 && set->primary == -1) {
22358c2ecf20Sopenharmony_ci		int i;
22368c2ecf20Sopenharmony_ci
22378c2ecf20Sopenharmony_ci		for (i = 0; i < set->size; i++) {
22388c2ecf20Sopenharmony_ci			if (set->osds[i] != CRUSH_ITEM_NONE)
22398c2ecf20Sopenharmony_ci				break;
22408c2ecf20Sopenharmony_ci		}
22418c2ecf20Sopenharmony_ci		if (i == set->size)
22428c2ecf20Sopenharmony_ci			return true;
22438c2ecf20Sopenharmony_ci	}
22448c2ecf20Sopenharmony_ci
22458c2ecf20Sopenharmony_ci	return false;
22468c2ecf20Sopenharmony_ci}
22478c2ecf20Sopenharmony_ci
22488c2ecf20Sopenharmony_civoid ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
22498c2ecf20Sopenharmony_ci{
22508c2ecf20Sopenharmony_ci	memcpy(dest->osds, src->osds, src->size * sizeof(src->osds[0]));
22518c2ecf20Sopenharmony_ci	dest->size = src->size;
22528c2ecf20Sopenharmony_ci	dest->primary = src->primary;
22538c2ecf20Sopenharmony_ci}
22548c2ecf20Sopenharmony_ci
22558c2ecf20Sopenharmony_cibool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
22568c2ecf20Sopenharmony_ci		      u32 new_pg_num)
22578c2ecf20Sopenharmony_ci{
22588c2ecf20Sopenharmony_ci	int old_bits = calc_bits_of(old_pg_num);
22598c2ecf20Sopenharmony_ci	int old_mask = (1 << old_bits) - 1;
22608c2ecf20Sopenharmony_ci	int n;
22618c2ecf20Sopenharmony_ci
22628c2ecf20Sopenharmony_ci	WARN_ON(pgid->seed >= old_pg_num);
22638c2ecf20Sopenharmony_ci	if (new_pg_num <= old_pg_num)
22648c2ecf20Sopenharmony_ci		return false;
22658c2ecf20Sopenharmony_ci
22668c2ecf20Sopenharmony_ci	for (n = 1; ; n++) {
22678c2ecf20Sopenharmony_ci		int next_bit = n << (old_bits - 1);
22688c2ecf20Sopenharmony_ci		u32 s = next_bit | pgid->seed;
22698c2ecf20Sopenharmony_ci
22708c2ecf20Sopenharmony_ci		if (s < old_pg_num || s == pgid->seed)
22718c2ecf20Sopenharmony_ci			continue;
22728c2ecf20Sopenharmony_ci		if (s >= new_pg_num)
22738c2ecf20Sopenharmony_ci			break;
22748c2ecf20Sopenharmony_ci
22758c2ecf20Sopenharmony_ci		s = ceph_stable_mod(s, old_pg_num, old_mask);
22768c2ecf20Sopenharmony_ci		if (s == pgid->seed)
22778c2ecf20Sopenharmony_ci			return true;
22788c2ecf20Sopenharmony_ci	}
22798c2ecf20Sopenharmony_ci
22808c2ecf20Sopenharmony_ci	return false;
22818c2ecf20Sopenharmony_ci}
22828c2ecf20Sopenharmony_ci
22838c2ecf20Sopenharmony_cibool ceph_is_new_interval(const struct ceph_osds *old_acting,
22848c2ecf20Sopenharmony_ci			  const struct ceph_osds *new_acting,
22858c2ecf20Sopenharmony_ci			  const struct ceph_osds *old_up,
22868c2ecf20Sopenharmony_ci			  const struct ceph_osds *new_up,
22878c2ecf20Sopenharmony_ci			  int old_size,
22888c2ecf20Sopenharmony_ci			  int new_size,
22898c2ecf20Sopenharmony_ci			  int old_min_size,
22908c2ecf20Sopenharmony_ci			  int new_min_size,
22918c2ecf20Sopenharmony_ci			  u32 old_pg_num,
22928c2ecf20Sopenharmony_ci			  u32 new_pg_num,
22938c2ecf20Sopenharmony_ci			  bool old_sort_bitwise,
22948c2ecf20Sopenharmony_ci			  bool new_sort_bitwise,
22958c2ecf20Sopenharmony_ci			  bool old_recovery_deletes,
22968c2ecf20Sopenharmony_ci			  bool new_recovery_deletes,
22978c2ecf20Sopenharmony_ci			  const struct ceph_pg *pgid)
22988c2ecf20Sopenharmony_ci{
22998c2ecf20Sopenharmony_ci	return !osds_equal(old_acting, new_acting) ||
23008c2ecf20Sopenharmony_ci	       !osds_equal(old_up, new_up) ||
23018c2ecf20Sopenharmony_ci	       old_size != new_size ||
23028c2ecf20Sopenharmony_ci	       old_min_size != new_min_size ||
23038c2ecf20Sopenharmony_ci	       ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
23048c2ecf20Sopenharmony_ci	       old_sort_bitwise != new_sort_bitwise ||
23058c2ecf20Sopenharmony_ci	       old_recovery_deletes != new_recovery_deletes;
23068c2ecf20Sopenharmony_ci}
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_cistatic int calc_pg_rank(int osd, const struct ceph_osds *acting)
23098c2ecf20Sopenharmony_ci{
23108c2ecf20Sopenharmony_ci	int i;
23118c2ecf20Sopenharmony_ci
23128c2ecf20Sopenharmony_ci	for (i = 0; i < acting->size; i++) {
23138c2ecf20Sopenharmony_ci		if (acting->osds[i] == osd)
23148c2ecf20Sopenharmony_ci			return i;
23158c2ecf20Sopenharmony_ci	}
23168c2ecf20Sopenharmony_ci
23178c2ecf20Sopenharmony_ci	return -1;
23188c2ecf20Sopenharmony_ci}
23198c2ecf20Sopenharmony_ci
23208c2ecf20Sopenharmony_cistatic bool primary_changed(const struct ceph_osds *old_acting,
23218c2ecf20Sopenharmony_ci			    const struct ceph_osds *new_acting)
23228c2ecf20Sopenharmony_ci{
23238c2ecf20Sopenharmony_ci	if (!old_acting->size && !new_acting->size)
23248c2ecf20Sopenharmony_ci		return false; /* both still empty */
23258c2ecf20Sopenharmony_ci
23268c2ecf20Sopenharmony_ci	if (!old_acting->size ^ !new_acting->size)
23278c2ecf20Sopenharmony_ci		return true; /* was empty, now not, or vice versa */
23288c2ecf20Sopenharmony_ci
23298c2ecf20Sopenharmony_ci	if (old_acting->primary != new_acting->primary)
23308c2ecf20Sopenharmony_ci		return true; /* primary changed */
23318c2ecf20Sopenharmony_ci
23328c2ecf20Sopenharmony_ci	if (calc_pg_rank(old_acting->primary, old_acting) !=
23338c2ecf20Sopenharmony_ci	    calc_pg_rank(new_acting->primary, new_acting))
23348c2ecf20Sopenharmony_ci		return true;
23358c2ecf20Sopenharmony_ci
23368c2ecf20Sopenharmony_ci	return false; /* same primary (tho replicas may have changed) */
23378c2ecf20Sopenharmony_ci}
23388c2ecf20Sopenharmony_ci
23398c2ecf20Sopenharmony_cibool ceph_osds_changed(const struct ceph_osds *old_acting,
23408c2ecf20Sopenharmony_ci		       const struct ceph_osds *new_acting,
23418c2ecf20Sopenharmony_ci		       bool any_change)
23428c2ecf20Sopenharmony_ci{
23438c2ecf20Sopenharmony_ci	if (primary_changed(old_acting, new_acting))
23448c2ecf20Sopenharmony_ci		return true;
23458c2ecf20Sopenharmony_ci
23468c2ecf20Sopenharmony_ci	if (any_change && !__osds_equal(old_acting, new_acting))
23478c2ecf20Sopenharmony_ci		return true;
23488c2ecf20Sopenharmony_ci
23498c2ecf20Sopenharmony_ci	return false;
23508c2ecf20Sopenharmony_ci}
23518c2ecf20Sopenharmony_ci
23528c2ecf20Sopenharmony_ci/*
23538c2ecf20Sopenharmony_ci * Map an object into a PG.
23548c2ecf20Sopenharmony_ci *
23558c2ecf20Sopenharmony_ci * Should only be called with target_oid and target_oloc (as opposed to
23568c2ecf20Sopenharmony_ci * base_oid and base_oloc), since tiering isn't taken into account.
23578c2ecf20Sopenharmony_ci */
23588c2ecf20Sopenharmony_civoid __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
23598c2ecf20Sopenharmony_ci				 const struct ceph_object_id *oid,
23608c2ecf20Sopenharmony_ci				 const struct ceph_object_locator *oloc,
23618c2ecf20Sopenharmony_ci				 struct ceph_pg *raw_pgid)
23628c2ecf20Sopenharmony_ci{
23638c2ecf20Sopenharmony_ci	WARN_ON(pi->id != oloc->pool);
23648c2ecf20Sopenharmony_ci
23658c2ecf20Sopenharmony_ci	if (!oloc->pool_ns) {
23668c2ecf20Sopenharmony_ci		raw_pgid->pool = oloc->pool;
23678c2ecf20Sopenharmony_ci		raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name,
23688c2ecf20Sopenharmony_ci					     oid->name_len);
23698c2ecf20Sopenharmony_ci		dout("%s %s -> raw_pgid %llu.%x\n", __func__, oid->name,
23708c2ecf20Sopenharmony_ci		     raw_pgid->pool, raw_pgid->seed);
23718c2ecf20Sopenharmony_ci	} else {
23728c2ecf20Sopenharmony_ci		char stack_buf[256];
23738c2ecf20Sopenharmony_ci		char *buf = stack_buf;
23748c2ecf20Sopenharmony_ci		int nsl = oloc->pool_ns->len;
23758c2ecf20Sopenharmony_ci		size_t total = nsl + 1 + oid->name_len;
23768c2ecf20Sopenharmony_ci
23778c2ecf20Sopenharmony_ci		if (total > sizeof(stack_buf))
23788c2ecf20Sopenharmony_ci			buf = kmalloc(total, GFP_NOIO | __GFP_NOFAIL);
23798c2ecf20Sopenharmony_ci		memcpy(buf, oloc->pool_ns->str, nsl);
23808c2ecf20Sopenharmony_ci		buf[nsl] = '\037';
23818c2ecf20Sopenharmony_ci		memcpy(buf + nsl + 1, oid->name, oid->name_len);
23828c2ecf20Sopenharmony_ci		raw_pgid->pool = oloc->pool;
23838c2ecf20Sopenharmony_ci		raw_pgid->seed = ceph_str_hash(pi->object_hash, buf, total);
23848c2ecf20Sopenharmony_ci		if (buf != stack_buf)
23858c2ecf20Sopenharmony_ci			kfree(buf);
23868c2ecf20Sopenharmony_ci		dout("%s %s ns %.*s -> raw_pgid %llu.%x\n", __func__,
23878c2ecf20Sopenharmony_ci		     oid->name, nsl, oloc->pool_ns->str,
23888c2ecf20Sopenharmony_ci		     raw_pgid->pool, raw_pgid->seed);
23898c2ecf20Sopenharmony_ci	}
23908c2ecf20Sopenharmony_ci}
23918c2ecf20Sopenharmony_ci
23928c2ecf20Sopenharmony_ciint ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
23938c2ecf20Sopenharmony_ci			      const struct ceph_object_id *oid,
23948c2ecf20Sopenharmony_ci			      const struct ceph_object_locator *oloc,
23958c2ecf20Sopenharmony_ci			      struct ceph_pg *raw_pgid)
23968c2ecf20Sopenharmony_ci{
23978c2ecf20Sopenharmony_ci	struct ceph_pg_pool_info *pi;
23988c2ecf20Sopenharmony_ci
23998c2ecf20Sopenharmony_ci	pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
24008c2ecf20Sopenharmony_ci	if (!pi)
24018c2ecf20Sopenharmony_ci		return -ENOENT;
24028c2ecf20Sopenharmony_ci
24038c2ecf20Sopenharmony_ci	__ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
24048c2ecf20Sopenharmony_ci	return 0;
24058c2ecf20Sopenharmony_ci}
24068c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_object_locator_to_pg);
24078c2ecf20Sopenharmony_ci
24088c2ecf20Sopenharmony_ci/*
24098c2ecf20Sopenharmony_ci * Map a raw PG (full precision ps) into an actual PG.
24108c2ecf20Sopenharmony_ci */
24118c2ecf20Sopenharmony_cistatic void raw_pg_to_pg(struct ceph_pg_pool_info *pi,
24128c2ecf20Sopenharmony_ci			 const struct ceph_pg *raw_pgid,
24138c2ecf20Sopenharmony_ci			 struct ceph_pg *pgid)
24148c2ecf20Sopenharmony_ci{
24158c2ecf20Sopenharmony_ci	pgid->pool = raw_pgid->pool;
24168c2ecf20Sopenharmony_ci	pgid->seed = ceph_stable_mod(raw_pgid->seed, pi->pg_num,
24178c2ecf20Sopenharmony_ci				     pi->pg_num_mask);
24188c2ecf20Sopenharmony_ci}
24198c2ecf20Sopenharmony_ci
24208c2ecf20Sopenharmony_ci/*
24218c2ecf20Sopenharmony_ci * Map a raw PG (full precision ps) into a placement ps (placement
24228c2ecf20Sopenharmony_ci * seed).  Include pool id in that value so that different pools don't
24238c2ecf20Sopenharmony_ci * use the same seeds.
24248c2ecf20Sopenharmony_ci */
24258c2ecf20Sopenharmony_cistatic u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
24268c2ecf20Sopenharmony_ci			 const struct ceph_pg *raw_pgid)
24278c2ecf20Sopenharmony_ci{
24288c2ecf20Sopenharmony_ci	if (pi->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
24298c2ecf20Sopenharmony_ci		/* hash pool id and seed so that pool PGs do not overlap */
24308c2ecf20Sopenharmony_ci		return crush_hash32_2(CRUSH_HASH_RJENKINS1,
24318c2ecf20Sopenharmony_ci				      ceph_stable_mod(raw_pgid->seed,
24328c2ecf20Sopenharmony_ci						      pi->pgp_num,
24338c2ecf20Sopenharmony_ci						      pi->pgp_num_mask),
24348c2ecf20Sopenharmony_ci				      raw_pgid->pool);
24358c2ecf20Sopenharmony_ci	} else {
24368c2ecf20Sopenharmony_ci		/*
24378c2ecf20Sopenharmony_ci		 * legacy behavior: add ps and pool together.  this is
24388c2ecf20Sopenharmony_ci		 * not a great approach because the PGs from each pool
24398c2ecf20Sopenharmony_ci		 * will overlap on top of each other: 0.5 == 1.4 ==
24408c2ecf20Sopenharmony_ci		 * 2.3 == ...
24418c2ecf20Sopenharmony_ci		 */
24428c2ecf20Sopenharmony_ci		return ceph_stable_mod(raw_pgid->seed, pi->pgp_num,
24438c2ecf20Sopenharmony_ci				       pi->pgp_num_mask) +
24448c2ecf20Sopenharmony_ci		       (unsigned)raw_pgid->pool;
24458c2ecf20Sopenharmony_ci	}
24468c2ecf20Sopenharmony_ci}
24478c2ecf20Sopenharmony_ci
/*
 * Magic value used for a "default" fallback choose_args, used if the
 * crush_choose_arg_map passed to do_crush() does not exist.  If this
 * also doesn't exist, fall back to canonical weights.
 *
 * Parenthesized so that the negative constant expands safely inside
 * any surrounding expression.
 */
#define CEPH_DEFAULT_CHOOSE_ARGS	(-1)
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_cistatic int do_crush(struct ceph_osdmap *map, int ruleno, int x,
24568c2ecf20Sopenharmony_ci		    int *result, int result_max,
24578c2ecf20Sopenharmony_ci		    const __u32 *weight, int weight_max,
24588c2ecf20Sopenharmony_ci		    s64 choose_args_index)
24598c2ecf20Sopenharmony_ci{
24608c2ecf20Sopenharmony_ci	struct crush_choose_arg_map *arg_map;
24618c2ecf20Sopenharmony_ci	struct crush_work *work;
24628c2ecf20Sopenharmony_ci	int r;
24638c2ecf20Sopenharmony_ci
24648c2ecf20Sopenharmony_ci	BUG_ON(result_max > CEPH_PG_MAX_SIZE);
24658c2ecf20Sopenharmony_ci
24668c2ecf20Sopenharmony_ci	arg_map = lookup_choose_arg_map(&map->crush->choose_args,
24678c2ecf20Sopenharmony_ci					choose_args_index);
24688c2ecf20Sopenharmony_ci	if (!arg_map)
24698c2ecf20Sopenharmony_ci		arg_map = lookup_choose_arg_map(&map->crush->choose_args,
24708c2ecf20Sopenharmony_ci						CEPH_DEFAULT_CHOOSE_ARGS);
24718c2ecf20Sopenharmony_ci
24728c2ecf20Sopenharmony_ci	work = get_workspace(&map->crush_wsm, map->crush);
24738c2ecf20Sopenharmony_ci	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
24748c2ecf20Sopenharmony_ci			  weight, weight_max, work,
24758c2ecf20Sopenharmony_ci			  arg_map ? arg_map->args : NULL);
24768c2ecf20Sopenharmony_ci	put_workspace(&map->crush_wsm, work);
24778c2ecf20Sopenharmony_ci	return r;
24788c2ecf20Sopenharmony_ci}
24798c2ecf20Sopenharmony_ci
24808c2ecf20Sopenharmony_cistatic void remove_nonexistent_osds(struct ceph_osdmap *osdmap,
24818c2ecf20Sopenharmony_ci				    struct ceph_pg_pool_info *pi,
24828c2ecf20Sopenharmony_ci				    struct ceph_osds *set)
24838c2ecf20Sopenharmony_ci{
24848c2ecf20Sopenharmony_ci	int i;
24858c2ecf20Sopenharmony_ci
24868c2ecf20Sopenharmony_ci	if (ceph_can_shift_osds(pi)) {
24878c2ecf20Sopenharmony_ci		int removed = 0;
24888c2ecf20Sopenharmony_ci
24898c2ecf20Sopenharmony_ci		/* shift left */
24908c2ecf20Sopenharmony_ci		for (i = 0; i < set->size; i++) {
24918c2ecf20Sopenharmony_ci			if (!ceph_osd_exists(osdmap, set->osds[i])) {
24928c2ecf20Sopenharmony_ci				removed++;
24938c2ecf20Sopenharmony_ci				continue;
24948c2ecf20Sopenharmony_ci			}
24958c2ecf20Sopenharmony_ci			if (removed)
24968c2ecf20Sopenharmony_ci				set->osds[i - removed] = set->osds[i];
24978c2ecf20Sopenharmony_ci		}
24988c2ecf20Sopenharmony_ci		set->size -= removed;
24998c2ecf20Sopenharmony_ci	} else {
25008c2ecf20Sopenharmony_ci		/* set dne devices to NONE */
25018c2ecf20Sopenharmony_ci		for (i = 0; i < set->size; i++) {
25028c2ecf20Sopenharmony_ci			if (!ceph_osd_exists(osdmap, set->osds[i]))
25038c2ecf20Sopenharmony_ci				set->osds[i] = CRUSH_ITEM_NONE;
25048c2ecf20Sopenharmony_ci		}
25058c2ecf20Sopenharmony_ci	}
25068c2ecf20Sopenharmony_ci}
25078c2ecf20Sopenharmony_ci
25088c2ecf20Sopenharmony_ci/*
25098c2ecf20Sopenharmony_ci * Calculate raw set (CRUSH output) for given PG and filter out
25108c2ecf20Sopenharmony_ci * nonexistent OSDs.  ->primary is undefined for a raw set.
25118c2ecf20Sopenharmony_ci *
25128c2ecf20Sopenharmony_ci * Placement seed (CRUSH input) is returned through @ppps.
25138c2ecf20Sopenharmony_ci */
25148c2ecf20Sopenharmony_cistatic void pg_to_raw_osds(struct ceph_osdmap *osdmap,
25158c2ecf20Sopenharmony_ci			   struct ceph_pg_pool_info *pi,
25168c2ecf20Sopenharmony_ci			   const struct ceph_pg *raw_pgid,
25178c2ecf20Sopenharmony_ci			   struct ceph_osds *raw,
25188c2ecf20Sopenharmony_ci			   u32 *ppps)
25198c2ecf20Sopenharmony_ci{
25208c2ecf20Sopenharmony_ci	u32 pps = raw_pg_to_pps(pi, raw_pgid);
25218c2ecf20Sopenharmony_ci	int ruleno;
25228c2ecf20Sopenharmony_ci	int len;
25238c2ecf20Sopenharmony_ci
25248c2ecf20Sopenharmony_ci	ceph_osds_init(raw);
25258c2ecf20Sopenharmony_ci	if (ppps)
25268c2ecf20Sopenharmony_ci		*ppps = pps;
25278c2ecf20Sopenharmony_ci
25288c2ecf20Sopenharmony_ci	ruleno = crush_find_rule(osdmap->crush, pi->crush_ruleset, pi->type,
25298c2ecf20Sopenharmony_ci				 pi->size);
25308c2ecf20Sopenharmony_ci	if (ruleno < 0) {
25318c2ecf20Sopenharmony_ci		pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
25328c2ecf20Sopenharmony_ci		       pi->id, pi->crush_ruleset, pi->type, pi->size);
25338c2ecf20Sopenharmony_ci		return;
25348c2ecf20Sopenharmony_ci	}
25358c2ecf20Sopenharmony_ci
25368c2ecf20Sopenharmony_ci	if (pi->size > ARRAY_SIZE(raw->osds)) {
25378c2ecf20Sopenharmony_ci		pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n",
25388c2ecf20Sopenharmony_ci		       pi->id, pi->crush_ruleset, pi->type, pi->size,
25398c2ecf20Sopenharmony_ci		       ARRAY_SIZE(raw->osds));
25408c2ecf20Sopenharmony_ci		return;
25418c2ecf20Sopenharmony_ci	}
25428c2ecf20Sopenharmony_ci
25438c2ecf20Sopenharmony_ci	len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
25448c2ecf20Sopenharmony_ci		       osdmap->osd_weight, osdmap->max_osd, pi->id);
25458c2ecf20Sopenharmony_ci	if (len < 0) {
25468c2ecf20Sopenharmony_ci		pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
25478c2ecf20Sopenharmony_ci		       len, ruleno, pi->id, pi->crush_ruleset, pi->type,
25488c2ecf20Sopenharmony_ci		       pi->size);
25498c2ecf20Sopenharmony_ci		return;
25508c2ecf20Sopenharmony_ci	}
25518c2ecf20Sopenharmony_ci
25528c2ecf20Sopenharmony_ci	raw->size = len;
25538c2ecf20Sopenharmony_ci	remove_nonexistent_osds(osdmap, pi, raw);
25548c2ecf20Sopenharmony_ci}
25558c2ecf20Sopenharmony_ci
25568c2ecf20Sopenharmony_ci/* apply pg_upmap[_items] mappings */
25578c2ecf20Sopenharmony_cistatic void apply_upmap(struct ceph_osdmap *osdmap,
25588c2ecf20Sopenharmony_ci			const struct ceph_pg *pgid,
25598c2ecf20Sopenharmony_ci			struct ceph_osds *raw)
25608c2ecf20Sopenharmony_ci{
25618c2ecf20Sopenharmony_ci	struct ceph_pg_mapping *pg;
25628c2ecf20Sopenharmony_ci	int i, j;
25638c2ecf20Sopenharmony_ci
25648c2ecf20Sopenharmony_ci	pg = lookup_pg_mapping(&osdmap->pg_upmap, pgid);
25658c2ecf20Sopenharmony_ci	if (pg) {
25668c2ecf20Sopenharmony_ci		/* make sure targets aren't marked out */
25678c2ecf20Sopenharmony_ci		for (i = 0; i < pg->pg_upmap.len; i++) {
25688c2ecf20Sopenharmony_ci			int osd = pg->pg_upmap.osds[i];
25698c2ecf20Sopenharmony_ci
25708c2ecf20Sopenharmony_ci			if (osd != CRUSH_ITEM_NONE &&
25718c2ecf20Sopenharmony_ci			    osd < osdmap->max_osd &&
25728c2ecf20Sopenharmony_ci			    osdmap->osd_weight[osd] == 0) {
25738c2ecf20Sopenharmony_ci				/* reject/ignore explicit mapping */
25748c2ecf20Sopenharmony_ci				return;
25758c2ecf20Sopenharmony_ci			}
25768c2ecf20Sopenharmony_ci		}
25778c2ecf20Sopenharmony_ci		for (i = 0; i < pg->pg_upmap.len; i++)
25788c2ecf20Sopenharmony_ci			raw->osds[i] = pg->pg_upmap.osds[i];
25798c2ecf20Sopenharmony_ci		raw->size = pg->pg_upmap.len;
25808c2ecf20Sopenharmony_ci		/* check and apply pg_upmap_items, if any */
25818c2ecf20Sopenharmony_ci	}
25828c2ecf20Sopenharmony_ci
25838c2ecf20Sopenharmony_ci	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
25848c2ecf20Sopenharmony_ci	if (pg) {
25858c2ecf20Sopenharmony_ci		/*
25868c2ecf20Sopenharmony_ci		 * Note: this approach does not allow a bidirectional swap,
25878c2ecf20Sopenharmony_ci		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
25888c2ecf20Sopenharmony_ci		 */
25898c2ecf20Sopenharmony_ci		for (i = 0; i < pg->pg_upmap_items.len; i++) {
25908c2ecf20Sopenharmony_ci			int from = pg->pg_upmap_items.from_to[i][0];
25918c2ecf20Sopenharmony_ci			int to = pg->pg_upmap_items.from_to[i][1];
25928c2ecf20Sopenharmony_ci			int pos = -1;
25938c2ecf20Sopenharmony_ci			bool exists = false;
25948c2ecf20Sopenharmony_ci
25958c2ecf20Sopenharmony_ci			/* make sure replacement doesn't already appear */
25968c2ecf20Sopenharmony_ci			for (j = 0; j < raw->size; j++) {
25978c2ecf20Sopenharmony_ci				int osd = raw->osds[j];
25988c2ecf20Sopenharmony_ci
25998c2ecf20Sopenharmony_ci				if (osd == to) {
26008c2ecf20Sopenharmony_ci					exists = true;
26018c2ecf20Sopenharmony_ci					break;
26028c2ecf20Sopenharmony_ci				}
26038c2ecf20Sopenharmony_ci				/* ignore mapping if target is marked out */
26048c2ecf20Sopenharmony_ci				if (osd == from && pos < 0 &&
26058c2ecf20Sopenharmony_ci				    !(to != CRUSH_ITEM_NONE &&
26068c2ecf20Sopenharmony_ci				      to < osdmap->max_osd &&
26078c2ecf20Sopenharmony_ci				      osdmap->osd_weight[to] == 0)) {
26088c2ecf20Sopenharmony_ci					pos = j;
26098c2ecf20Sopenharmony_ci				}
26108c2ecf20Sopenharmony_ci			}
26118c2ecf20Sopenharmony_ci			if (!exists && pos >= 0)
26128c2ecf20Sopenharmony_ci				raw->osds[pos] = to;
26138c2ecf20Sopenharmony_ci		}
26148c2ecf20Sopenharmony_ci	}
26158c2ecf20Sopenharmony_ci}
26168c2ecf20Sopenharmony_ci
26178c2ecf20Sopenharmony_ci/*
26188c2ecf20Sopenharmony_ci * Given raw set, calculate up set and up primary.  By definition of an
26198c2ecf20Sopenharmony_ci * up set, the result won't contain nonexistent or down OSDs.
26208c2ecf20Sopenharmony_ci *
26218c2ecf20Sopenharmony_ci * This is done in-place - on return @set is the up set.  If it's
26228c2ecf20Sopenharmony_ci * empty, ->primary will remain undefined.
26238c2ecf20Sopenharmony_ci */
26248c2ecf20Sopenharmony_cistatic void raw_to_up_osds(struct ceph_osdmap *osdmap,
26258c2ecf20Sopenharmony_ci			   struct ceph_pg_pool_info *pi,
26268c2ecf20Sopenharmony_ci			   struct ceph_osds *set)
26278c2ecf20Sopenharmony_ci{
26288c2ecf20Sopenharmony_ci	int i;
26298c2ecf20Sopenharmony_ci
26308c2ecf20Sopenharmony_ci	/* ->primary is undefined for a raw set */
26318c2ecf20Sopenharmony_ci	BUG_ON(set->primary != -1);
26328c2ecf20Sopenharmony_ci
26338c2ecf20Sopenharmony_ci	if (ceph_can_shift_osds(pi)) {
26348c2ecf20Sopenharmony_ci		int removed = 0;
26358c2ecf20Sopenharmony_ci
26368c2ecf20Sopenharmony_ci		/* shift left */
26378c2ecf20Sopenharmony_ci		for (i = 0; i < set->size; i++) {
26388c2ecf20Sopenharmony_ci			if (ceph_osd_is_down(osdmap, set->osds[i])) {
26398c2ecf20Sopenharmony_ci				removed++;
26408c2ecf20Sopenharmony_ci				continue;
26418c2ecf20Sopenharmony_ci			}
26428c2ecf20Sopenharmony_ci			if (removed)
26438c2ecf20Sopenharmony_ci				set->osds[i - removed] = set->osds[i];
26448c2ecf20Sopenharmony_ci		}
26458c2ecf20Sopenharmony_ci		set->size -= removed;
26468c2ecf20Sopenharmony_ci		if (set->size > 0)
26478c2ecf20Sopenharmony_ci			set->primary = set->osds[0];
26488c2ecf20Sopenharmony_ci	} else {
26498c2ecf20Sopenharmony_ci		/* set down/dne devices to NONE */
26508c2ecf20Sopenharmony_ci		for (i = set->size - 1; i >= 0; i--) {
26518c2ecf20Sopenharmony_ci			if (ceph_osd_is_down(osdmap, set->osds[i]))
26528c2ecf20Sopenharmony_ci				set->osds[i] = CRUSH_ITEM_NONE;
26538c2ecf20Sopenharmony_ci			else
26548c2ecf20Sopenharmony_ci				set->primary = set->osds[i];
26558c2ecf20Sopenharmony_ci		}
26568c2ecf20Sopenharmony_ci	}
26578c2ecf20Sopenharmony_ci}
26588c2ecf20Sopenharmony_ci
26598c2ecf20Sopenharmony_cistatic void apply_primary_affinity(struct ceph_osdmap *osdmap,
26608c2ecf20Sopenharmony_ci				   struct ceph_pg_pool_info *pi,
26618c2ecf20Sopenharmony_ci				   u32 pps,
26628c2ecf20Sopenharmony_ci				   struct ceph_osds *up)
26638c2ecf20Sopenharmony_ci{
26648c2ecf20Sopenharmony_ci	int i;
26658c2ecf20Sopenharmony_ci	int pos = -1;
26668c2ecf20Sopenharmony_ci
26678c2ecf20Sopenharmony_ci	/*
26688c2ecf20Sopenharmony_ci	 * Do we have any non-default primary_affinity values for these
26698c2ecf20Sopenharmony_ci	 * osds?
26708c2ecf20Sopenharmony_ci	 */
26718c2ecf20Sopenharmony_ci	if (!osdmap->osd_primary_affinity)
26728c2ecf20Sopenharmony_ci		return;
26738c2ecf20Sopenharmony_ci
26748c2ecf20Sopenharmony_ci	for (i = 0; i < up->size; i++) {
26758c2ecf20Sopenharmony_ci		int osd = up->osds[i];
26768c2ecf20Sopenharmony_ci
26778c2ecf20Sopenharmony_ci		if (osd != CRUSH_ITEM_NONE &&
26788c2ecf20Sopenharmony_ci		    osdmap->osd_primary_affinity[osd] !=
26798c2ecf20Sopenharmony_ci					CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
26808c2ecf20Sopenharmony_ci			break;
26818c2ecf20Sopenharmony_ci		}
26828c2ecf20Sopenharmony_ci	}
26838c2ecf20Sopenharmony_ci	if (i == up->size)
26848c2ecf20Sopenharmony_ci		return;
26858c2ecf20Sopenharmony_ci
26868c2ecf20Sopenharmony_ci	/*
26878c2ecf20Sopenharmony_ci	 * Pick the primary.  Feed both the seed (for the pg) and the
26888c2ecf20Sopenharmony_ci	 * osd into the hash/rng so that a proportional fraction of an
26898c2ecf20Sopenharmony_ci	 * osd's pgs get rejected as primary.
26908c2ecf20Sopenharmony_ci	 */
26918c2ecf20Sopenharmony_ci	for (i = 0; i < up->size; i++) {
26928c2ecf20Sopenharmony_ci		int osd = up->osds[i];
26938c2ecf20Sopenharmony_ci		u32 aff;
26948c2ecf20Sopenharmony_ci
26958c2ecf20Sopenharmony_ci		if (osd == CRUSH_ITEM_NONE)
26968c2ecf20Sopenharmony_ci			continue;
26978c2ecf20Sopenharmony_ci
26988c2ecf20Sopenharmony_ci		aff = osdmap->osd_primary_affinity[osd];
26998c2ecf20Sopenharmony_ci		if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
27008c2ecf20Sopenharmony_ci		    (crush_hash32_2(CRUSH_HASH_RJENKINS1,
27018c2ecf20Sopenharmony_ci				    pps, osd) >> 16) >= aff) {
27028c2ecf20Sopenharmony_ci			/*
27038c2ecf20Sopenharmony_ci			 * We chose not to use this primary.  Note it
27048c2ecf20Sopenharmony_ci			 * anyway as a fallback in case we don't pick
27058c2ecf20Sopenharmony_ci			 * anyone else, but keep looking.
27068c2ecf20Sopenharmony_ci			 */
27078c2ecf20Sopenharmony_ci			if (pos < 0)
27088c2ecf20Sopenharmony_ci				pos = i;
27098c2ecf20Sopenharmony_ci		} else {
27108c2ecf20Sopenharmony_ci			pos = i;
27118c2ecf20Sopenharmony_ci			break;
27128c2ecf20Sopenharmony_ci		}
27138c2ecf20Sopenharmony_ci	}
27148c2ecf20Sopenharmony_ci	if (pos < 0)
27158c2ecf20Sopenharmony_ci		return;
27168c2ecf20Sopenharmony_ci
27178c2ecf20Sopenharmony_ci	up->primary = up->osds[pos];
27188c2ecf20Sopenharmony_ci
27198c2ecf20Sopenharmony_ci	if (ceph_can_shift_osds(pi) && pos > 0) {
27208c2ecf20Sopenharmony_ci		/* move the new primary to the front */
27218c2ecf20Sopenharmony_ci		for (i = pos; i > 0; i--)
27228c2ecf20Sopenharmony_ci			up->osds[i] = up->osds[i - 1];
27238c2ecf20Sopenharmony_ci		up->osds[0] = up->primary;
27248c2ecf20Sopenharmony_ci	}
27258c2ecf20Sopenharmony_ci}
27268c2ecf20Sopenharmony_ci
27278c2ecf20Sopenharmony_ci/*
27288c2ecf20Sopenharmony_ci * Get pg_temp and primary_temp mappings for given PG.
27298c2ecf20Sopenharmony_ci *
27308c2ecf20Sopenharmony_ci * Note that a PG may have none, only pg_temp, only primary_temp or
27318c2ecf20Sopenharmony_ci * both pg_temp and primary_temp mappings.  This means @temp isn't
27328c2ecf20Sopenharmony_ci * always a valid OSD set on return: in the "only primary_temp" case,
27338c2ecf20Sopenharmony_ci * @temp will have its ->primary >= 0 but ->size == 0.
27348c2ecf20Sopenharmony_ci */
27358c2ecf20Sopenharmony_cistatic void get_temp_osds(struct ceph_osdmap *osdmap,
27368c2ecf20Sopenharmony_ci			  struct ceph_pg_pool_info *pi,
27378c2ecf20Sopenharmony_ci			  const struct ceph_pg *pgid,
27388c2ecf20Sopenharmony_ci			  struct ceph_osds *temp)
27398c2ecf20Sopenharmony_ci{
27408c2ecf20Sopenharmony_ci	struct ceph_pg_mapping *pg;
27418c2ecf20Sopenharmony_ci	int i;
27428c2ecf20Sopenharmony_ci
27438c2ecf20Sopenharmony_ci	ceph_osds_init(temp);
27448c2ecf20Sopenharmony_ci
27458c2ecf20Sopenharmony_ci	/* pg_temp? */
27468c2ecf20Sopenharmony_ci	pg = lookup_pg_mapping(&osdmap->pg_temp, pgid);
27478c2ecf20Sopenharmony_ci	if (pg) {
27488c2ecf20Sopenharmony_ci		for (i = 0; i < pg->pg_temp.len; i++) {
27498c2ecf20Sopenharmony_ci			if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
27508c2ecf20Sopenharmony_ci				if (ceph_can_shift_osds(pi))
27518c2ecf20Sopenharmony_ci					continue;
27528c2ecf20Sopenharmony_ci
27538c2ecf20Sopenharmony_ci				temp->osds[temp->size++] = CRUSH_ITEM_NONE;
27548c2ecf20Sopenharmony_ci			} else {
27558c2ecf20Sopenharmony_ci				temp->osds[temp->size++] = pg->pg_temp.osds[i];
27568c2ecf20Sopenharmony_ci			}
27578c2ecf20Sopenharmony_ci		}
27588c2ecf20Sopenharmony_ci
27598c2ecf20Sopenharmony_ci		/* apply pg_temp's primary */
27608c2ecf20Sopenharmony_ci		for (i = 0; i < temp->size; i++) {
27618c2ecf20Sopenharmony_ci			if (temp->osds[i] != CRUSH_ITEM_NONE) {
27628c2ecf20Sopenharmony_ci				temp->primary = temp->osds[i];
27638c2ecf20Sopenharmony_ci				break;
27648c2ecf20Sopenharmony_ci			}
27658c2ecf20Sopenharmony_ci		}
27668c2ecf20Sopenharmony_ci	}
27678c2ecf20Sopenharmony_ci
27688c2ecf20Sopenharmony_ci	/* primary_temp? */
27698c2ecf20Sopenharmony_ci	pg = lookup_pg_mapping(&osdmap->primary_temp, pgid);
27708c2ecf20Sopenharmony_ci	if (pg)
27718c2ecf20Sopenharmony_ci		temp->primary = pg->primary_temp.osd;
27728c2ecf20Sopenharmony_ci}
27738c2ecf20Sopenharmony_ci
27748c2ecf20Sopenharmony_ci/*
27758c2ecf20Sopenharmony_ci * Map a PG to its acting set as well as its up set.
27768c2ecf20Sopenharmony_ci *
27778c2ecf20Sopenharmony_ci * Acting set is used for data mapping purposes, while up set can be
27788c2ecf20Sopenharmony_ci * recorded for detecting interval changes and deciding whether to
27798c2ecf20Sopenharmony_ci * resend a request.
27808c2ecf20Sopenharmony_ci */
27818c2ecf20Sopenharmony_civoid ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
27828c2ecf20Sopenharmony_ci			       struct ceph_pg_pool_info *pi,
27838c2ecf20Sopenharmony_ci			       const struct ceph_pg *raw_pgid,
27848c2ecf20Sopenharmony_ci			       struct ceph_osds *up,
27858c2ecf20Sopenharmony_ci			       struct ceph_osds *acting)
27868c2ecf20Sopenharmony_ci{
27878c2ecf20Sopenharmony_ci	struct ceph_pg pgid;
27888c2ecf20Sopenharmony_ci	u32 pps;
27898c2ecf20Sopenharmony_ci
27908c2ecf20Sopenharmony_ci	WARN_ON(pi->id != raw_pgid->pool);
27918c2ecf20Sopenharmony_ci	raw_pg_to_pg(pi, raw_pgid, &pgid);
27928c2ecf20Sopenharmony_ci
27938c2ecf20Sopenharmony_ci	pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
27948c2ecf20Sopenharmony_ci	apply_upmap(osdmap, &pgid, up);
27958c2ecf20Sopenharmony_ci	raw_to_up_osds(osdmap, pi, up);
27968c2ecf20Sopenharmony_ci	apply_primary_affinity(osdmap, pi, pps, up);
27978c2ecf20Sopenharmony_ci	get_temp_osds(osdmap, pi, &pgid, acting);
27988c2ecf20Sopenharmony_ci	if (!acting->size) {
27998c2ecf20Sopenharmony_ci		memcpy(acting->osds, up->osds, up->size * sizeof(up->osds[0]));
28008c2ecf20Sopenharmony_ci		acting->size = up->size;
28018c2ecf20Sopenharmony_ci		if (acting->primary == -1)
28028c2ecf20Sopenharmony_ci			acting->primary = up->primary;
28038c2ecf20Sopenharmony_ci	}
28048c2ecf20Sopenharmony_ci	WARN_ON(!osds_valid(up) || !osds_valid(acting));
28058c2ecf20Sopenharmony_ci}
28068c2ecf20Sopenharmony_ci
28078c2ecf20Sopenharmony_cibool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
28088c2ecf20Sopenharmony_ci			      struct ceph_pg_pool_info *pi,
28098c2ecf20Sopenharmony_ci			      const struct ceph_pg *raw_pgid,
28108c2ecf20Sopenharmony_ci			      struct ceph_spg *spgid)
28118c2ecf20Sopenharmony_ci{
28128c2ecf20Sopenharmony_ci	struct ceph_pg pgid;
28138c2ecf20Sopenharmony_ci	struct ceph_osds up, acting;
28148c2ecf20Sopenharmony_ci	int i;
28158c2ecf20Sopenharmony_ci
28168c2ecf20Sopenharmony_ci	WARN_ON(pi->id != raw_pgid->pool);
28178c2ecf20Sopenharmony_ci	raw_pg_to_pg(pi, raw_pgid, &pgid);
28188c2ecf20Sopenharmony_ci
28198c2ecf20Sopenharmony_ci	if (ceph_can_shift_osds(pi)) {
28208c2ecf20Sopenharmony_ci		spgid->pgid = pgid; /* struct */
28218c2ecf20Sopenharmony_ci		spgid->shard = CEPH_SPG_NOSHARD;
28228c2ecf20Sopenharmony_ci		return true;
28238c2ecf20Sopenharmony_ci	}
28248c2ecf20Sopenharmony_ci
28258c2ecf20Sopenharmony_ci	ceph_pg_to_up_acting_osds(osdmap, pi, &pgid, &up, &acting);
28268c2ecf20Sopenharmony_ci	for (i = 0; i < acting.size; i++) {
28278c2ecf20Sopenharmony_ci		if (acting.osds[i] == acting.primary) {
28288c2ecf20Sopenharmony_ci			spgid->pgid = pgid; /* struct */
28298c2ecf20Sopenharmony_ci			spgid->shard = i;
28308c2ecf20Sopenharmony_ci			return true;
28318c2ecf20Sopenharmony_ci		}
28328c2ecf20Sopenharmony_ci	}
28338c2ecf20Sopenharmony_ci
28348c2ecf20Sopenharmony_ci	return false;
28358c2ecf20Sopenharmony_ci}
28368c2ecf20Sopenharmony_ci
28378c2ecf20Sopenharmony_ci/*
28388c2ecf20Sopenharmony_ci * Return acting primary for given PG, or -1 if none.
28398c2ecf20Sopenharmony_ci */
28408c2ecf20Sopenharmony_ciint ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
28418c2ecf20Sopenharmony_ci			      const struct ceph_pg *raw_pgid)
28428c2ecf20Sopenharmony_ci{
28438c2ecf20Sopenharmony_ci	struct ceph_pg_pool_info *pi;
28448c2ecf20Sopenharmony_ci	struct ceph_osds up, acting;
28458c2ecf20Sopenharmony_ci
28468c2ecf20Sopenharmony_ci	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
28478c2ecf20Sopenharmony_ci	if (!pi)
28488c2ecf20Sopenharmony_ci		return -1;
28498c2ecf20Sopenharmony_ci
28508c2ecf20Sopenharmony_ci	ceph_pg_to_up_acting_osds(osdmap, pi, raw_pgid, &up, &acting);
28518c2ecf20Sopenharmony_ci	return acting.primary;
28528c2ecf20Sopenharmony_ci}
28538c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ceph_pg_to_acting_primary);
28548c2ecf20Sopenharmony_ci
28558c2ecf20Sopenharmony_cistatic struct crush_loc_node *alloc_crush_loc(size_t type_name_len,
28568c2ecf20Sopenharmony_ci					      size_t name_len)
28578c2ecf20Sopenharmony_ci{
28588c2ecf20Sopenharmony_ci	struct crush_loc_node *loc;
28598c2ecf20Sopenharmony_ci
28608c2ecf20Sopenharmony_ci	loc = kmalloc(sizeof(*loc) + type_name_len + name_len + 2, GFP_NOIO);
28618c2ecf20Sopenharmony_ci	if (!loc)
28628c2ecf20Sopenharmony_ci		return NULL;
28638c2ecf20Sopenharmony_ci
28648c2ecf20Sopenharmony_ci	RB_CLEAR_NODE(&loc->cl_node);
28658c2ecf20Sopenharmony_ci	return loc;
28668c2ecf20Sopenharmony_ci}
28678c2ecf20Sopenharmony_ci
28688c2ecf20Sopenharmony_cistatic void free_crush_loc(struct crush_loc_node *loc)
28698c2ecf20Sopenharmony_ci{
28708c2ecf20Sopenharmony_ci	WARN_ON(!RB_EMPTY_NODE(&loc->cl_node));
28718c2ecf20Sopenharmony_ci
28728c2ecf20Sopenharmony_ci	kfree(loc);
28738c2ecf20Sopenharmony_ci}
28748c2ecf20Sopenharmony_ci
28758c2ecf20Sopenharmony_cistatic int crush_loc_compare(const struct crush_loc *loc1,
28768c2ecf20Sopenharmony_ci			     const struct crush_loc *loc2)
28778c2ecf20Sopenharmony_ci{
28788c2ecf20Sopenharmony_ci	return strcmp(loc1->cl_type_name, loc2->cl_type_name) ?:
28798c2ecf20Sopenharmony_ci	       strcmp(loc1->cl_name, loc2->cl_name);
28808c2ecf20Sopenharmony_ci}
28818c2ecf20Sopenharmony_ci
28828c2ecf20Sopenharmony_ciDEFINE_RB_FUNCS2(crush_loc, struct crush_loc_node, cl_loc, crush_loc_compare,
28838c2ecf20Sopenharmony_ci		 RB_BYPTR, const struct crush_loc *, cl_node)
28848c2ecf20Sopenharmony_ci
28858c2ecf20Sopenharmony_ci/*
28868c2ecf20Sopenharmony_ci * Parses a set of <bucket type name>':'<bucket name> pairs separated
28878c2ecf20Sopenharmony_ci * by '|', e.g. "rack:foo1|rack:foo2|datacenter:bar".
28888c2ecf20Sopenharmony_ci *
28898c2ecf20Sopenharmony_ci * Note that @crush_location is modified by strsep().
28908c2ecf20Sopenharmony_ci */
28918c2ecf20Sopenharmony_ciint ceph_parse_crush_location(char *crush_location, struct rb_root *locs)
28928c2ecf20Sopenharmony_ci{
28938c2ecf20Sopenharmony_ci	struct crush_loc_node *loc;
28948c2ecf20Sopenharmony_ci	const char *type_name, *name, *colon;
28958c2ecf20Sopenharmony_ci	size_t type_name_len, name_len;
28968c2ecf20Sopenharmony_ci
28978c2ecf20Sopenharmony_ci	dout("%s '%s'\n", __func__, crush_location);
28988c2ecf20Sopenharmony_ci	while ((type_name = strsep(&crush_location, "|"))) {
28998c2ecf20Sopenharmony_ci		colon = strchr(type_name, ':');
29008c2ecf20Sopenharmony_ci		if (!colon)
29018c2ecf20Sopenharmony_ci			return -EINVAL;
29028c2ecf20Sopenharmony_ci
29038c2ecf20Sopenharmony_ci		type_name_len = colon - type_name;
29048c2ecf20Sopenharmony_ci		if (type_name_len == 0)
29058c2ecf20Sopenharmony_ci			return -EINVAL;
29068c2ecf20Sopenharmony_ci
29078c2ecf20Sopenharmony_ci		name = colon + 1;
29088c2ecf20Sopenharmony_ci		name_len = strlen(name);
29098c2ecf20Sopenharmony_ci		if (name_len == 0)
29108c2ecf20Sopenharmony_ci			return -EINVAL;
29118c2ecf20Sopenharmony_ci
29128c2ecf20Sopenharmony_ci		loc = alloc_crush_loc(type_name_len, name_len);
29138c2ecf20Sopenharmony_ci		if (!loc)
29148c2ecf20Sopenharmony_ci			return -ENOMEM;
29158c2ecf20Sopenharmony_ci
29168c2ecf20Sopenharmony_ci		loc->cl_loc.cl_type_name = loc->cl_data;
29178c2ecf20Sopenharmony_ci		memcpy(loc->cl_loc.cl_type_name, type_name, type_name_len);
29188c2ecf20Sopenharmony_ci		loc->cl_loc.cl_type_name[type_name_len] = '\0';
29198c2ecf20Sopenharmony_ci
29208c2ecf20Sopenharmony_ci		loc->cl_loc.cl_name = loc->cl_data + type_name_len + 1;
29218c2ecf20Sopenharmony_ci		memcpy(loc->cl_loc.cl_name, name, name_len);
29228c2ecf20Sopenharmony_ci		loc->cl_loc.cl_name[name_len] = '\0';
29238c2ecf20Sopenharmony_ci
29248c2ecf20Sopenharmony_ci		if (!__insert_crush_loc(locs, loc)) {
29258c2ecf20Sopenharmony_ci			free_crush_loc(loc);
29268c2ecf20Sopenharmony_ci			return -EEXIST;
29278c2ecf20Sopenharmony_ci		}
29288c2ecf20Sopenharmony_ci
29298c2ecf20Sopenharmony_ci		dout("%s type_name '%s' name '%s'\n", __func__,
29308c2ecf20Sopenharmony_ci		     loc->cl_loc.cl_type_name, loc->cl_loc.cl_name);
29318c2ecf20Sopenharmony_ci	}
29328c2ecf20Sopenharmony_ci
29338c2ecf20Sopenharmony_ci	return 0;
29348c2ecf20Sopenharmony_ci}
29358c2ecf20Sopenharmony_ci
29368c2ecf20Sopenharmony_ciint ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2)
29378c2ecf20Sopenharmony_ci{
29388c2ecf20Sopenharmony_ci	struct rb_node *n1 = rb_first(locs1);
29398c2ecf20Sopenharmony_ci	struct rb_node *n2 = rb_first(locs2);
29408c2ecf20Sopenharmony_ci	int ret;
29418c2ecf20Sopenharmony_ci
29428c2ecf20Sopenharmony_ci	for ( ; n1 && n2; n1 = rb_next(n1), n2 = rb_next(n2)) {
29438c2ecf20Sopenharmony_ci		struct crush_loc_node *loc1 =
29448c2ecf20Sopenharmony_ci		    rb_entry(n1, struct crush_loc_node, cl_node);
29458c2ecf20Sopenharmony_ci		struct crush_loc_node *loc2 =
29468c2ecf20Sopenharmony_ci		    rb_entry(n2, struct crush_loc_node, cl_node);
29478c2ecf20Sopenharmony_ci
29488c2ecf20Sopenharmony_ci		ret = crush_loc_compare(&loc1->cl_loc, &loc2->cl_loc);
29498c2ecf20Sopenharmony_ci		if (ret)
29508c2ecf20Sopenharmony_ci			return ret;
29518c2ecf20Sopenharmony_ci	}
29528c2ecf20Sopenharmony_ci
29538c2ecf20Sopenharmony_ci	if (!n1 && n2)
29548c2ecf20Sopenharmony_ci		return -1;
29558c2ecf20Sopenharmony_ci	if (n1 && !n2)
29568c2ecf20Sopenharmony_ci		return 1;
29578c2ecf20Sopenharmony_ci	return 0;
29588c2ecf20Sopenharmony_ci}
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_civoid ceph_clear_crush_locs(struct rb_root *locs)
29618c2ecf20Sopenharmony_ci{
29628c2ecf20Sopenharmony_ci	while (!RB_EMPTY_ROOT(locs)) {
29638c2ecf20Sopenharmony_ci		struct crush_loc_node *loc =
29648c2ecf20Sopenharmony_ci		    rb_entry(rb_first(locs), struct crush_loc_node, cl_node);
29658c2ecf20Sopenharmony_ci
29668c2ecf20Sopenharmony_ci		erase_crush_loc(locs, loc);
29678c2ecf20Sopenharmony_ci		free_crush_loc(loc);
29688c2ecf20Sopenharmony_ci	}
29698c2ecf20Sopenharmony_ci}
29708c2ecf20Sopenharmony_ci
/*
 * Check @name against [a-zA-Z0-9-_.]+
 *
 * Returns true if @name is non-empty and consists solely of ASCII
 * letters, digits, '-', '_' and '.'; false otherwise.
 */
static bool is_valid_crush_name(const char *name)
{
	const char *p = name;

	for (;;) {
		char c = *p++;

		/* also rejects the terminator, i.e. an empty name */
		if ((c < 'a' || c > 'z') &&
		    (c < 'A' || c > 'Z') &&
		    (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.')
			return false;

		if (*p == '\0')
			return true;
	}
}
29868c2ecf20Sopenharmony_ci
29878c2ecf20Sopenharmony_ci/*
29888c2ecf20Sopenharmony_ci * Gets the parent of an item.  Returns its id (<0 because the
29898c2ecf20Sopenharmony_ci * parent is always a bucket), type id (>0 for the same reason,
29908c2ecf20Sopenharmony_ci * via @parent_type_id) and location (via @parent_loc).  If no
29918c2ecf20Sopenharmony_ci * parent, returns 0.
29928c2ecf20Sopenharmony_ci *
29938c2ecf20Sopenharmony_ci * Does a linear search, as there are no parent pointers of any
29948c2ecf20Sopenharmony_ci * kind.  Note that the result is ambigous for items that occur
29958c2ecf20Sopenharmony_ci * multiple times in the map.
29968c2ecf20Sopenharmony_ci */
29978c2ecf20Sopenharmony_cistatic int get_immediate_parent(struct crush_map *c, int id,
29988c2ecf20Sopenharmony_ci				u16 *parent_type_id,
29998c2ecf20Sopenharmony_ci				struct crush_loc *parent_loc)
30008c2ecf20Sopenharmony_ci{
30018c2ecf20Sopenharmony_ci	struct crush_bucket *b;
30028c2ecf20Sopenharmony_ci	struct crush_name_node *type_cn, *cn;
30038c2ecf20Sopenharmony_ci	int i, j;
30048c2ecf20Sopenharmony_ci
30058c2ecf20Sopenharmony_ci	for (i = 0; i < c->max_buckets; i++) {
30068c2ecf20Sopenharmony_ci		b = c->buckets[i];
30078c2ecf20Sopenharmony_ci		if (!b)
30088c2ecf20Sopenharmony_ci			continue;
30098c2ecf20Sopenharmony_ci
30108c2ecf20Sopenharmony_ci		/* ignore per-class shadow hierarchy */
30118c2ecf20Sopenharmony_ci		cn = lookup_crush_name(&c->names, b->id);
30128c2ecf20Sopenharmony_ci		if (!cn || !is_valid_crush_name(cn->cn_name))
30138c2ecf20Sopenharmony_ci			continue;
30148c2ecf20Sopenharmony_ci
30158c2ecf20Sopenharmony_ci		for (j = 0; j < b->size; j++) {
30168c2ecf20Sopenharmony_ci			if (b->items[j] != id)
30178c2ecf20Sopenharmony_ci				continue;
30188c2ecf20Sopenharmony_ci
30198c2ecf20Sopenharmony_ci			*parent_type_id = b->type;
30208c2ecf20Sopenharmony_ci			type_cn = lookup_crush_name(&c->type_names, b->type);
30218c2ecf20Sopenharmony_ci			parent_loc->cl_type_name = type_cn->cn_name;
30228c2ecf20Sopenharmony_ci			parent_loc->cl_name = cn->cn_name;
30238c2ecf20Sopenharmony_ci			return b->id;
30248c2ecf20Sopenharmony_ci		}
30258c2ecf20Sopenharmony_ci	}
30268c2ecf20Sopenharmony_ci
30278c2ecf20Sopenharmony_ci	return 0;  /* no parent */
30288c2ecf20Sopenharmony_ci}
30298c2ecf20Sopenharmony_ci
30308c2ecf20Sopenharmony_ci/*
30318c2ecf20Sopenharmony_ci * Calculates the locality/distance from an item to a client
30328c2ecf20Sopenharmony_ci * location expressed in terms of CRUSH hierarchy as a set of
30338c2ecf20Sopenharmony_ci * (bucket type name, bucket name) pairs.  Specifically, looks
30348c2ecf20Sopenharmony_ci * for the lowest-valued bucket type for which the location of
30358c2ecf20Sopenharmony_ci * @id matches one of the locations in @locs, so for standard
30368c2ecf20Sopenharmony_ci * bucket types (host = 1, rack = 3, datacenter = 8, zone = 9)
30378c2ecf20Sopenharmony_ci * a matching host is closer than a matching rack and a matching
30388c2ecf20Sopenharmony_ci * data center is closer than a matching zone.
30398c2ecf20Sopenharmony_ci *
30408c2ecf20Sopenharmony_ci * Specifying multiple locations (a "multipath" location) such
30418c2ecf20Sopenharmony_ci * as "rack=foo1 rack=foo2 datacenter=bar" is allowed -- @locs
30428c2ecf20Sopenharmony_ci * is a multimap.  The locality will be:
30438c2ecf20Sopenharmony_ci *
30448c2ecf20Sopenharmony_ci * - 3 for OSDs in racks foo1 and foo2
30458c2ecf20Sopenharmony_ci * - 8 for OSDs in data center bar
30468c2ecf20Sopenharmony_ci * - -1 for all other OSDs
30478c2ecf20Sopenharmony_ci *
30488c2ecf20Sopenharmony_ci * The lowest possible bucket type is 1, so the best locality
30498c2ecf20Sopenharmony_ci * for an OSD is 1 (i.e. a matching host).  Locality 0 would be
30508c2ecf20Sopenharmony_ci * the OSD itself.
30518c2ecf20Sopenharmony_ci */
30528c2ecf20Sopenharmony_ciint ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
30538c2ecf20Sopenharmony_ci			    struct rb_root *locs)
30548c2ecf20Sopenharmony_ci{
30558c2ecf20Sopenharmony_ci	struct crush_loc loc;
30568c2ecf20Sopenharmony_ci	u16 type_id;
30578c2ecf20Sopenharmony_ci
30588c2ecf20Sopenharmony_ci	/*
30598c2ecf20Sopenharmony_ci	 * Instead of repeated get_immediate_parent() calls,
30608c2ecf20Sopenharmony_ci	 * the location of @id could be obtained with a single
30618c2ecf20Sopenharmony_ci	 * depth-first traversal.
30628c2ecf20Sopenharmony_ci	 */
30638c2ecf20Sopenharmony_ci	for (;;) {
30648c2ecf20Sopenharmony_ci		id = get_immediate_parent(osdmap->crush, id, &type_id, &loc);
30658c2ecf20Sopenharmony_ci		if (id >= 0)
30668c2ecf20Sopenharmony_ci			return -1;  /* not local */
30678c2ecf20Sopenharmony_ci
30688c2ecf20Sopenharmony_ci		if (lookup_crush_loc(locs, &loc))
30698c2ecf20Sopenharmony_ci			return type_id;
30708c2ecf20Sopenharmony_ci	}
30718c2ecf20Sopenharmony_ci}
3072