18c2ecf20Sopenharmony_ci
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci   rbd.c -- Export ceph rados objects as a Linux block device
48c2ecf20Sopenharmony_ci
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci   based on drivers/block/osdblk.c:
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci   Copyright 2009 Red Hat, Inc.
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci   This program is free software; you can redistribute it and/or modify
118c2ecf20Sopenharmony_ci   it under the terms of the GNU General Public License as published by
128c2ecf20Sopenharmony_ci   the Free Software Foundation.
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci   This program is distributed in the hope that it will be useful,
158c2ecf20Sopenharmony_ci   but WITHOUT ANY WARRANTY; without even the implied warranty of
168c2ecf20Sopenharmony_ci   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
178c2ecf20Sopenharmony_ci   GNU General Public License for more details.
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci   You should have received a copy of the GNU General Public License
208c2ecf20Sopenharmony_ci   along with this program; see the file COPYING.  If not, write to
218c2ecf20Sopenharmony_ci   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci   For usage instructions, please refer to:
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci                 Documentation/ABI/testing/sysfs-bus-rbd
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci */
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci#include <linux/ceph/libceph.h>
328c2ecf20Sopenharmony_ci#include <linux/ceph/osd_client.h>
338c2ecf20Sopenharmony_ci#include <linux/ceph/mon_client.h>
348c2ecf20Sopenharmony_ci#include <linux/ceph/cls_lock_client.h>
358c2ecf20Sopenharmony_ci#include <linux/ceph/striper.h>
368c2ecf20Sopenharmony_ci#include <linux/ceph/decode.h>
378c2ecf20Sopenharmony_ci#include <linux/fs_parser.h>
388c2ecf20Sopenharmony_ci#include <linux/bsearch.h>
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci#include <linux/kernel.h>
418c2ecf20Sopenharmony_ci#include <linux/device.h>
428c2ecf20Sopenharmony_ci#include <linux/module.h>
438c2ecf20Sopenharmony_ci#include <linux/blk-mq.h>
448c2ecf20Sopenharmony_ci#include <linux/fs.h>
458c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
468c2ecf20Sopenharmony_ci#include <linux/slab.h>
478c2ecf20Sopenharmony_ci#include <linux/idr.h>
488c2ecf20Sopenharmony_ci#include <linux/workqueue.h>
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci#include "rbd_types.h"
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci#define RBD_DEBUG	/* Activate rbd_assert() calls */
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci/*
558c2ecf20Sopenharmony_ci * Increment the given counter and return its updated value.
568c2ecf20Sopenharmony_ci * If the counter is already 0 it will not be incremented.
578c2ecf20Sopenharmony_ci * If the counter is already at its maximum value returns
588c2ecf20Sopenharmony_ci * -EINVAL without updating it.
598c2ecf20Sopenharmony_ci */
608c2ecf20Sopenharmony_cistatic int atomic_inc_return_safe(atomic_t *v)
618c2ecf20Sopenharmony_ci{
628c2ecf20Sopenharmony_ci	unsigned int counter;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0);
658c2ecf20Sopenharmony_ci	if (counter <= (unsigned int)INT_MAX)
668c2ecf20Sopenharmony_ci		return (int)counter;
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	atomic_dec(v);
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	return -EINVAL;
718c2ecf20Sopenharmony_ci}
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci/* Decrement the counter.  Return the resulting value, or -EINVAL */
748c2ecf20Sopenharmony_cistatic int atomic_dec_return_safe(atomic_t *v)
758c2ecf20Sopenharmony_ci{
768c2ecf20Sopenharmony_ci	int counter;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	counter = atomic_dec_return(v);
798c2ecf20Sopenharmony_ci	if (counter >= 0)
808c2ecf20Sopenharmony_ci		return counter;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	atomic_inc(v);
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	return -EINVAL;
858c2ecf20Sopenharmony_ci}
868c2ecf20Sopenharmony_ci
/* Driver name; also used as the prefix of every rbd_warn() message. */
#define RBD_DRV_NAME "rbd"

#define RBD_MINORS_PER_MAJOR		256
/* Shift used by rbd_dev_id_to_minor() and minor_to_rbd_dev_id() */
#define RBD_SINGLE_MAJOR_PART_SHIFT	4

#define RBD_MAX_PARENT_CHAIN_LEN	16

#define RBD_SNAP_DEV_NAME_PREFIX	"snap_"
/* NAME_MAX less the prefix length (the "- 1" drops the prefix's NUL) */
#define RBD_MAX_SNAP_NAME_LEN	\
			(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))

#define RBD_MAX_SNAP_COUNT	510	/* allows max snapc to fit in 4KB */

#define RBD_SNAP_HEAD_NAME	"-"

#define	BAD_SNAP_INDEX	U32_MAX		/* invalid index into snap array */

/* This allows a single page to hold an image name sent by OSD */
#define RBD_IMAGE_NAME_LEN_MAX	(PAGE_SIZE - sizeof (__le32) - 1)
#define RBD_IMAGE_ID_LEN_MAX	64

#define RBD_OBJ_PREFIX_LEN_MAX	64

#define RBD_NOTIFY_TIMEOUT	5	/* seconds */
#define RBD_RETRY_DELAY		msecs_to_jiffies(1000)

/* Feature bits */

/* Note: bit 6 has no definition in this list. */
#define RBD_FEATURE_LAYERING		(1ULL<<0)
#define RBD_FEATURE_STRIPINGV2		(1ULL<<1)
#define RBD_FEATURE_EXCLUSIVE_LOCK	(1ULL<<2)
#define RBD_FEATURE_OBJECT_MAP		(1ULL<<3)
#define RBD_FEATURE_FAST_DIFF		(1ULL<<4)
#define RBD_FEATURE_DEEP_FLATTEN	(1ULL<<5)
#define RBD_FEATURE_DATA_POOL		(1ULL<<7)
#define RBD_FEATURE_OPERATIONS		(1ULL<<8)

/* Union of every feature bit defined above */
#define RBD_FEATURES_ALL	(RBD_FEATURE_LAYERING |		\
				 RBD_FEATURE_STRIPINGV2 |	\
				 RBD_FEATURE_EXCLUSIVE_LOCK |	\
				 RBD_FEATURE_OBJECT_MAP |	\
				 RBD_FEATURE_FAST_DIFF |	\
				 RBD_FEATURE_DEEP_FLATTEN |	\
				 RBD_FEATURE_DATA_POOL |	\
				 RBD_FEATURE_OPERATIONS)

/* Features supported by this (client software) implementation. */

#define RBD_FEATURES_SUPPORTED	(RBD_FEATURES_ALL)

/*
 * An RBD device name will be "rbd#", where the "rbd" comes from
 * RBD_DRV_NAME above, and # is a unique integer identifier.
 * DEV_NAME_LEN is the size of the buffer that holds that name
 * (rbd_device.name).
 */
#define DEV_NAME_LEN		32
1428c2ecf20Sopenharmony_ci
/*
 * block device image metadata (in-memory version)
 *
 * The members fall into two groups: values that are fixed for the
 * lifetime of an image, and values that may have to be refreshed.
 */
struct rbd_image_header {
	/* These six fields never change for a given rbd image */
	char *object_prefix;
	__u8 obj_order;
	u64 stripe_unit;
	u64 stripe_count;
	s64 data_pool_id;
	u64 features;		/* Might be changeable someday? */

	/* The remaining fields need to be updated occasionally */
	u64 image_size;
	struct ceph_snap_context *snapc;
	char *snap_names;	/* format 1 only */
	u64 *snap_sizes;	/* format 1 only */
};
1618c2ecf20Sopenharmony_ci
/*
 * An rbd image specification.
 *
 * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely
 * identify an image.  Each rbd_dev structure includes a pointer to
 * an rbd_spec structure that encapsulates this identity.
 *
 * Each of the id's in an rbd_spec has an associated name.  For a
 * user-mapped image, the names are supplied and the id's associated
 * with them are looked up.  For a layered image, a parent image is
 * defined by the tuple, and the names are looked up.
 *
 * An rbd_dev structure contains a parent_spec pointer which is
 * non-null if the image it represents is a child in a layered
 * image.  This pointer will refer to the rbd_spec structure used
 * by the parent rbd_dev for its own identity (i.e., the structure
 * is shared between the parent and child).
 *
 * Since these structures are populated once, during the discovery
 * phase of image construction, they are effectively immutable so
 * we make no effort to synchronize access to them.
 *
 * Note that code herein does not assume the image name is known (it
 * could be a null pointer).
 */
struct rbd_spec {
	u64		pool_id;
	const char	*pool_name;
	const char	*pool_ns;	/* NULL if default, never "" */

	const char	*image_id;
	const char	*image_name;	/* may be NULL, see above */

	u64		snap_id;
	const char	*snap_name;

	/* reference count; the spec can be shared (see above) */
	struct kref	kref;
};
2008c2ecf20Sopenharmony_ci
/*
 * an instance of the client.  multiple devices may share an rbd client.
 */
struct rbd_client {
	struct ceph_client	*client;
	struct kref		kref;	/* reference count (client may be shared) */
	struct list_head	node;	/* NOTE(review): presumably links into rbd_client_list - confirm */
};
2098c2ecf20Sopenharmony_ci
/*
 * Result accumulator embedded in both rbd_obj_request and
 * rbd_img_request (their "pending" members).
 */
struct pending_result {
	int			result;		/* first nonzero result */
	int			num_pending;	/* NOTE(review): presumably the number of outstanding sub-requests - confirm */
};

/* Forward declaration; the full definition follows rbd_obj_request. */
struct rbd_img_request;
2168c2ecf20Sopenharmony_ci
/* Kind of data buffer attached to a request (see rbd_img_request.data_type). */
enum obj_request_type {
	OBJ_REQUEST_NODATA = 1,
	OBJ_REQUEST_BIO,	/* pointer into provided bio (list) */
	OBJ_REQUEST_BVECS,	/* pointer into provided bio_vec array */
	OBJ_REQUEST_OWN_BVECS,	/* private bio_vec array, doesn't own pages */
};

/* Operation carried by an image request (see rbd_img_request.op_type). */
enum obj_operation_type {
	OBJ_OP_READ = 1,
	OBJ_OP_WRITE,
	OBJ_OP_DISCARD,
	OBJ_OP_ZEROOUT,
};
2308c2ecf20Sopenharmony_ci
/* Bit values for rbd_obj_request.flags */
#define RBD_OBJ_FLAG_DELETION			(1U << 0)
#define RBD_OBJ_FLAG_COPYUP_ENABLED		(1U << 1)
#define RBD_OBJ_FLAG_COPYUP_ZEROS		(1U << 2)
#define RBD_OBJ_FLAG_MAY_EXIST			(1U << 3)
#define RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT	(1U << 4)
2368c2ecf20Sopenharmony_ci
/* Values of rbd_obj_request.read_state (object requests that are reads). */
enum rbd_obj_read_state {
	RBD_OBJ_READ_START = 1,
	RBD_OBJ_READ_OBJECT,
	RBD_OBJ_READ_PARENT,
};
2428c2ecf20Sopenharmony_ci
/*
 * Writes go through the following state machine to deal with
 * layering:
 *
 *            . . . . . RBD_OBJ_WRITE_GUARD. . . . . . . . . . . . . .
 *            .                 |                                    .
 *            .                 v                                    .
 *            .    RBD_OBJ_WRITE_READ_FROM_PARENT. . .               .
 *            .                 |                    .               .
 *            .                 v                    v (deep-copyup  .
 *    (image  .   RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC   .  not needed)  .
 * flattened) v                 |                    .               .
 *            .                 v                    .               .
 *            . . . .RBD_OBJ_WRITE_COPYUP_OPS. . . . .      (copyup  .
 *                              |                        not needed) v
 *                              v                                    .
 *                            done . . . . . . . . . . . . . . . . . .
 *                              ^
 *                              |
 *                     RBD_OBJ_WRITE_FLAT
 *
 * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether
 * assert_exists guard is needed or not (in some cases it's not needed
 * even if there is a parent).
 *
 * NOTE(review): none of the state names used in the diagram and text
 * above (RBD_OBJ_WRITE_GUARD, _READ_FROM_PARENT, _COPYUP_EMPTY_SNAPC,
 * _COPYUP_OPS, _FLAT) is a member of enum rbd_obj_write_state below.
 * The description appears to predate the current enum and should be
 * confirmed against the state-machine code and updated.
 */
/* Values of rbd_obj_request.write_state (object requests that are writes). */
enum rbd_obj_write_state {
	RBD_OBJ_WRITE_START = 1,
	RBD_OBJ_WRITE_PRE_OBJECT_MAP,
	RBD_OBJ_WRITE_OBJECT,
	__RBD_OBJ_WRITE_COPYUP,
	RBD_OBJ_WRITE_COPYUP,
	RBD_OBJ_WRITE_POST_OBJECT_MAP,
};
2768c2ecf20Sopenharmony_ci
/* Values of rbd_obj_request.copyup_state. */
enum rbd_obj_copyup_state {
	RBD_OBJ_COPYUP_START = 1,
	RBD_OBJ_COPYUP_READ_PARENT,
	__RBD_OBJ_COPYUP_OBJECT_MAPS,
	RBD_OBJ_COPYUP_OBJECT_MAPS,
	__RBD_OBJ_COPYUP_WRITE_OBJECT,
	RBD_OBJ_COPYUP_WRITE_OBJECT,
};
2858c2ecf20Sopenharmony_ci
/*
 * A request against a single object.  Object requests are linked onto
 * their image request's object_extents list through ex.oe_item (see
 * for_each_obj_request()).
 */
struct rbd_obj_request {
	struct ceph_object_extent ex;
	unsigned int		flags;	/* RBD_OBJ_FLAG_* */
	/* state machine position; which member applies depends on the op */
	union {
		enum rbd_obj_read_state	 read_state;	/* for reads */
		enum rbd_obj_write_state write_state;	/* for writes */
	};

	struct rbd_img_request	*img_request;
	struct ceph_file_extent	*img_extents;
	u32			num_img_extents;

	/*
	 * Position in the data buffer.
	 * NOTE(review): presumably bio_pos is used for OBJ_REQUEST_BIO and
	 * the bvec_* members for the bio_vec based types - confirm.
	 */
	union {
		struct ceph_bio_iter	bio_pos;
		struct {
			struct ceph_bvec_iter	bvec_pos;
			u32			bvec_count;
			u32			bvec_idx;
		};
	};

	enum rbd_obj_copyup_state copyup_state;
	struct bio_vec		*copyup_bvecs;
	u32			copyup_bvec_count;

	struct list_head	osd_reqs;	/* w/ r_private_item */

	struct mutex		state_mutex;
	struct pending_result	pending;
	struct kref		kref;
};
3178c2ecf20Sopenharmony_ci
/*
 * Flags for rbd_img_request.flags.
 * NOTE(review): the enumerators look like bit numbers rather than
 * masks (flags is an unsigned long) - confirm at the users.
 */
enum img_req_flags {
	IMG_REQ_CHILD,		/* initiator: block = 0, child image = 1 */
	IMG_REQ_LAYERED,	/* ENOENT handling: normal = 0, layered = 1 */
};

/* Values of rbd_img_request.state. */
enum rbd_img_state {
	RBD_IMG_START = 1,
	RBD_IMG_EXCLUSIVE_LOCK,
	__RBD_IMG_OBJECT_REQUESTS,
	RBD_IMG_OBJECT_REQUESTS,
};
3298c2ecf20Sopenharmony_ci
/*
 * A request against an image.  It owns a list of object requests
 * (object_extents), walked with for_each_obj_request().
 */
struct rbd_img_request {
	struct rbd_device	*rbd_dev;
	enum obj_operation_type	op_type;
	enum obj_request_type	data_type;
	unsigned long		flags;		/* see enum img_req_flags */
	enum rbd_img_state	state;
	union {
		u64			snap_id;	/* for reads */
		struct ceph_snap_context *snapc;	/* for writes */
	};
	struct rbd_obj_request	*obj_request;	/* obj req initiator */

	struct list_head	lock_item;
	struct list_head	object_extents;	/* obj_req.ex structs */

	struct mutex		state_mutex;
	struct pending_result	pending;
	struct work_struct	work;
	int			work_result;
};
3508c2ecf20Sopenharmony_ci
/*
 * Iterate over the object requests of image request @ireq, i.e. the
 * entries linked on ireq->object_extents through ex.oe_item.  @oreq is
 * the loop cursor.  The _safe variant takes an extra cursor @n and is
 * built on list_for_each_entry_safe().
 */
#define for_each_obj_request(ireq, oreq) \
	list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
#define for_each_obj_request_safe(ireq, oreq, n) \
	list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
3558c2ecf20Sopenharmony_ci
/* Values of rbd_device.watch_state. */
enum rbd_watch_state {
	RBD_WATCH_STATE_UNREGISTERED,
	RBD_WATCH_STATE_REGISTERED,
	RBD_WATCH_STATE_ERROR,
};

/*
 * Values of rbd_device.lock_state.  __rbd_is_lock_owner() treats both
 * LOCKED and RELEASING as "this device owns the lock".
 */
enum rbd_lock_state {
	RBD_LOCK_STATE_UNLOCKED,
	RBD_LOCK_STATE_LOCKED,
	RBD_LOCK_STATE_RELEASING,
};
3678c2ecf20Sopenharmony_ci
/* WatchNotify::ClientId */
struct rbd_client_id {
	u64 gid;
	u64 handle;
};

/* Per-mapping state; embedded in rbd_device as "mapping". */
struct rbd_mapping {
	u64                     size;
};
3778c2ecf20Sopenharmony_ci
/*
 * a single device
 *
 * The members are grouped roughly by purpose; the group comments
 * below are taken from the member names and types.
 */
struct rbd_device {
	int			dev_id;		/* blkdev unique id */

	int			major;		/* blkdev assigned major */
	int			minor;
	struct gendisk		*disk;		/* blkdev's gendisk and rq */

	u32			image_format;	/* Either 1 or 2 */
	struct rbd_client	*rbd_client;

	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */

	spinlock_t		lock;		/* queue, flags, open_count */

	struct rbd_image_header	header;
	unsigned long		flags;		/* possibly lock protected */
	struct rbd_spec		*spec;
	struct rbd_options	*opts;
	char			*config_info;	/* add{,_single_major} string */

	struct ceph_object_id	header_oid;
	struct ceph_object_locator header_oloc;

	struct ceph_file_layout	layout;		/* used for all rbd requests */

	/* watch state */
	struct mutex		watch_mutex;
	enum rbd_watch_state	watch_state;
	struct ceph_osd_linger_request *watch_handle;
	u64			watch_cookie;
	struct delayed_work	watch_dwork;

	/* lock state; __rbd_is_lock_owner() requires lock_rwsem to be held */
	struct rw_semaphore	lock_rwsem;
	enum rbd_lock_state	lock_state;
	char			lock_cookie[32];
	struct rbd_client_id	owner_cid;
	struct work_struct	acquired_lock_work;
	struct work_struct	released_lock_work;
	struct delayed_work	lock_dwork;
	struct work_struct	unlock_work;
	spinlock_t		lock_lists_lock;
	struct list_head	acquiring_list;
	struct list_head	running_list;
	struct completion	acquire_wait;
	int			acquire_err;
	struct completion	releasing_wait;

	/* object map */
	spinlock_t		object_map_lock;
	u8			*object_map;
	u64			object_map_size;	/* in objects */
	u64			object_map_flags;

	struct workqueue_struct	*task_wq;

	/* parent image (layering) */
	struct rbd_spec		*parent_spec;
	u64			parent_overlap;
	atomic_t		parent_ref;
	struct rbd_device	*parent;

	/* Block layer tags. */
	struct blk_mq_tag_set	tag_set;

	/* protects updating the header */
	struct rw_semaphore     header_rwsem;

	struct rbd_mapping	mapping;

	struct list_head	node;

	/* sysfs related */
	struct device		dev;
	unsigned long		open_count;	/* protected by lock */
};
4538c2ecf20Sopenharmony_ci
/*
 * Flag bits for rbd_dev->flags:
 * - REMOVING (which is coupled with rbd_dev->open_count) is protected
 *   by rbd_dev->lock
 *
 * The enumerators are bit numbers (rbd_is_ro() passes one to test_bit()).
 */
enum rbd_dev_flags {
	RBD_DEV_FLAG_EXISTS,	/* rbd_dev_device_setup() ran */
	RBD_DEV_FLAG_REMOVING,	/* this mapping is being removed */
	RBD_DEV_FLAG_READONLY,  /* -o ro or snapshot */
};
4648c2ecf20Sopenharmony_ci
/* File-scope driver state. */

static DEFINE_MUTEX(client_mutex);	/* Serialize client creation */

static LIST_HEAD(rbd_dev_list);    /* devices */
static DEFINE_SPINLOCK(rbd_dev_list_lock);

static LIST_HEAD(rbd_client_list);		/* clients */
static DEFINE_SPINLOCK(rbd_client_list_lock);

/* Slab caches for frequently-allocated structures */

static struct kmem_cache	*rbd_img_request_cache;
static struct kmem_cache	*rbd_obj_request_cache;

static int rbd_major;
static DEFINE_IDA(rbd_dev_id_ida);

static struct workqueue_struct *rbd_wq;

/* Statically allocated snapshot context whose refcount starts at 1. */
static struct ceph_snap_context rbd_empty_snapc = {
	.nref = REFCOUNT_INIT(1),
};

/*
 * single-major requires >= 0.75 version of userspace rbd utility.
 *
 * Module parameter, exposed with mode 0444.  When it is false,
 * rbd_bus_is_visible() hides the *_single_major bus attributes.
 */
static bool single_major = true;
module_param(single_major, bool, 0444);
MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
4938c2ecf20Sopenharmony_ci
/*
 * Forward declarations.  The four *_store() handlers back the
 * write-only bus attributes declared with BUS_ATTR_WO() further down.
 */
static ssize_t add_store(struct bus_type *bus, const char *buf, size_t count);
static ssize_t remove_store(struct bus_type *bus, const char *buf,
			    size_t count);
static ssize_t add_single_major_store(struct bus_type *bus, const char *buf,
				      size_t count);
static ssize_t remove_single_major_store(struct bus_type *bus, const char *buf,
					 size_t count);
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_cistatic int rbd_dev_id_to_minor(int dev_id)
5048c2ecf20Sopenharmony_ci{
5058c2ecf20Sopenharmony_ci	return dev_id << RBD_SINGLE_MAJOR_PART_SHIFT;
5068c2ecf20Sopenharmony_ci}
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_cistatic int minor_to_rbd_dev_id(int minor)
5098c2ecf20Sopenharmony_ci{
5108c2ecf20Sopenharmony_ci	return minor >> RBD_SINGLE_MAJOR_PART_SHIFT;
5118c2ecf20Sopenharmony_ci}
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_cistatic bool rbd_is_ro(struct rbd_device *rbd_dev)
5148c2ecf20Sopenharmony_ci{
5158c2ecf20Sopenharmony_ci	return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
5168c2ecf20Sopenharmony_ci}
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_cistatic bool rbd_is_snap(struct rbd_device *rbd_dev)
5198c2ecf20Sopenharmony_ci{
5208c2ecf20Sopenharmony_ci	return rbd_dev->spec->snap_id != CEPH_NOSNAP;
5218c2ecf20Sopenharmony_ci}
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_cistatic bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
5248c2ecf20Sopenharmony_ci{
5258c2ecf20Sopenharmony_ci	lockdep_assert_held(&rbd_dev->lock_rwsem);
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
5288c2ecf20Sopenharmony_ci	       rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING;
5298c2ecf20Sopenharmony_ci}
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_cistatic bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
5328c2ecf20Sopenharmony_ci{
5338c2ecf20Sopenharmony_ci	bool is_lock_owner;
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_ci	down_read(&rbd_dev->lock_rwsem);
5368c2ecf20Sopenharmony_ci	is_lock_owner = __rbd_is_lock_owner(rbd_dev);
5378c2ecf20Sopenharmony_ci	up_read(&rbd_dev->lock_rwsem);
5388c2ecf20Sopenharmony_ci	return is_lock_owner;
5398c2ecf20Sopenharmony_ci}
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_cistatic ssize_t supported_features_show(struct bus_type *bus, char *buf)
5428c2ecf20Sopenharmony_ci{
5438c2ecf20Sopenharmony_ci	return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
5448c2ecf20Sopenharmony_ci}
5458c2ecf20Sopenharmony_ci
/*
 * Bus attributes: four write-only controls backed by the *_store()
 * handlers declared earlier, plus the read-only supported_features.
 */
static BUS_ATTR_WO(add);
static BUS_ATTR_WO(remove);
static BUS_ATTR_WO(add_single_major);
static BUS_ATTR_WO(remove_single_major);
static BUS_ATTR_RO(supported_features);

/* NULL-terminated attribute list referenced by rbd_bus_group */
static struct attribute *rbd_bus_attrs[] = {
	&bus_attr_add.attr,
	&bus_attr_remove.attr,
	&bus_attr_add_single_major.attr,
	&bus_attr_remove_single_major.attr,
	&bus_attr_supported_features.attr,
	NULL,
};
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_cistatic umode_t rbd_bus_is_visible(struct kobject *kobj,
5628c2ecf20Sopenharmony_ci				  struct attribute *attr, int index)
5638c2ecf20Sopenharmony_ci{
5648c2ecf20Sopenharmony_ci	if (!single_major &&
5658c2ecf20Sopenharmony_ci	    (attr == &bus_attr_add_single_major.attr ||
5668c2ecf20Sopenharmony_ci	     attr == &bus_attr_remove_single_major.attr))
5678c2ecf20Sopenharmony_ci		return 0;
5688c2ecf20Sopenharmony_ci
5698c2ecf20Sopenharmony_ci	return attr->mode;
5708c2ecf20Sopenharmony_ci}
5718c2ecf20Sopenharmony_ci
/* Attribute group for the bus; visibility is filtered per attribute. */
static const struct attribute_group rbd_bus_group = {
	.attrs = rbd_bus_attrs,
	.is_visible = rbd_bus_is_visible,
};
/* NOTE(review): presumably defines rbd_bus_groups[] used below - confirm */
__ATTRIBUTE_GROUPS(rbd_bus);

/* The "rbd" bus type */
static struct bus_type rbd_bus_type = {
	.name		= "rbd",
	.bus_groups	= rbd_bus_groups,
};
5828c2ecf20Sopenharmony_ci
/*
 * Release callback for rbd_root_dev.  Intentionally empty:
 * rbd_root_dev is a statically allocated object, so there is nothing
 * to free here.
 */
static void rbd_root_dev_release(struct device *dev)
{
}

/* Statically allocated root device named "rbd" */
static struct device rbd_root_dev = {
	.init_name =    "rbd",
	.release =      rbd_root_dev_release,
};
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_cistatic __printf(2, 3)
5938c2ecf20Sopenharmony_civoid rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
5948c2ecf20Sopenharmony_ci{
5958c2ecf20Sopenharmony_ci	struct va_format vaf;
5968c2ecf20Sopenharmony_ci	va_list args;
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	va_start(args, fmt);
5998c2ecf20Sopenharmony_ci	vaf.fmt = fmt;
6008c2ecf20Sopenharmony_ci	vaf.va = &args;
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci	if (!rbd_dev)
6038c2ecf20Sopenharmony_ci		printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf);
6048c2ecf20Sopenharmony_ci	else if (rbd_dev->disk)
6058c2ecf20Sopenharmony_ci		printk(KERN_WARNING "%s: %s: %pV\n",
6068c2ecf20Sopenharmony_ci			RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf);
6078c2ecf20Sopenharmony_ci	else if (rbd_dev->spec && rbd_dev->spec->image_name)
6088c2ecf20Sopenharmony_ci		printk(KERN_WARNING "%s: image %s: %pV\n",
6098c2ecf20Sopenharmony_ci			RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf);
6108c2ecf20Sopenharmony_ci	else if (rbd_dev->spec && rbd_dev->spec->image_id)
6118c2ecf20Sopenharmony_ci		printk(KERN_WARNING "%s: id %s: %pV\n",
6128c2ecf20Sopenharmony_ci			RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf);
6138c2ecf20Sopenharmony_ci	else	/* punt */
6148c2ecf20Sopenharmony_ci		printk(KERN_WARNING "%s: rbd_dev %p: %pV\n",
6158c2ecf20Sopenharmony_ci			RBD_DRV_NAME, rbd_dev, &vaf);
6168c2ecf20Sopenharmony_ci	va_end(args);
6178c2ecf20Sopenharmony_ci}
6188c2ecf20Sopenharmony_ci
/*
 * rbd_assert(expr) - debugging assertion.
 *
 * With RBD_DEBUG defined, a false @expr logs the function name, line
 * number and the failed expression text at KERN_ERR and then calls BUG().
 * Without RBD_DEBUG the macro expands to a no-op and @expr is NOT
 * evaluated, so @expr must not have side effects the caller relies on.
 *
 * The debug variant is wrapped in do { } while (0) so that it behaves as
 * a single statement: it always needs a trailing semicolon (just like the
 * non-debug variant) and cannot capture an "else" that follows it.
 */
#ifdef RBD_DEBUG
#define rbd_assert(expr)						\
	do {								\
		if (unlikely(!(expr))) {				\
			printk(KERN_ERR "\nAssertion failure in %s() "	\
						"at line %d:\n\n"	\
					"\trbd_assert(%s);\n\n",	\
					__func__, __LINE__, #expr);	\
			BUG();						\
		}							\
	} while (0)
#else /* !RBD_DEBUG */
#  define rbd_assert(expr)	((void) 0)
#endif /* !RBD_DEBUG */
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_cistatic void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_cistatic int rbd_dev_refresh(struct rbd_device *rbd_dev);
6358c2ecf20Sopenharmony_cistatic int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
6368c2ecf20Sopenharmony_ci				     struct rbd_image_header *header);
6378c2ecf20Sopenharmony_cistatic const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
6388c2ecf20Sopenharmony_ci					u64 snap_id);
6398c2ecf20Sopenharmony_cistatic int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
6408c2ecf20Sopenharmony_ci				u8 *order, u64 *snap_size);
6418c2ecf20Sopenharmony_cistatic int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev);
6428c2ecf20Sopenharmony_ci
6438c2ecf20Sopenharmony_cistatic void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result);
6448c2ecf20Sopenharmony_cistatic void rbd_img_handle_request(struct rbd_img_request *img_req, int result);
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci/*
6478c2ecf20Sopenharmony_ci * Return true if nothing else is pending.
6488c2ecf20Sopenharmony_ci */
6498c2ecf20Sopenharmony_cistatic bool pending_result_dec(struct pending_result *pending, int *result)
6508c2ecf20Sopenharmony_ci{
6518c2ecf20Sopenharmony_ci	rbd_assert(pending->num_pending > 0);
6528c2ecf20Sopenharmony_ci
6538c2ecf20Sopenharmony_ci	if (*result && !pending->result)
6548c2ecf20Sopenharmony_ci		pending->result = *result;
6558c2ecf20Sopenharmony_ci	if (--pending->num_pending)
6568c2ecf20Sopenharmony_ci		return false;
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_ci	*result = pending->result;
6598c2ecf20Sopenharmony_ci	return true;
6608c2ecf20Sopenharmony_ci}
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_cistatic int rbd_open(struct block_device *bdev, fmode_t mode)
6638c2ecf20Sopenharmony_ci{
6648c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
6658c2ecf20Sopenharmony_ci	bool removing = false;
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci	spin_lock_irq(&rbd_dev->lock);
6688c2ecf20Sopenharmony_ci	if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags))
6698c2ecf20Sopenharmony_ci		removing = true;
6708c2ecf20Sopenharmony_ci	else
6718c2ecf20Sopenharmony_ci		rbd_dev->open_count++;
6728c2ecf20Sopenharmony_ci	spin_unlock_irq(&rbd_dev->lock);
6738c2ecf20Sopenharmony_ci	if (removing)
6748c2ecf20Sopenharmony_ci		return -ENOENT;
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	(void) get_device(&rbd_dev->dev);
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_ci	return 0;
6798c2ecf20Sopenharmony_ci}
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_cistatic void rbd_release(struct gendisk *disk, fmode_t mode)
6828c2ecf20Sopenharmony_ci{
6838c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = disk->private_data;
6848c2ecf20Sopenharmony_ci	unsigned long open_count_before;
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_ci	spin_lock_irq(&rbd_dev->lock);
6878c2ecf20Sopenharmony_ci	open_count_before = rbd_dev->open_count--;
6888c2ecf20Sopenharmony_ci	spin_unlock_irq(&rbd_dev->lock);
6898c2ecf20Sopenharmony_ci	rbd_assert(open_count_before > 0);
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_ci	put_device(&rbd_dev->dev);
6928c2ecf20Sopenharmony_ci}
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_cistatic int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg)
6958c2ecf20Sopenharmony_ci{
6968c2ecf20Sopenharmony_ci	int ro;
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	if (get_user(ro, (int __user *)arg))
6998c2ecf20Sopenharmony_ci		return -EFAULT;
7008c2ecf20Sopenharmony_ci
7018c2ecf20Sopenharmony_ci	/*
7028c2ecf20Sopenharmony_ci	 * Both images mapped read-only and snapshots can't be marked
7038c2ecf20Sopenharmony_ci	 * read-write.
7048c2ecf20Sopenharmony_ci	 */
7058c2ecf20Sopenharmony_ci	if (!ro) {
7068c2ecf20Sopenharmony_ci		if (rbd_is_ro(rbd_dev))
7078c2ecf20Sopenharmony_ci			return -EROFS;
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci		rbd_assert(!rbd_is_snap(rbd_dev));
7108c2ecf20Sopenharmony_ci	}
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	/* Let blkdev_roset() handle it */
7138c2ecf20Sopenharmony_ci	return -ENOTTY;
7148c2ecf20Sopenharmony_ci}
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_cistatic int rbd_ioctl(struct block_device *bdev, fmode_t mode,
7178c2ecf20Sopenharmony_ci			unsigned int cmd, unsigned long arg)
7188c2ecf20Sopenharmony_ci{
7198c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = bdev->bd_disk->private_data;
7208c2ecf20Sopenharmony_ci	int ret;
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_ci	switch (cmd) {
7238c2ecf20Sopenharmony_ci	case BLKROSET:
7248c2ecf20Sopenharmony_ci		ret = rbd_ioctl_set_ro(rbd_dev, arg);
7258c2ecf20Sopenharmony_ci		break;
7268c2ecf20Sopenharmony_ci	default:
7278c2ecf20Sopenharmony_ci		ret = -ENOTTY;
7288c2ecf20Sopenharmony_ci	}
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci	return ret;
7318c2ecf20Sopenharmony_ci}
7328c2ecf20Sopenharmony_ci
#ifdef CONFIG_COMPAT
/* Compat ioctl entry point: forwards all arguments unchanged to rbd_ioctl(). */
static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode,
				unsigned int cmd, unsigned long arg)
{
	int ret;

	ret = rbd_ioctl(bdev, mode, cmd, arg);

	return ret;
}
#endif /* CONFIG_COMPAT */
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_cistatic const struct block_device_operations rbd_bd_ops = {
7428c2ecf20Sopenharmony_ci	.owner			= THIS_MODULE,
7438c2ecf20Sopenharmony_ci	.open			= rbd_open,
7448c2ecf20Sopenharmony_ci	.release		= rbd_release,
7458c2ecf20Sopenharmony_ci	.ioctl			= rbd_ioctl,
7468c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT
7478c2ecf20Sopenharmony_ci	.compat_ioctl		= rbd_compat_ioctl,
7488c2ecf20Sopenharmony_ci#endif
7498c2ecf20Sopenharmony_ci};
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci/*
7528c2ecf20Sopenharmony_ci * Initialize an rbd client instance.  Success or not, this function
7538c2ecf20Sopenharmony_ci * consumes ceph_opts.  Caller holds client_mutex.
7548c2ecf20Sopenharmony_ci */
7558c2ecf20Sopenharmony_cistatic struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
7568c2ecf20Sopenharmony_ci{
7578c2ecf20Sopenharmony_ci	struct rbd_client *rbdc;
7588c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
7598c2ecf20Sopenharmony_ci
7608c2ecf20Sopenharmony_ci	dout("%s:\n", __func__);
7618c2ecf20Sopenharmony_ci	rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL);
7628c2ecf20Sopenharmony_ci	if (!rbdc)
7638c2ecf20Sopenharmony_ci		goto out_opt;
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_ci	kref_init(&rbdc->kref);
7668c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&rbdc->node);
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_ci	rbdc->client = ceph_create_client(ceph_opts, rbdc);
7698c2ecf20Sopenharmony_ci	if (IS_ERR(rbdc->client))
7708c2ecf20Sopenharmony_ci		goto out_rbdc;
7718c2ecf20Sopenharmony_ci	ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	ret = ceph_open_session(rbdc->client);
7748c2ecf20Sopenharmony_ci	if (ret < 0)
7758c2ecf20Sopenharmony_ci		goto out_client;
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci	spin_lock(&rbd_client_list_lock);
7788c2ecf20Sopenharmony_ci	list_add_tail(&rbdc->node, &rbd_client_list);
7798c2ecf20Sopenharmony_ci	spin_unlock(&rbd_client_list_lock);
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_ci	dout("%s: rbdc %p\n", __func__, rbdc);
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_ci	return rbdc;
7848c2ecf20Sopenharmony_ciout_client:
7858c2ecf20Sopenharmony_ci	ceph_destroy_client(rbdc->client);
7868c2ecf20Sopenharmony_ciout_rbdc:
7878c2ecf20Sopenharmony_ci	kfree(rbdc);
7888c2ecf20Sopenharmony_ciout_opt:
7898c2ecf20Sopenharmony_ci	if (ceph_opts)
7908c2ecf20Sopenharmony_ci		ceph_destroy_options(ceph_opts);
7918c2ecf20Sopenharmony_ci	dout("%s: error %d\n", __func__, ret);
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci	return ERR_PTR(ret);
7948c2ecf20Sopenharmony_ci}
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_cistatic struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
7978c2ecf20Sopenharmony_ci{
7988c2ecf20Sopenharmony_ci	kref_get(&rbdc->kref);
7998c2ecf20Sopenharmony_ci
8008c2ecf20Sopenharmony_ci	return rbdc;
8018c2ecf20Sopenharmony_ci}
8028c2ecf20Sopenharmony_ci
8038c2ecf20Sopenharmony_ci/*
8048c2ecf20Sopenharmony_ci * Find a ceph client with specific addr and configuration.  If
8058c2ecf20Sopenharmony_ci * found, bump its reference count.
8068c2ecf20Sopenharmony_ci */
8078c2ecf20Sopenharmony_cistatic struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
8088c2ecf20Sopenharmony_ci{
8098c2ecf20Sopenharmony_ci	struct rbd_client *client_node;
8108c2ecf20Sopenharmony_ci	bool found = false;
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci	if (ceph_opts->flags & CEPH_OPT_NOSHARE)
8138c2ecf20Sopenharmony_ci		return NULL;
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	spin_lock(&rbd_client_list_lock);
8168c2ecf20Sopenharmony_ci	list_for_each_entry(client_node, &rbd_client_list, node) {
8178c2ecf20Sopenharmony_ci		if (!ceph_compare_options(ceph_opts, client_node->client)) {
8188c2ecf20Sopenharmony_ci			__rbd_get_client(client_node);
8198c2ecf20Sopenharmony_ci
8208c2ecf20Sopenharmony_ci			found = true;
8218c2ecf20Sopenharmony_ci			break;
8228c2ecf20Sopenharmony_ci		}
8238c2ecf20Sopenharmony_ci	}
8248c2ecf20Sopenharmony_ci	spin_unlock(&rbd_client_list_lock);
8258c2ecf20Sopenharmony_ci
8268c2ecf20Sopenharmony_ci	return found ? client_node : NULL;
8278c2ecf20Sopenharmony_ci}
8288c2ecf20Sopenharmony_ci
/*
 * (Per device) rbd map options
 *
 * Tokens are grouped by the kind of argument the option takes.
 */
enum {
	/* options taking an int argument */
	Opt_queue_depth,
	Opt_alloc_size,
	Opt_lock_timeout,

	/* options taking a string argument */
	Opt_pool_ns,
	Opt_compression_hint,

	/* options taking no argument */
	Opt_read_only,
	Opt_read_write,
	Opt_lock_on_read,
	Opt_exclusive,
	Opt_notrim,
};
8468c2ecf20Sopenharmony_ci
/* Values accepted by the "compression_hint" map option. */
enum {
	Opt_compression_hint_none,
	Opt_compression_hint_compressible,
	Opt_compression_hint_incompressible,
};
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_cistatic const struct constant_table rbd_param_compression_hint[] = {
8548c2ecf20Sopenharmony_ci	{"none",		Opt_compression_hint_none},
8558c2ecf20Sopenharmony_ci	{"compressible",	Opt_compression_hint_compressible},
8568c2ecf20Sopenharmony_ci	{"incompressible",	Opt_compression_hint_incompressible},
8578c2ecf20Sopenharmony_ci	{}
8588c2ecf20Sopenharmony_ci};
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_cistatic const struct fs_parameter_spec rbd_parameters[] = {
8618c2ecf20Sopenharmony_ci	fsparam_u32	("alloc_size",			Opt_alloc_size),
8628c2ecf20Sopenharmony_ci	fsparam_enum	("compression_hint",		Opt_compression_hint,
8638c2ecf20Sopenharmony_ci			 rbd_param_compression_hint),
8648c2ecf20Sopenharmony_ci	fsparam_flag	("exclusive",			Opt_exclusive),
8658c2ecf20Sopenharmony_ci	fsparam_flag	("lock_on_read",		Opt_lock_on_read),
8668c2ecf20Sopenharmony_ci	fsparam_u32	("lock_timeout",		Opt_lock_timeout),
8678c2ecf20Sopenharmony_ci	fsparam_flag	("notrim",			Opt_notrim),
8688c2ecf20Sopenharmony_ci	fsparam_string	("_pool_ns",			Opt_pool_ns),
8698c2ecf20Sopenharmony_ci	fsparam_u32	("queue_depth",			Opt_queue_depth),
8708c2ecf20Sopenharmony_ci	fsparam_flag	("read_only",			Opt_read_only),
8718c2ecf20Sopenharmony_ci	fsparam_flag	("read_write",			Opt_read_write),
8728c2ecf20Sopenharmony_ci	fsparam_flag	("ro",				Opt_read_only),
8738c2ecf20Sopenharmony_ci	fsparam_flag	("rw",				Opt_read_write),
8748c2ecf20Sopenharmony_ci	{}
8758c2ecf20Sopenharmony_ci};
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_cistruct rbd_options {
8788c2ecf20Sopenharmony_ci	int	queue_depth;
8798c2ecf20Sopenharmony_ci	int	alloc_size;
8808c2ecf20Sopenharmony_ci	unsigned long	lock_timeout;
8818c2ecf20Sopenharmony_ci	bool	read_only;
8828c2ecf20Sopenharmony_ci	bool	lock_on_read;
8838c2ecf20Sopenharmony_ci	bool	exclusive;
8848c2ecf20Sopenharmony_ci	bool	trim;
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci	u32 alloc_hint_flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
8878c2ecf20Sopenharmony_ci};
8888c2ecf20Sopenharmony_ci
/* Default values for the fields of struct rbd_options */
#define RBD_QUEUE_DEPTH_DEFAULT		BLKDEV_MAX_RQ
#define RBD_ALLOC_SIZE_DEFAULT		(64 * 1024)
#define RBD_LOCK_TIMEOUT_DEFAULT	0  /* no timeout */
#define RBD_READ_ONLY_DEFAULT		false
#define RBD_LOCK_ON_READ_DEFAULT	false
#define RBD_EXCLUSIVE_DEFAULT		false
#define RBD_TRIM_DEFAULT		true
8968c2ecf20Sopenharmony_ci
/* Bundles the three objects that option parsing works on. */
struct rbd_parse_opts_ctx {
	struct rbd_spec		*spec;	/* image specification */
	struct ceph_options	*copts;	/* libceph options */
	struct rbd_options	*opts;	/* rbd map options */
};
9028c2ecf20Sopenharmony_ci
9038c2ecf20Sopenharmony_cistatic char* obj_op_name(enum obj_operation_type op_type)
9048c2ecf20Sopenharmony_ci{
9058c2ecf20Sopenharmony_ci	switch (op_type) {
9068c2ecf20Sopenharmony_ci	case OBJ_OP_READ:
9078c2ecf20Sopenharmony_ci		return "read";
9088c2ecf20Sopenharmony_ci	case OBJ_OP_WRITE:
9098c2ecf20Sopenharmony_ci		return "write";
9108c2ecf20Sopenharmony_ci	case OBJ_OP_DISCARD:
9118c2ecf20Sopenharmony_ci		return "discard";
9128c2ecf20Sopenharmony_ci	case OBJ_OP_ZEROOUT:
9138c2ecf20Sopenharmony_ci		return "zeroout";
9148c2ecf20Sopenharmony_ci	default:
9158c2ecf20Sopenharmony_ci		return "???";
9168c2ecf20Sopenharmony_ci	}
9178c2ecf20Sopenharmony_ci}
9188c2ecf20Sopenharmony_ci
/*
 * Destroy ceph client
 *
 * Takes rbd_client_list_lock itself to unlink the client from the client
 * list, so the caller must not already hold that lock.
 */
9248c2ecf20Sopenharmony_cistatic void rbd_client_release(struct kref *kref)
9258c2ecf20Sopenharmony_ci{
9268c2ecf20Sopenharmony_ci	struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);
9278c2ecf20Sopenharmony_ci
9288c2ecf20Sopenharmony_ci	dout("%s: rbdc %p\n", __func__, rbdc);
9298c2ecf20Sopenharmony_ci	spin_lock(&rbd_client_list_lock);
9308c2ecf20Sopenharmony_ci	list_del(&rbdc->node);
9318c2ecf20Sopenharmony_ci	spin_unlock(&rbd_client_list_lock);
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci	ceph_destroy_client(rbdc->client);
9348c2ecf20Sopenharmony_ci	kfree(rbdc);
9358c2ecf20Sopenharmony_ci}
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_ci/*
9388c2ecf20Sopenharmony_ci * Drop reference to ceph client node. If it's not referenced anymore, release
9398c2ecf20Sopenharmony_ci * it.
9408c2ecf20Sopenharmony_ci */
9418c2ecf20Sopenharmony_cistatic void rbd_put_client(struct rbd_client *rbdc)
9428c2ecf20Sopenharmony_ci{
9438c2ecf20Sopenharmony_ci	if (rbdc)
9448c2ecf20Sopenharmony_ci		kref_put(&rbdc->kref, rbd_client_release);
9458c2ecf20Sopenharmony_ci}
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_ci/*
9488c2ecf20Sopenharmony_ci * Get a ceph client with specific addr and configuration, if one does
9498c2ecf20Sopenharmony_ci * not exist create it.  Either way, ceph_opts is consumed by this
9508c2ecf20Sopenharmony_ci * function.
9518c2ecf20Sopenharmony_ci */
9528c2ecf20Sopenharmony_cistatic struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
9538c2ecf20Sopenharmony_ci{
9548c2ecf20Sopenharmony_ci	struct rbd_client *rbdc;
9558c2ecf20Sopenharmony_ci	int ret;
9568c2ecf20Sopenharmony_ci
9578c2ecf20Sopenharmony_ci	mutex_lock(&client_mutex);
9588c2ecf20Sopenharmony_ci	rbdc = rbd_client_find(ceph_opts);
9598c2ecf20Sopenharmony_ci	if (rbdc) {
9608c2ecf20Sopenharmony_ci		ceph_destroy_options(ceph_opts);
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ci		/*
9638c2ecf20Sopenharmony_ci		 * Using an existing client.  Make sure ->pg_pools is up to
9648c2ecf20Sopenharmony_ci		 * date before we look up the pool id in do_rbd_add().
9658c2ecf20Sopenharmony_ci		 */
9668c2ecf20Sopenharmony_ci		ret = ceph_wait_for_latest_osdmap(rbdc->client,
9678c2ecf20Sopenharmony_ci					rbdc->client->options->mount_timeout);
9688c2ecf20Sopenharmony_ci		if (ret) {
9698c2ecf20Sopenharmony_ci			rbd_warn(NULL, "failed to get latest osdmap: %d", ret);
9708c2ecf20Sopenharmony_ci			rbd_put_client(rbdc);
9718c2ecf20Sopenharmony_ci			rbdc = ERR_PTR(ret);
9728c2ecf20Sopenharmony_ci		}
9738c2ecf20Sopenharmony_ci	} else {
9748c2ecf20Sopenharmony_ci		rbdc = rbd_client_create(ceph_opts);
9758c2ecf20Sopenharmony_ci	}
9768c2ecf20Sopenharmony_ci	mutex_unlock(&client_mutex);
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci	return rbdc;
9798c2ecf20Sopenharmony_ci}
9808c2ecf20Sopenharmony_ci
9818c2ecf20Sopenharmony_cistatic bool rbd_image_format_valid(u32 image_format)
9828c2ecf20Sopenharmony_ci{
9838c2ecf20Sopenharmony_ci	return image_format == 1 || image_format == 2;
9848c2ecf20Sopenharmony_ci}
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_cistatic bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
9878c2ecf20Sopenharmony_ci{
9888c2ecf20Sopenharmony_ci	size_t size;
9898c2ecf20Sopenharmony_ci	u32 snap_count;
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_ci	/* The header has to start with the magic rbd header text */
9928c2ecf20Sopenharmony_ci	if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT)))
9938c2ecf20Sopenharmony_ci		return false;
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci	/* The bio layer requires at least sector-sized I/O */
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci	if (ondisk->options.order < SECTOR_SHIFT)
9988c2ecf20Sopenharmony_ci		return false;
9998c2ecf20Sopenharmony_ci
10008c2ecf20Sopenharmony_ci	/* If we use u64 in a few spots we may be able to loosen this */
10018c2ecf20Sopenharmony_ci
10028c2ecf20Sopenharmony_ci	if (ondisk->options.order > 8 * sizeof (int) - 1)
10038c2ecf20Sopenharmony_ci		return false;
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ci	/*
10068c2ecf20Sopenharmony_ci	 * The size of a snapshot header has to fit in a size_t, and
10078c2ecf20Sopenharmony_ci	 * that limits the number of snapshots.
10088c2ecf20Sopenharmony_ci	 */
10098c2ecf20Sopenharmony_ci	snap_count = le32_to_cpu(ondisk->snap_count);
10108c2ecf20Sopenharmony_ci	size = SIZE_MAX - sizeof (struct ceph_snap_context);
10118c2ecf20Sopenharmony_ci	if (snap_count > size / sizeof (__le64))
10128c2ecf20Sopenharmony_ci		return false;
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ci	/*
10158c2ecf20Sopenharmony_ci	 * Not only that, but the size of the entire the snapshot
10168c2ecf20Sopenharmony_ci	 * header must also be representable in a size_t.
10178c2ecf20Sopenharmony_ci	 */
10188c2ecf20Sopenharmony_ci	size -= snap_count * sizeof (__le64);
10198c2ecf20Sopenharmony_ci	if ((u64) size < le64_to_cpu(ondisk->snap_names_len))
10208c2ecf20Sopenharmony_ci		return false;
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_ci	return true;
10238c2ecf20Sopenharmony_ci}
10248c2ecf20Sopenharmony_ci
10258c2ecf20Sopenharmony_ci/*
10268c2ecf20Sopenharmony_ci * returns the size of an object in the image
10278c2ecf20Sopenharmony_ci */
10288c2ecf20Sopenharmony_cistatic u32 rbd_obj_bytes(struct rbd_image_header *header)
10298c2ecf20Sopenharmony_ci{
10308c2ecf20Sopenharmony_ci	return 1U << header->obj_order;
10318c2ecf20Sopenharmony_ci}
10328c2ecf20Sopenharmony_ci
10338c2ecf20Sopenharmony_cistatic void rbd_init_layout(struct rbd_device *rbd_dev)
10348c2ecf20Sopenharmony_ci{
10358c2ecf20Sopenharmony_ci	if (rbd_dev->header.stripe_unit == 0 ||
10368c2ecf20Sopenharmony_ci	    rbd_dev->header.stripe_count == 0) {
10378c2ecf20Sopenharmony_ci		rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header);
10388c2ecf20Sopenharmony_ci		rbd_dev->header.stripe_count = 1;
10398c2ecf20Sopenharmony_ci	}
10408c2ecf20Sopenharmony_ci
10418c2ecf20Sopenharmony_ci	rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit;
10428c2ecf20Sopenharmony_ci	rbd_dev->layout.stripe_count = rbd_dev->header.stripe_count;
10438c2ecf20Sopenharmony_ci	rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header);
10448c2ecf20Sopenharmony_ci	rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ?
10458c2ecf20Sopenharmony_ci			  rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id;
10468c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
10478c2ecf20Sopenharmony_ci}
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_cistatic void rbd_image_header_cleanup(struct rbd_image_header *header)
10508c2ecf20Sopenharmony_ci{
10518c2ecf20Sopenharmony_ci	kfree(header->object_prefix);
10528c2ecf20Sopenharmony_ci	ceph_put_snap_context(header->snapc);
10538c2ecf20Sopenharmony_ci	kfree(header->snap_sizes);
10548c2ecf20Sopenharmony_ci	kfree(header->snap_names);
10558c2ecf20Sopenharmony_ci
10568c2ecf20Sopenharmony_ci	memset(header, 0, sizeof(*header));
10578c2ecf20Sopenharmony_ci}
10588c2ecf20Sopenharmony_ci
10598c2ecf20Sopenharmony_ci/*
10608c2ecf20Sopenharmony_ci * Fill an rbd image header with information from the given format 1
10618c2ecf20Sopenharmony_ci * on-disk header.
10628c2ecf20Sopenharmony_ci */
10638c2ecf20Sopenharmony_cistatic int rbd_header_from_disk(struct rbd_image_header *header,
10648c2ecf20Sopenharmony_ci				struct rbd_image_header_ondisk *ondisk,
10658c2ecf20Sopenharmony_ci				bool first_time)
10668c2ecf20Sopenharmony_ci{
10678c2ecf20Sopenharmony_ci	struct ceph_snap_context *snapc;
10688c2ecf20Sopenharmony_ci	char *object_prefix = NULL;
10698c2ecf20Sopenharmony_ci	char *snap_names = NULL;
10708c2ecf20Sopenharmony_ci	u64 *snap_sizes = NULL;
10718c2ecf20Sopenharmony_ci	u32 snap_count;
10728c2ecf20Sopenharmony_ci	int ret = -ENOMEM;
10738c2ecf20Sopenharmony_ci	u32 i;
10748c2ecf20Sopenharmony_ci
10758c2ecf20Sopenharmony_ci	/* Allocate this now to avoid having to handle failure below */
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ci	if (first_time) {
10788c2ecf20Sopenharmony_ci		object_prefix = kstrndup(ondisk->object_prefix,
10798c2ecf20Sopenharmony_ci					 sizeof(ondisk->object_prefix),
10808c2ecf20Sopenharmony_ci					 GFP_KERNEL);
10818c2ecf20Sopenharmony_ci		if (!object_prefix)
10828c2ecf20Sopenharmony_ci			return -ENOMEM;
10838c2ecf20Sopenharmony_ci	}
10848c2ecf20Sopenharmony_ci
10858c2ecf20Sopenharmony_ci	/* Allocate the snapshot context and fill it in */
10868c2ecf20Sopenharmony_ci
10878c2ecf20Sopenharmony_ci	snap_count = le32_to_cpu(ondisk->snap_count);
10888c2ecf20Sopenharmony_ci	snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
10898c2ecf20Sopenharmony_ci	if (!snapc)
10908c2ecf20Sopenharmony_ci		goto out_err;
10918c2ecf20Sopenharmony_ci	snapc->seq = le64_to_cpu(ondisk->snap_seq);
10928c2ecf20Sopenharmony_ci	if (snap_count) {
10938c2ecf20Sopenharmony_ci		struct rbd_image_snap_ondisk *snaps;
10948c2ecf20Sopenharmony_ci		u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
10958c2ecf20Sopenharmony_ci
10968c2ecf20Sopenharmony_ci		/* We'll keep a copy of the snapshot names... */
10978c2ecf20Sopenharmony_ci
10988c2ecf20Sopenharmony_ci		if (snap_names_len > (u64)SIZE_MAX)
10998c2ecf20Sopenharmony_ci			goto out_2big;
11008c2ecf20Sopenharmony_ci		snap_names = kmalloc(snap_names_len, GFP_KERNEL);
11018c2ecf20Sopenharmony_ci		if (!snap_names)
11028c2ecf20Sopenharmony_ci			goto out_err;
11038c2ecf20Sopenharmony_ci
11048c2ecf20Sopenharmony_ci		/* ...as well as the array of their sizes. */
11058c2ecf20Sopenharmony_ci		snap_sizes = kmalloc_array(snap_count,
11068c2ecf20Sopenharmony_ci					   sizeof(*header->snap_sizes),
11078c2ecf20Sopenharmony_ci					   GFP_KERNEL);
11088c2ecf20Sopenharmony_ci		if (!snap_sizes)
11098c2ecf20Sopenharmony_ci			goto out_err;
11108c2ecf20Sopenharmony_ci
11118c2ecf20Sopenharmony_ci		/*
11128c2ecf20Sopenharmony_ci		 * Copy the names, and fill in each snapshot's id
11138c2ecf20Sopenharmony_ci		 * and size.
11148c2ecf20Sopenharmony_ci		 *
11158c2ecf20Sopenharmony_ci		 * Note that rbd_dev_v1_header_info() guarantees the
11168c2ecf20Sopenharmony_ci		 * ondisk buffer we're working with has
11178c2ecf20Sopenharmony_ci		 * snap_names_len bytes beyond the end of the
11188c2ecf20Sopenharmony_ci		 * snapshot id array, this memcpy() is safe.
11198c2ecf20Sopenharmony_ci		 */
11208c2ecf20Sopenharmony_ci		memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len);
11218c2ecf20Sopenharmony_ci		snaps = ondisk->snaps;
11228c2ecf20Sopenharmony_ci		for (i = 0; i < snap_count; i++) {
11238c2ecf20Sopenharmony_ci			snapc->snaps[i] = le64_to_cpu(snaps[i].id);
11248c2ecf20Sopenharmony_ci			snap_sizes[i] = le64_to_cpu(snaps[i].image_size);
11258c2ecf20Sopenharmony_ci		}
11268c2ecf20Sopenharmony_ci	}
11278c2ecf20Sopenharmony_ci
11288c2ecf20Sopenharmony_ci	/* We won't fail any more, fill in the header */
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci	if (first_time) {
11318c2ecf20Sopenharmony_ci		header->object_prefix = object_prefix;
11328c2ecf20Sopenharmony_ci		header->obj_order = ondisk->options.order;
11338c2ecf20Sopenharmony_ci	}
11348c2ecf20Sopenharmony_ci
11358c2ecf20Sopenharmony_ci	/* The remaining fields always get updated (when we refresh) */
11368c2ecf20Sopenharmony_ci
11378c2ecf20Sopenharmony_ci	header->image_size = le64_to_cpu(ondisk->image_size);
11388c2ecf20Sopenharmony_ci	header->snapc = snapc;
11398c2ecf20Sopenharmony_ci	header->snap_names = snap_names;
11408c2ecf20Sopenharmony_ci	header->snap_sizes = snap_sizes;
11418c2ecf20Sopenharmony_ci
11428c2ecf20Sopenharmony_ci	return 0;
11438c2ecf20Sopenharmony_ciout_2big:
11448c2ecf20Sopenharmony_ci	ret = -EIO;
11458c2ecf20Sopenharmony_ciout_err:
11468c2ecf20Sopenharmony_ci	kfree(snap_sizes);
11478c2ecf20Sopenharmony_ci	kfree(snap_names);
11488c2ecf20Sopenharmony_ci	ceph_put_snap_context(snapc);
11498c2ecf20Sopenharmony_ci	kfree(object_prefix);
11508c2ecf20Sopenharmony_ci
11518c2ecf20Sopenharmony_ci	return ret;
11528c2ecf20Sopenharmony_ci}
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_cistatic const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which)
11558c2ecf20Sopenharmony_ci{
11568c2ecf20Sopenharmony_ci	const char *snap_name;
11578c2ecf20Sopenharmony_ci
11588c2ecf20Sopenharmony_ci	rbd_assert(which < rbd_dev->header.snapc->num_snaps);
11598c2ecf20Sopenharmony_ci
11608c2ecf20Sopenharmony_ci	/* Skip over names until we find the one we are looking for */
11618c2ecf20Sopenharmony_ci
11628c2ecf20Sopenharmony_ci	snap_name = rbd_dev->header.snap_names;
11638c2ecf20Sopenharmony_ci	while (which--)
11648c2ecf20Sopenharmony_ci		snap_name += strlen(snap_name) + 1;
11658c2ecf20Sopenharmony_ci
11668c2ecf20Sopenharmony_ci	return kstrdup(snap_name, GFP_KERNEL);
11678c2ecf20Sopenharmony_ci}
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ci/*
11708c2ecf20Sopenharmony_ci * Snapshot id comparison function for use with qsort()/bsearch().
11718c2ecf20Sopenharmony_ci * Note that result is for snapshots in *descending* order.
11728c2ecf20Sopenharmony_ci */
11738c2ecf20Sopenharmony_cistatic int snapid_compare_reverse(const void *s1, const void *s2)
11748c2ecf20Sopenharmony_ci{
11758c2ecf20Sopenharmony_ci	u64 snap_id1 = *(u64 *)s1;
11768c2ecf20Sopenharmony_ci	u64 snap_id2 = *(u64 *)s2;
11778c2ecf20Sopenharmony_ci
11788c2ecf20Sopenharmony_ci	if (snap_id1 < snap_id2)
11798c2ecf20Sopenharmony_ci		return 1;
11808c2ecf20Sopenharmony_ci	return snap_id1 == snap_id2 ? 0 : -1;
11818c2ecf20Sopenharmony_ci}
11828c2ecf20Sopenharmony_ci
11838c2ecf20Sopenharmony_ci/*
11848c2ecf20Sopenharmony_ci * Search a snapshot context to see if the given snapshot id is
11858c2ecf20Sopenharmony_ci * present.
11868c2ecf20Sopenharmony_ci *
11878c2ecf20Sopenharmony_ci * Returns the position of the snapshot id in the array if it's found,
11888c2ecf20Sopenharmony_ci * or BAD_SNAP_INDEX otherwise.
11898c2ecf20Sopenharmony_ci *
11908c2ecf20Sopenharmony_ci * Note: The snapshot array is in kept sorted (by the osd) in
11918c2ecf20Sopenharmony_ci * reverse order, highest snapshot id first.
11928c2ecf20Sopenharmony_ci */
11938c2ecf20Sopenharmony_cistatic u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id)
11948c2ecf20Sopenharmony_ci{
11958c2ecf20Sopenharmony_ci	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
11968c2ecf20Sopenharmony_ci	u64 *found;
11978c2ecf20Sopenharmony_ci
11988c2ecf20Sopenharmony_ci	found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps,
11998c2ecf20Sopenharmony_ci				sizeof (snap_id), snapid_compare_reverse);
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci	return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX;
12028c2ecf20Sopenharmony_ci}
12038c2ecf20Sopenharmony_ci
12048c2ecf20Sopenharmony_cistatic const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev,
12058c2ecf20Sopenharmony_ci					u64 snap_id)
12068c2ecf20Sopenharmony_ci{
12078c2ecf20Sopenharmony_ci	u32 which;
12088c2ecf20Sopenharmony_ci	const char *snap_name;
12098c2ecf20Sopenharmony_ci
12108c2ecf20Sopenharmony_ci	which = rbd_dev_snap_index(rbd_dev, snap_id);
12118c2ecf20Sopenharmony_ci	if (which == BAD_SNAP_INDEX)
12128c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOENT);
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_ci	snap_name = _rbd_dev_v1_snap_name(rbd_dev, which);
12158c2ecf20Sopenharmony_ci	return snap_name ? snap_name : ERR_PTR(-ENOMEM);
12168c2ecf20Sopenharmony_ci}
12178c2ecf20Sopenharmony_ci
12188c2ecf20Sopenharmony_cistatic const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id)
12198c2ecf20Sopenharmony_ci{
12208c2ecf20Sopenharmony_ci	if (snap_id == CEPH_NOSNAP)
12218c2ecf20Sopenharmony_ci		return RBD_SNAP_HEAD_NAME;
12228c2ecf20Sopenharmony_ci
12238c2ecf20Sopenharmony_ci	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
12248c2ecf20Sopenharmony_ci	if (rbd_dev->image_format == 1)
12258c2ecf20Sopenharmony_ci		return rbd_dev_v1_snap_name(rbd_dev, snap_id);
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ci	return rbd_dev_v2_snap_name(rbd_dev, snap_id);
12288c2ecf20Sopenharmony_ci}
12298c2ecf20Sopenharmony_ci
12308c2ecf20Sopenharmony_cistatic int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
12318c2ecf20Sopenharmony_ci				u64 *snap_size)
12328c2ecf20Sopenharmony_ci{
12338c2ecf20Sopenharmony_ci	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
12348c2ecf20Sopenharmony_ci	if (snap_id == CEPH_NOSNAP) {
12358c2ecf20Sopenharmony_ci		*snap_size = rbd_dev->header.image_size;
12368c2ecf20Sopenharmony_ci	} else if (rbd_dev->image_format == 1) {
12378c2ecf20Sopenharmony_ci		u32 which;
12388c2ecf20Sopenharmony_ci
12398c2ecf20Sopenharmony_ci		which = rbd_dev_snap_index(rbd_dev, snap_id);
12408c2ecf20Sopenharmony_ci		if (which == BAD_SNAP_INDEX)
12418c2ecf20Sopenharmony_ci			return -ENOENT;
12428c2ecf20Sopenharmony_ci
12438c2ecf20Sopenharmony_ci		*snap_size = rbd_dev->header.snap_sizes[which];
12448c2ecf20Sopenharmony_ci	} else {
12458c2ecf20Sopenharmony_ci		u64 size = 0;
12468c2ecf20Sopenharmony_ci		int ret;
12478c2ecf20Sopenharmony_ci
12488c2ecf20Sopenharmony_ci		ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size);
12498c2ecf20Sopenharmony_ci		if (ret)
12508c2ecf20Sopenharmony_ci			return ret;
12518c2ecf20Sopenharmony_ci
12528c2ecf20Sopenharmony_ci		*snap_size = size;
12538c2ecf20Sopenharmony_ci	}
12548c2ecf20Sopenharmony_ci	return 0;
12558c2ecf20Sopenharmony_ci}
12568c2ecf20Sopenharmony_ci
12578c2ecf20Sopenharmony_cistatic int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
12588c2ecf20Sopenharmony_ci{
12598c2ecf20Sopenharmony_ci	u64 snap_id = rbd_dev->spec->snap_id;
12608c2ecf20Sopenharmony_ci	u64 size = 0;
12618c2ecf20Sopenharmony_ci	int ret;
12628c2ecf20Sopenharmony_ci
12638c2ecf20Sopenharmony_ci	ret = rbd_snap_size(rbd_dev, snap_id, &size);
12648c2ecf20Sopenharmony_ci	if (ret)
12658c2ecf20Sopenharmony_ci		return ret;
12668c2ecf20Sopenharmony_ci
12678c2ecf20Sopenharmony_ci	rbd_dev->mapping.size = size;
12688c2ecf20Sopenharmony_ci	return 0;
12698c2ecf20Sopenharmony_ci}
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_cistatic void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
12728c2ecf20Sopenharmony_ci{
12738c2ecf20Sopenharmony_ci	rbd_dev->mapping.size = 0;
12748c2ecf20Sopenharmony_ci}
12758c2ecf20Sopenharmony_ci
12768c2ecf20Sopenharmony_cistatic void zero_bvec(struct bio_vec *bv)
12778c2ecf20Sopenharmony_ci{
12788c2ecf20Sopenharmony_ci	void *buf;
12798c2ecf20Sopenharmony_ci	unsigned long flags;
12808c2ecf20Sopenharmony_ci
12818c2ecf20Sopenharmony_ci	buf = bvec_kmap_irq(bv, &flags);
12828c2ecf20Sopenharmony_ci	memset(buf, 0, bv->bv_len);
12838c2ecf20Sopenharmony_ci	flush_dcache_page(bv->bv_page);
12848c2ecf20Sopenharmony_ci	bvec_kunmap_irq(buf, &flags);
12858c2ecf20Sopenharmony_ci}
12868c2ecf20Sopenharmony_ci
12878c2ecf20Sopenharmony_cistatic void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
12888c2ecf20Sopenharmony_ci{
12898c2ecf20Sopenharmony_ci	struct ceph_bio_iter it = *bio_pos;
12908c2ecf20Sopenharmony_ci
12918c2ecf20Sopenharmony_ci	ceph_bio_iter_advance(&it, off);
12928c2ecf20Sopenharmony_ci	ceph_bio_iter_advance_step(&it, bytes, ({
12938c2ecf20Sopenharmony_ci		zero_bvec(&bv);
12948c2ecf20Sopenharmony_ci	}));
12958c2ecf20Sopenharmony_ci}
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_cistatic void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
12988c2ecf20Sopenharmony_ci{
12998c2ecf20Sopenharmony_ci	struct ceph_bvec_iter it = *bvec_pos;
13008c2ecf20Sopenharmony_ci
13018c2ecf20Sopenharmony_ci	ceph_bvec_iter_advance(&it, off);
13028c2ecf20Sopenharmony_ci	ceph_bvec_iter_advance_step(&it, bytes, ({
13038c2ecf20Sopenharmony_ci		zero_bvec(&bv);
13048c2ecf20Sopenharmony_ci	}));
13058c2ecf20Sopenharmony_ci}
13068c2ecf20Sopenharmony_ci
13078c2ecf20Sopenharmony_ci/*
13088c2ecf20Sopenharmony_ci * Zero a range in @obj_req data buffer defined by a bio (list) or
13098c2ecf20Sopenharmony_ci * (private) bio_vec array.
13108c2ecf20Sopenharmony_ci *
13118c2ecf20Sopenharmony_ci * @off is relative to the start of the data buffer.
13128c2ecf20Sopenharmony_ci */
13138c2ecf20Sopenharmony_cistatic void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
13148c2ecf20Sopenharmony_ci			       u32 bytes)
13158c2ecf20Sopenharmony_ci{
13168c2ecf20Sopenharmony_ci	dout("%s %p data buf %u~%u\n", __func__, obj_req, off, bytes);
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci	switch (obj_req->img_request->data_type) {
13198c2ecf20Sopenharmony_ci	case OBJ_REQUEST_BIO:
13208c2ecf20Sopenharmony_ci		zero_bios(&obj_req->bio_pos, off, bytes);
13218c2ecf20Sopenharmony_ci		break;
13228c2ecf20Sopenharmony_ci	case OBJ_REQUEST_BVECS:
13238c2ecf20Sopenharmony_ci	case OBJ_REQUEST_OWN_BVECS:
13248c2ecf20Sopenharmony_ci		zero_bvecs(&obj_req->bvec_pos, off, bytes);
13258c2ecf20Sopenharmony_ci		break;
13268c2ecf20Sopenharmony_ci	default:
13278c2ecf20Sopenharmony_ci		BUG();
13288c2ecf20Sopenharmony_ci	}
13298c2ecf20Sopenharmony_ci}
13308c2ecf20Sopenharmony_ci
13318c2ecf20Sopenharmony_cistatic void rbd_obj_request_destroy(struct kref *kref);
13328c2ecf20Sopenharmony_cistatic void rbd_obj_request_put(struct rbd_obj_request *obj_request)
13338c2ecf20Sopenharmony_ci{
13348c2ecf20Sopenharmony_ci	rbd_assert(obj_request != NULL);
13358c2ecf20Sopenharmony_ci	dout("%s: obj %p (was %d)\n", __func__, obj_request,
13368c2ecf20Sopenharmony_ci		kref_read(&obj_request->kref));
13378c2ecf20Sopenharmony_ci	kref_put(&obj_request->kref, rbd_obj_request_destroy);
13388c2ecf20Sopenharmony_ci}
13398c2ecf20Sopenharmony_ci
13408c2ecf20Sopenharmony_cistatic inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
13418c2ecf20Sopenharmony_ci					struct rbd_obj_request *obj_request)
13428c2ecf20Sopenharmony_ci{
13438c2ecf20Sopenharmony_ci	rbd_assert(obj_request->img_request == NULL);
13448c2ecf20Sopenharmony_ci
13458c2ecf20Sopenharmony_ci	/* Image request now owns object's original reference */
13468c2ecf20Sopenharmony_ci	obj_request->img_request = img_request;
13478c2ecf20Sopenharmony_ci	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
13488c2ecf20Sopenharmony_ci}
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_cistatic inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
13518c2ecf20Sopenharmony_ci					struct rbd_obj_request *obj_request)
13528c2ecf20Sopenharmony_ci{
13538c2ecf20Sopenharmony_ci	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
13548c2ecf20Sopenharmony_ci	list_del(&obj_request->ex.oe_item);
13558c2ecf20Sopenharmony_ci	rbd_assert(obj_request->img_request == img_request);
13568c2ecf20Sopenharmony_ci	rbd_obj_request_put(obj_request);
13578c2ecf20Sopenharmony_ci}
13588c2ecf20Sopenharmony_ci
13598c2ecf20Sopenharmony_cistatic void rbd_osd_submit(struct ceph_osd_request *osd_req)
13608c2ecf20Sopenharmony_ci{
13618c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
13628c2ecf20Sopenharmony_ci
13638c2ecf20Sopenharmony_ci	dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n",
13648c2ecf20Sopenharmony_ci	     __func__, osd_req, obj_req, obj_req->ex.oe_objno,
13658c2ecf20Sopenharmony_ci	     obj_req->ex.oe_off, obj_req->ex.oe_len);
13668c2ecf20Sopenharmony_ci	ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
13678c2ecf20Sopenharmony_ci}
13688c2ecf20Sopenharmony_ci
13698c2ecf20Sopenharmony_ci/*
13708c2ecf20Sopenharmony_ci * The default/initial value for all image request flags is 0.  Each
13718c2ecf20Sopenharmony_ci * is conditionally set to 1 at image request initialization time
13728c2ecf20Sopenharmony_ci * and currently never change thereafter.
13738c2ecf20Sopenharmony_ci */
13748c2ecf20Sopenharmony_cistatic void img_request_layered_set(struct rbd_img_request *img_request)
13758c2ecf20Sopenharmony_ci{
13768c2ecf20Sopenharmony_ci	set_bit(IMG_REQ_LAYERED, &img_request->flags);
13778c2ecf20Sopenharmony_ci}
13788c2ecf20Sopenharmony_ci
13798c2ecf20Sopenharmony_cistatic bool img_request_layered_test(struct rbd_img_request *img_request)
13808c2ecf20Sopenharmony_ci{
13818c2ecf20Sopenharmony_ci	return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
13828c2ecf20Sopenharmony_ci}
13838c2ecf20Sopenharmony_ci
13848c2ecf20Sopenharmony_cistatic bool rbd_obj_is_entire(struct rbd_obj_request *obj_req)
13858c2ecf20Sopenharmony_ci{
13868c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
13878c2ecf20Sopenharmony_ci
13888c2ecf20Sopenharmony_ci	return !obj_req->ex.oe_off &&
13898c2ecf20Sopenharmony_ci	       obj_req->ex.oe_len == rbd_dev->layout.object_size;
13908c2ecf20Sopenharmony_ci}
13918c2ecf20Sopenharmony_ci
13928c2ecf20Sopenharmony_cistatic bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
13938c2ecf20Sopenharmony_ci{
13948c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
13958c2ecf20Sopenharmony_ci
13968c2ecf20Sopenharmony_ci	return obj_req->ex.oe_off + obj_req->ex.oe_len ==
13978c2ecf20Sopenharmony_ci					rbd_dev->layout.object_size;
13988c2ecf20Sopenharmony_ci}
13998c2ecf20Sopenharmony_ci
14008c2ecf20Sopenharmony_ci/*
14018c2ecf20Sopenharmony_ci * Must be called after rbd_obj_calc_img_extents().
14028c2ecf20Sopenharmony_ci */
14038c2ecf20Sopenharmony_cistatic void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req)
14048c2ecf20Sopenharmony_ci{
14058c2ecf20Sopenharmony_ci	rbd_assert(obj_req->img_request->snapc);
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_ci	if (obj_req->img_request->op_type == OBJ_OP_DISCARD) {
14088c2ecf20Sopenharmony_ci		dout("%s %p objno %llu discard\n", __func__, obj_req,
14098c2ecf20Sopenharmony_ci		     obj_req->ex.oe_objno);
14108c2ecf20Sopenharmony_ci		return;
14118c2ecf20Sopenharmony_ci	}
14128c2ecf20Sopenharmony_ci
14138c2ecf20Sopenharmony_ci	if (!obj_req->num_img_extents) {
14148c2ecf20Sopenharmony_ci		dout("%s %p objno %llu not overlapping\n", __func__, obj_req,
14158c2ecf20Sopenharmony_ci		     obj_req->ex.oe_objno);
14168c2ecf20Sopenharmony_ci		return;
14178c2ecf20Sopenharmony_ci	}
14188c2ecf20Sopenharmony_ci
14198c2ecf20Sopenharmony_ci	if (rbd_obj_is_entire(obj_req) &&
14208c2ecf20Sopenharmony_ci	    !obj_req->img_request->snapc->num_snaps) {
14218c2ecf20Sopenharmony_ci		dout("%s %p objno %llu entire\n", __func__, obj_req,
14228c2ecf20Sopenharmony_ci		     obj_req->ex.oe_objno);
14238c2ecf20Sopenharmony_ci		return;
14248c2ecf20Sopenharmony_ci	}
14258c2ecf20Sopenharmony_ci
14268c2ecf20Sopenharmony_ci	obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED;
14278c2ecf20Sopenharmony_ci}
14288c2ecf20Sopenharmony_ci
14298c2ecf20Sopenharmony_cistatic u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
14308c2ecf20Sopenharmony_ci{
14318c2ecf20Sopenharmony_ci	return ceph_file_extents_bytes(obj_req->img_extents,
14328c2ecf20Sopenharmony_ci				       obj_req->num_img_extents);
14338c2ecf20Sopenharmony_ci}
14348c2ecf20Sopenharmony_ci
14358c2ecf20Sopenharmony_cistatic bool rbd_img_is_write(struct rbd_img_request *img_req)
14368c2ecf20Sopenharmony_ci{
14378c2ecf20Sopenharmony_ci	switch (img_req->op_type) {
14388c2ecf20Sopenharmony_ci	case OBJ_OP_READ:
14398c2ecf20Sopenharmony_ci		return false;
14408c2ecf20Sopenharmony_ci	case OBJ_OP_WRITE:
14418c2ecf20Sopenharmony_ci	case OBJ_OP_DISCARD:
14428c2ecf20Sopenharmony_ci	case OBJ_OP_ZEROOUT:
14438c2ecf20Sopenharmony_ci		return true;
14448c2ecf20Sopenharmony_ci	default:
14458c2ecf20Sopenharmony_ci		BUG();
14468c2ecf20Sopenharmony_ci	}
14478c2ecf20Sopenharmony_ci}
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_cistatic void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
14508c2ecf20Sopenharmony_ci{
14518c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
14528c2ecf20Sopenharmony_ci	int result;
14538c2ecf20Sopenharmony_ci
14548c2ecf20Sopenharmony_ci	dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
14558c2ecf20Sopenharmony_ci	     osd_req->r_result, obj_req);
14568c2ecf20Sopenharmony_ci
14578c2ecf20Sopenharmony_ci	/*
14588c2ecf20Sopenharmony_ci	 * Writes aren't allowed to return a data payload.  In some
14598c2ecf20Sopenharmony_ci	 * guarded write cases (e.g. stat + zero on an empty object)
14608c2ecf20Sopenharmony_ci	 * a stat response makes it through, but we don't care.
14618c2ecf20Sopenharmony_ci	 */
14628c2ecf20Sopenharmony_ci	if (osd_req->r_result > 0 && rbd_img_is_write(obj_req->img_request))
14638c2ecf20Sopenharmony_ci		result = 0;
14648c2ecf20Sopenharmony_ci	else
14658c2ecf20Sopenharmony_ci		result = osd_req->r_result;
14668c2ecf20Sopenharmony_ci
14678c2ecf20Sopenharmony_ci	rbd_obj_handle_request(obj_req, result);
14688c2ecf20Sopenharmony_ci}
14698c2ecf20Sopenharmony_ci
14708c2ecf20Sopenharmony_cistatic void rbd_osd_format_read(struct ceph_osd_request *osd_req)
14718c2ecf20Sopenharmony_ci{
14728c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_request = osd_req->r_priv;
14738c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
14748c2ecf20Sopenharmony_ci	struct ceph_options *opt = rbd_dev->rbd_client->client->options;
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_ci	osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica;
14778c2ecf20Sopenharmony_ci	osd_req->r_snapid = obj_request->img_request->snap_id;
14788c2ecf20Sopenharmony_ci}
14798c2ecf20Sopenharmony_ci
14808c2ecf20Sopenharmony_cistatic void rbd_osd_format_write(struct ceph_osd_request *osd_req)
14818c2ecf20Sopenharmony_ci{
14828c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_request = osd_req->r_priv;
14838c2ecf20Sopenharmony_ci
14848c2ecf20Sopenharmony_ci	osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
14858c2ecf20Sopenharmony_ci	ktime_get_real_ts64(&osd_req->r_mtime);
14868c2ecf20Sopenharmony_ci	osd_req->r_data_offset = obj_request->ex.oe_off;
14878c2ecf20Sopenharmony_ci}
14888c2ecf20Sopenharmony_ci
14898c2ecf20Sopenharmony_cistatic struct ceph_osd_request *
14908c2ecf20Sopenharmony_ci__rbd_obj_add_osd_request(struct rbd_obj_request *obj_req,
14918c2ecf20Sopenharmony_ci			  struct ceph_snap_context *snapc, int num_ops)
14928c2ecf20Sopenharmony_ci{
14938c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
14948c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
14958c2ecf20Sopenharmony_ci	struct ceph_osd_request *req;
14968c2ecf20Sopenharmony_ci	const char *name_format = rbd_dev->image_format == 1 ?
14978c2ecf20Sopenharmony_ci				      RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT;
14988c2ecf20Sopenharmony_ci	int ret;
14998c2ecf20Sopenharmony_ci
15008c2ecf20Sopenharmony_ci	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO);
15018c2ecf20Sopenharmony_ci	if (!req)
15028c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_ci	list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
15058c2ecf20Sopenharmony_ci	req->r_callback = rbd_osd_req_callback;
15068c2ecf20Sopenharmony_ci	req->r_priv = obj_req;
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci	/*
15098c2ecf20Sopenharmony_ci	 * Data objects may be stored in a separate pool, but always in
15108c2ecf20Sopenharmony_ci	 * the same namespace in that pool as the header in its pool.
15118c2ecf20Sopenharmony_ci	 */
15128c2ecf20Sopenharmony_ci	ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
15138c2ecf20Sopenharmony_ci	req->r_base_oloc.pool = rbd_dev->layout.pool_id;
15148c2ecf20Sopenharmony_ci
15158c2ecf20Sopenharmony_ci	ret = ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
15168c2ecf20Sopenharmony_ci			       rbd_dev->header.object_prefix,
15178c2ecf20Sopenharmony_ci			       obj_req->ex.oe_objno);
15188c2ecf20Sopenharmony_ci	if (ret)
15198c2ecf20Sopenharmony_ci		return ERR_PTR(ret);
15208c2ecf20Sopenharmony_ci
15218c2ecf20Sopenharmony_ci	return req;
15228c2ecf20Sopenharmony_ci}
15238c2ecf20Sopenharmony_ci
15248c2ecf20Sopenharmony_cistatic struct ceph_osd_request *
15258c2ecf20Sopenharmony_cirbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops)
15268c2ecf20Sopenharmony_ci{
15278c2ecf20Sopenharmony_ci	rbd_assert(obj_req->img_request->snapc);
15288c2ecf20Sopenharmony_ci	return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc,
15298c2ecf20Sopenharmony_ci					 num_ops);
15308c2ecf20Sopenharmony_ci}
15318c2ecf20Sopenharmony_ci
15328c2ecf20Sopenharmony_cistatic struct rbd_obj_request *rbd_obj_request_create(void)
15338c2ecf20Sopenharmony_ci{
15348c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_request;
15358c2ecf20Sopenharmony_ci
15368c2ecf20Sopenharmony_ci	obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
15378c2ecf20Sopenharmony_ci	if (!obj_request)
15388c2ecf20Sopenharmony_ci		return NULL;
15398c2ecf20Sopenharmony_ci
15408c2ecf20Sopenharmony_ci	ceph_object_extent_init(&obj_request->ex);
15418c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&obj_request->osd_reqs);
15428c2ecf20Sopenharmony_ci	mutex_init(&obj_request->state_mutex);
15438c2ecf20Sopenharmony_ci	kref_init(&obj_request->kref);
15448c2ecf20Sopenharmony_ci
15458c2ecf20Sopenharmony_ci	dout("%s %p\n", __func__, obj_request);
15468c2ecf20Sopenharmony_ci	return obj_request;
15478c2ecf20Sopenharmony_ci}
15488c2ecf20Sopenharmony_ci
15498c2ecf20Sopenharmony_cistatic void rbd_obj_request_destroy(struct kref *kref)
15508c2ecf20Sopenharmony_ci{
15518c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_request;
15528c2ecf20Sopenharmony_ci	struct ceph_osd_request *osd_req;
15538c2ecf20Sopenharmony_ci	u32 i;
15548c2ecf20Sopenharmony_ci
15558c2ecf20Sopenharmony_ci	obj_request = container_of(kref, struct rbd_obj_request, kref);
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci	dout("%s: obj %p\n", __func__, obj_request);
15588c2ecf20Sopenharmony_ci
15598c2ecf20Sopenharmony_ci	while (!list_empty(&obj_request->osd_reqs)) {
15608c2ecf20Sopenharmony_ci		osd_req = list_first_entry(&obj_request->osd_reqs,
15618c2ecf20Sopenharmony_ci				    struct ceph_osd_request, r_private_item);
15628c2ecf20Sopenharmony_ci		list_del_init(&osd_req->r_private_item);
15638c2ecf20Sopenharmony_ci		ceph_osdc_put_request(osd_req);
15648c2ecf20Sopenharmony_ci	}
15658c2ecf20Sopenharmony_ci
15668c2ecf20Sopenharmony_ci	switch (obj_request->img_request->data_type) {
15678c2ecf20Sopenharmony_ci	case OBJ_REQUEST_NODATA:
15688c2ecf20Sopenharmony_ci	case OBJ_REQUEST_BIO:
15698c2ecf20Sopenharmony_ci	case OBJ_REQUEST_BVECS:
15708c2ecf20Sopenharmony_ci		break;		/* Nothing to do */
15718c2ecf20Sopenharmony_ci	case OBJ_REQUEST_OWN_BVECS:
15728c2ecf20Sopenharmony_ci		kfree(obj_request->bvec_pos.bvecs);
15738c2ecf20Sopenharmony_ci		break;
15748c2ecf20Sopenharmony_ci	default:
15758c2ecf20Sopenharmony_ci		BUG();
15768c2ecf20Sopenharmony_ci	}
15778c2ecf20Sopenharmony_ci
15788c2ecf20Sopenharmony_ci	kfree(obj_request->img_extents);
15798c2ecf20Sopenharmony_ci	if (obj_request->copyup_bvecs) {
15808c2ecf20Sopenharmony_ci		for (i = 0; i < obj_request->copyup_bvec_count; i++) {
15818c2ecf20Sopenharmony_ci			if (obj_request->copyup_bvecs[i].bv_page)
15828c2ecf20Sopenharmony_ci				__free_page(obj_request->copyup_bvecs[i].bv_page);
15838c2ecf20Sopenharmony_ci		}
15848c2ecf20Sopenharmony_ci		kfree(obj_request->copyup_bvecs);
15858c2ecf20Sopenharmony_ci	}
15868c2ecf20Sopenharmony_ci
15878c2ecf20Sopenharmony_ci	kmem_cache_free(rbd_obj_request_cache, obj_request);
15888c2ecf20Sopenharmony_ci}
15898c2ecf20Sopenharmony_ci
15908c2ecf20Sopenharmony_ci/* It's OK to call this for a device with no parent */
15918c2ecf20Sopenharmony_ci
15928c2ecf20Sopenharmony_cistatic void rbd_spec_put(struct rbd_spec *spec);
15938c2ecf20Sopenharmony_cistatic void rbd_dev_unparent(struct rbd_device *rbd_dev)
15948c2ecf20Sopenharmony_ci{
15958c2ecf20Sopenharmony_ci	rbd_dev_remove_parent(rbd_dev);
15968c2ecf20Sopenharmony_ci	rbd_spec_put(rbd_dev->parent_spec);
15978c2ecf20Sopenharmony_ci	rbd_dev->parent_spec = NULL;
15988c2ecf20Sopenharmony_ci	rbd_dev->parent_overlap = 0;
15998c2ecf20Sopenharmony_ci}
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_ci/*
16028c2ecf20Sopenharmony_ci * Parent image reference counting is used to determine when an
16038c2ecf20Sopenharmony_ci * image's parent fields can be safely torn down--after there are no
16048c2ecf20Sopenharmony_ci * more in-flight requests to the parent image.  When the last
16058c2ecf20Sopenharmony_ci * reference is dropped, cleaning them up is safe.
16068c2ecf20Sopenharmony_ci */
16078c2ecf20Sopenharmony_cistatic void rbd_dev_parent_put(struct rbd_device *rbd_dev)
16088c2ecf20Sopenharmony_ci{
16098c2ecf20Sopenharmony_ci	int counter;
16108c2ecf20Sopenharmony_ci
16118c2ecf20Sopenharmony_ci	if (!rbd_dev->parent_spec)
16128c2ecf20Sopenharmony_ci		return;
16138c2ecf20Sopenharmony_ci
16148c2ecf20Sopenharmony_ci	counter = atomic_dec_return_safe(&rbd_dev->parent_ref);
16158c2ecf20Sopenharmony_ci	if (counter > 0)
16168c2ecf20Sopenharmony_ci		return;
16178c2ecf20Sopenharmony_ci
16188c2ecf20Sopenharmony_ci	/* Last reference; clean up parent data structures */
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	if (!counter)
16218c2ecf20Sopenharmony_ci		rbd_dev_unparent(rbd_dev);
16228c2ecf20Sopenharmony_ci	else
16238c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "parent reference underflow");
16248c2ecf20Sopenharmony_ci}
16258c2ecf20Sopenharmony_ci
16268c2ecf20Sopenharmony_ci/*
16278c2ecf20Sopenharmony_ci * If an image has a non-zero parent overlap, get a reference to its
16288c2ecf20Sopenharmony_ci * parent.
16298c2ecf20Sopenharmony_ci *
16308c2ecf20Sopenharmony_ci * Returns true if the rbd device has a parent with a non-zero
16318c2ecf20Sopenharmony_ci * overlap and a reference for it was successfully taken, or
16328c2ecf20Sopenharmony_ci * false otherwise.
16338c2ecf20Sopenharmony_ci */
16348c2ecf20Sopenharmony_cistatic bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
16358c2ecf20Sopenharmony_ci{
16368c2ecf20Sopenharmony_ci	int counter = 0;
16378c2ecf20Sopenharmony_ci
16388c2ecf20Sopenharmony_ci	if (!rbd_dev->parent_spec)
16398c2ecf20Sopenharmony_ci		return false;
16408c2ecf20Sopenharmony_ci
16418c2ecf20Sopenharmony_ci	if (rbd_dev->parent_overlap)
16428c2ecf20Sopenharmony_ci		counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
16438c2ecf20Sopenharmony_ci
16448c2ecf20Sopenharmony_ci	if (counter < 0)
16458c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "parent reference overflow");
16468c2ecf20Sopenharmony_ci
16478c2ecf20Sopenharmony_ci	return counter > 0;
16488c2ecf20Sopenharmony_ci}
16498c2ecf20Sopenharmony_ci
16508c2ecf20Sopenharmony_cistatic void rbd_img_request_init(struct rbd_img_request *img_request,
16518c2ecf20Sopenharmony_ci				 struct rbd_device *rbd_dev,
16528c2ecf20Sopenharmony_ci				 enum obj_operation_type op_type)
16538c2ecf20Sopenharmony_ci{
16548c2ecf20Sopenharmony_ci	memset(img_request, 0, sizeof(*img_request));
16558c2ecf20Sopenharmony_ci
16568c2ecf20Sopenharmony_ci	img_request->rbd_dev = rbd_dev;
16578c2ecf20Sopenharmony_ci	img_request->op_type = op_type;
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&img_request->lock_item);
16608c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&img_request->object_extents);
16618c2ecf20Sopenharmony_ci	mutex_init(&img_request->state_mutex);
16628c2ecf20Sopenharmony_ci}
16638c2ecf20Sopenharmony_ci
16648c2ecf20Sopenharmony_ci/*
16658c2ecf20Sopenharmony_ci * Only snap_id is captured here, for reads.  For writes, snapshot
16668c2ecf20Sopenharmony_ci * context is captured in rbd_img_object_requests() after exclusive
16678c2ecf20Sopenharmony_ci * lock is ensured to be held.
16688c2ecf20Sopenharmony_ci */
16698c2ecf20Sopenharmony_cistatic void rbd_img_capture_header(struct rbd_img_request *img_req)
16708c2ecf20Sopenharmony_ci{
16718c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
16728c2ecf20Sopenharmony_ci
16738c2ecf20Sopenharmony_ci	lockdep_assert_held(&rbd_dev->header_rwsem);
16748c2ecf20Sopenharmony_ci
16758c2ecf20Sopenharmony_ci	if (!rbd_img_is_write(img_req))
16768c2ecf20Sopenharmony_ci		img_req->snap_id = rbd_dev->spec->snap_id;
16778c2ecf20Sopenharmony_ci
16788c2ecf20Sopenharmony_ci	if (rbd_dev_parent_get(rbd_dev))
16798c2ecf20Sopenharmony_ci		img_request_layered_set(img_req);
16808c2ecf20Sopenharmony_ci}
16818c2ecf20Sopenharmony_ci
16828c2ecf20Sopenharmony_cistatic void rbd_img_request_destroy(struct rbd_img_request *img_request)
16838c2ecf20Sopenharmony_ci{
16848c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_request;
16858c2ecf20Sopenharmony_ci	struct rbd_obj_request *next_obj_request;
16868c2ecf20Sopenharmony_ci
16878c2ecf20Sopenharmony_ci	dout("%s: img %p\n", __func__, img_request);
16888c2ecf20Sopenharmony_ci
16898c2ecf20Sopenharmony_ci	WARN_ON(!list_empty(&img_request->lock_item));
16908c2ecf20Sopenharmony_ci	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
16918c2ecf20Sopenharmony_ci		rbd_img_obj_request_del(img_request, obj_request);
16928c2ecf20Sopenharmony_ci
16938c2ecf20Sopenharmony_ci	if (img_request_layered_test(img_request))
16948c2ecf20Sopenharmony_ci		rbd_dev_parent_put(img_request->rbd_dev);
16958c2ecf20Sopenharmony_ci
16968c2ecf20Sopenharmony_ci	if (rbd_img_is_write(img_request))
16978c2ecf20Sopenharmony_ci		ceph_put_snap_context(img_request->snapc);
16988c2ecf20Sopenharmony_ci
16998c2ecf20Sopenharmony_ci	if (test_bit(IMG_REQ_CHILD, &img_request->flags))
17008c2ecf20Sopenharmony_ci		kmem_cache_free(rbd_img_request_cache, img_request);
17018c2ecf20Sopenharmony_ci}
17028c2ecf20Sopenharmony_ci
17038c2ecf20Sopenharmony_ci#define BITS_PER_OBJ	2
17048c2ecf20Sopenharmony_ci#define OBJS_PER_BYTE	(BITS_PER_BYTE / BITS_PER_OBJ)
17058c2ecf20Sopenharmony_ci#define OBJ_MASK	((1 << BITS_PER_OBJ) - 1)
17068c2ecf20Sopenharmony_ci
17078c2ecf20Sopenharmony_cistatic void __rbd_object_map_index(struct rbd_device *rbd_dev, u64 objno,
17088c2ecf20Sopenharmony_ci				   u64 *index, u8 *shift)
17098c2ecf20Sopenharmony_ci{
17108c2ecf20Sopenharmony_ci	u32 off;
17118c2ecf20Sopenharmony_ci
17128c2ecf20Sopenharmony_ci	rbd_assert(objno < rbd_dev->object_map_size);
17138c2ecf20Sopenharmony_ci	*index = div_u64_rem(objno, OBJS_PER_BYTE, &off);
17148c2ecf20Sopenharmony_ci	*shift = (OBJS_PER_BYTE - off - 1) * BITS_PER_OBJ;
17158c2ecf20Sopenharmony_ci}
17168c2ecf20Sopenharmony_ci
17178c2ecf20Sopenharmony_cistatic u8 __rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
17188c2ecf20Sopenharmony_ci{
17198c2ecf20Sopenharmony_ci	u64 index;
17208c2ecf20Sopenharmony_ci	u8 shift;
17218c2ecf20Sopenharmony_ci
17228c2ecf20Sopenharmony_ci	lockdep_assert_held(&rbd_dev->object_map_lock);
17238c2ecf20Sopenharmony_ci	__rbd_object_map_index(rbd_dev, objno, &index, &shift);
17248c2ecf20Sopenharmony_ci	return (rbd_dev->object_map[index] >> shift) & OBJ_MASK;
17258c2ecf20Sopenharmony_ci}
17268c2ecf20Sopenharmony_ci
17278c2ecf20Sopenharmony_cistatic void __rbd_object_map_set(struct rbd_device *rbd_dev, u64 objno, u8 val)
17288c2ecf20Sopenharmony_ci{
17298c2ecf20Sopenharmony_ci	u64 index;
17308c2ecf20Sopenharmony_ci	u8 shift;
17318c2ecf20Sopenharmony_ci	u8 *p;
17328c2ecf20Sopenharmony_ci
17338c2ecf20Sopenharmony_ci	lockdep_assert_held(&rbd_dev->object_map_lock);
17348c2ecf20Sopenharmony_ci	rbd_assert(!(val & ~OBJ_MASK));
17358c2ecf20Sopenharmony_ci
17368c2ecf20Sopenharmony_ci	__rbd_object_map_index(rbd_dev, objno, &index, &shift);
17378c2ecf20Sopenharmony_ci	p = &rbd_dev->object_map[index];
17388c2ecf20Sopenharmony_ci	*p = (*p & ~(OBJ_MASK << shift)) | (val << shift);
17398c2ecf20Sopenharmony_ci}
17408c2ecf20Sopenharmony_ci
17418c2ecf20Sopenharmony_cistatic u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno)
17428c2ecf20Sopenharmony_ci{
17438c2ecf20Sopenharmony_ci	u8 state;
17448c2ecf20Sopenharmony_ci
17458c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev->object_map_lock);
17468c2ecf20Sopenharmony_ci	state = __rbd_object_map_get(rbd_dev, objno);
17478c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev->object_map_lock);
17488c2ecf20Sopenharmony_ci	return state;
17498c2ecf20Sopenharmony_ci}
17508c2ecf20Sopenharmony_ci
17518c2ecf20Sopenharmony_cistatic bool use_object_map(struct rbd_device *rbd_dev)
17528c2ecf20Sopenharmony_ci{
17538c2ecf20Sopenharmony_ci	/*
17548c2ecf20Sopenharmony_ci	 * An image mapped read-only can't use the object map -- it isn't
17558c2ecf20Sopenharmony_ci	 * loaded because the header lock isn't acquired.  Someone else can
17568c2ecf20Sopenharmony_ci	 * write to the image and update the object map behind our back.
17578c2ecf20Sopenharmony_ci	 *
17588c2ecf20Sopenharmony_ci	 * A snapshot can't be written to, so using the object map is always
17598c2ecf20Sopenharmony_ci	 * safe.
17608c2ecf20Sopenharmony_ci	 */
17618c2ecf20Sopenharmony_ci	if (!rbd_is_snap(rbd_dev) && rbd_is_ro(rbd_dev))
17628c2ecf20Sopenharmony_ci		return false;
17638c2ecf20Sopenharmony_ci
17648c2ecf20Sopenharmony_ci	return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) &&
17658c2ecf20Sopenharmony_ci		!(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID));
17668c2ecf20Sopenharmony_ci}
17678c2ecf20Sopenharmony_ci
17688c2ecf20Sopenharmony_cistatic bool rbd_object_map_may_exist(struct rbd_device *rbd_dev, u64 objno)
17698c2ecf20Sopenharmony_ci{
17708c2ecf20Sopenharmony_ci	u8 state;
17718c2ecf20Sopenharmony_ci
17728c2ecf20Sopenharmony_ci	/* fall back to default logic if object map is disabled or invalid */
17738c2ecf20Sopenharmony_ci	if (!use_object_map(rbd_dev))
17748c2ecf20Sopenharmony_ci		return true;
17758c2ecf20Sopenharmony_ci
17768c2ecf20Sopenharmony_ci	state = rbd_object_map_get(rbd_dev, objno);
17778c2ecf20Sopenharmony_ci	return state != OBJECT_NONEXISTENT;
17788c2ecf20Sopenharmony_ci}
17798c2ecf20Sopenharmony_ci
17808c2ecf20Sopenharmony_cistatic void rbd_object_map_name(struct rbd_device *rbd_dev, u64 snap_id,
17818c2ecf20Sopenharmony_ci				struct ceph_object_id *oid)
17828c2ecf20Sopenharmony_ci{
17838c2ecf20Sopenharmony_ci	if (snap_id == CEPH_NOSNAP)
17848c2ecf20Sopenharmony_ci		ceph_oid_printf(oid, "%s%s", RBD_OBJECT_MAP_PREFIX,
17858c2ecf20Sopenharmony_ci				rbd_dev->spec->image_id);
17868c2ecf20Sopenharmony_ci	else
17878c2ecf20Sopenharmony_ci		ceph_oid_printf(oid, "%s%s.%016llx", RBD_OBJECT_MAP_PREFIX,
17888c2ecf20Sopenharmony_ci				rbd_dev->spec->image_id, snap_id);
17898c2ecf20Sopenharmony_ci}
17908c2ecf20Sopenharmony_ci
17918c2ecf20Sopenharmony_cistatic int rbd_object_map_lock(struct rbd_device *rbd_dev)
17928c2ecf20Sopenharmony_ci{
17938c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
17948c2ecf20Sopenharmony_ci	CEPH_DEFINE_OID_ONSTACK(oid);
17958c2ecf20Sopenharmony_ci	u8 lock_type;
17968c2ecf20Sopenharmony_ci	char *lock_tag;
17978c2ecf20Sopenharmony_ci	struct ceph_locker *lockers;
17988c2ecf20Sopenharmony_ci	u32 num_lockers;
17998c2ecf20Sopenharmony_ci	bool broke_lock = false;
18008c2ecf20Sopenharmony_ci	int ret;
18018c2ecf20Sopenharmony_ci
18028c2ecf20Sopenharmony_ci	rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid);
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_ciagain:
18058c2ecf20Sopenharmony_ci	ret = ceph_cls_lock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
18068c2ecf20Sopenharmony_ci			    CEPH_CLS_LOCK_EXCLUSIVE, "", "", "", 0);
18078c2ecf20Sopenharmony_ci	if (ret != -EBUSY || broke_lock) {
18088c2ecf20Sopenharmony_ci		if (ret == -EEXIST)
18098c2ecf20Sopenharmony_ci			ret = 0; /* already locked by myself */
18108c2ecf20Sopenharmony_ci		if (ret)
18118c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "failed to lock object map: %d", ret);
18128c2ecf20Sopenharmony_ci		return ret;
18138c2ecf20Sopenharmony_ci	}
18148c2ecf20Sopenharmony_ci
18158c2ecf20Sopenharmony_ci	ret = ceph_cls_lock_info(osdc, &oid, &rbd_dev->header_oloc,
18168c2ecf20Sopenharmony_ci				 RBD_LOCK_NAME, &lock_type, &lock_tag,
18178c2ecf20Sopenharmony_ci				 &lockers, &num_lockers);
18188c2ecf20Sopenharmony_ci	if (ret) {
18198c2ecf20Sopenharmony_ci		if (ret == -ENOENT)
18208c2ecf20Sopenharmony_ci			goto again;
18218c2ecf20Sopenharmony_ci
18228c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to get object map lockers: %d", ret);
18238c2ecf20Sopenharmony_ci		return ret;
18248c2ecf20Sopenharmony_ci	}
18258c2ecf20Sopenharmony_ci
18268c2ecf20Sopenharmony_ci	kfree(lock_tag);
18278c2ecf20Sopenharmony_ci	if (num_lockers == 0)
18288c2ecf20Sopenharmony_ci		goto again;
18298c2ecf20Sopenharmony_ci
18308c2ecf20Sopenharmony_ci	rbd_warn(rbd_dev, "breaking object map lock owned by %s%llu",
18318c2ecf20Sopenharmony_ci		 ENTITY_NAME(lockers[0].id.name));
18328c2ecf20Sopenharmony_ci
18338c2ecf20Sopenharmony_ci	ret = ceph_cls_break_lock(osdc, &oid, &rbd_dev->header_oloc,
18348c2ecf20Sopenharmony_ci				  RBD_LOCK_NAME, lockers[0].id.cookie,
18358c2ecf20Sopenharmony_ci				  &lockers[0].id.name);
18368c2ecf20Sopenharmony_ci	ceph_free_lockers(lockers, num_lockers);
18378c2ecf20Sopenharmony_ci	if (ret) {
18388c2ecf20Sopenharmony_ci		if (ret == -ENOENT)
18398c2ecf20Sopenharmony_ci			goto again;
18408c2ecf20Sopenharmony_ci
18418c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to break object map lock: %d", ret);
18428c2ecf20Sopenharmony_ci		return ret;
18438c2ecf20Sopenharmony_ci	}
18448c2ecf20Sopenharmony_ci
18458c2ecf20Sopenharmony_ci	broke_lock = true;
18468c2ecf20Sopenharmony_ci	goto again;
18478c2ecf20Sopenharmony_ci}
18488c2ecf20Sopenharmony_ci
18498c2ecf20Sopenharmony_cistatic void rbd_object_map_unlock(struct rbd_device *rbd_dev)
18508c2ecf20Sopenharmony_ci{
18518c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
18528c2ecf20Sopenharmony_ci	CEPH_DEFINE_OID_ONSTACK(oid);
18538c2ecf20Sopenharmony_ci	int ret;
18548c2ecf20Sopenharmony_ci
18558c2ecf20Sopenharmony_ci	rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid);
18568c2ecf20Sopenharmony_ci
18578c2ecf20Sopenharmony_ci	ret = ceph_cls_unlock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME,
18588c2ecf20Sopenharmony_ci			      "");
18598c2ecf20Sopenharmony_ci	if (ret && ret != -ENOENT)
18608c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to unlock object map: %d", ret);
18618c2ecf20Sopenharmony_ci}
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_cistatic int decode_object_map_header(void **p, void *end, u64 *object_map_size)
18648c2ecf20Sopenharmony_ci{
18658c2ecf20Sopenharmony_ci	u8 struct_v;
18668c2ecf20Sopenharmony_ci	u32 struct_len;
18678c2ecf20Sopenharmony_ci	u32 header_len;
18688c2ecf20Sopenharmony_ci	void *header_end;
18698c2ecf20Sopenharmony_ci	int ret;
18708c2ecf20Sopenharmony_ci
18718c2ecf20Sopenharmony_ci	ceph_decode_32_safe(p, end, header_len, e_inval);
18728c2ecf20Sopenharmony_ci	header_end = *p + header_len;
18738c2ecf20Sopenharmony_ci
18748c2ecf20Sopenharmony_ci	ret = ceph_start_decoding(p, end, 1, "BitVector header", &struct_v,
18758c2ecf20Sopenharmony_ci				  &struct_len);
18768c2ecf20Sopenharmony_ci	if (ret)
18778c2ecf20Sopenharmony_ci		return ret;
18788c2ecf20Sopenharmony_ci
18798c2ecf20Sopenharmony_ci	ceph_decode_64_safe(p, end, *object_map_size, e_inval);
18808c2ecf20Sopenharmony_ci
18818c2ecf20Sopenharmony_ci	*p = header_end;
18828c2ecf20Sopenharmony_ci	return 0;
18838c2ecf20Sopenharmony_ci
18848c2ecf20Sopenharmony_cie_inval:
18858c2ecf20Sopenharmony_ci	return -EINVAL;
18868c2ecf20Sopenharmony_ci}
18878c2ecf20Sopenharmony_ci
18888c2ecf20Sopenharmony_cistatic int __rbd_object_map_load(struct rbd_device *rbd_dev)
18898c2ecf20Sopenharmony_ci{
18908c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
18918c2ecf20Sopenharmony_ci	CEPH_DEFINE_OID_ONSTACK(oid);
18928c2ecf20Sopenharmony_ci	struct page **pages;
18938c2ecf20Sopenharmony_ci	void *p, *end;
18948c2ecf20Sopenharmony_ci	size_t reply_len;
18958c2ecf20Sopenharmony_ci	u64 num_objects;
18968c2ecf20Sopenharmony_ci	u64 object_map_bytes;
18978c2ecf20Sopenharmony_ci	u64 object_map_size;
18988c2ecf20Sopenharmony_ci	int num_pages;
18998c2ecf20Sopenharmony_ci	int ret;
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_ci	rbd_assert(!rbd_dev->object_map && !rbd_dev->object_map_size);
19028c2ecf20Sopenharmony_ci
19038c2ecf20Sopenharmony_ci	num_objects = ceph_get_num_objects(&rbd_dev->layout,
19048c2ecf20Sopenharmony_ci					   rbd_dev->mapping.size);
19058c2ecf20Sopenharmony_ci	object_map_bytes = DIV_ROUND_UP_ULL(num_objects * BITS_PER_OBJ,
19068c2ecf20Sopenharmony_ci					    BITS_PER_BYTE);
19078c2ecf20Sopenharmony_ci	num_pages = calc_pages_for(0, object_map_bytes) + 1;
19088c2ecf20Sopenharmony_ci	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
19098c2ecf20Sopenharmony_ci	if (IS_ERR(pages))
19108c2ecf20Sopenharmony_ci		return PTR_ERR(pages);
19118c2ecf20Sopenharmony_ci
19128c2ecf20Sopenharmony_ci	reply_len = num_pages * PAGE_SIZE;
19138c2ecf20Sopenharmony_ci	rbd_object_map_name(rbd_dev, rbd_dev->spec->snap_id, &oid);
19148c2ecf20Sopenharmony_ci	ret = ceph_osdc_call(osdc, &oid, &rbd_dev->header_oloc,
19158c2ecf20Sopenharmony_ci			     "rbd", "object_map_load", CEPH_OSD_FLAG_READ,
19168c2ecf20Sopenharmony_ci			     NULL, 0, pages, &reply_len);
19178c2ecf20Sopenharmony_ci	if (ret)
19188c2ecf20Sopenharmony_ci		goto out;
19198c2ecf20Sopenharmony_ci
19208c2ecf20Sopenharmony_ci	p = page_address(pages[0]);
19218c2ecf20Sopenharmony_ci	end = p + min(reply_len, (size_t)PAGE_SIZE);
19228c2ecf20Sopenharmony_ci	ret = decode_object_map_header(&p, end, &object_map_size);
19238c2ecf20Sopenharmony_ci	if (ret)
19248c2ecf20Sopenharmony_ci		goto out;
19258c2ecf20Sopenharmony_ci
19268c2ecf20Sopenharmony_ci	if (object_map_size != num_objects) {
19278c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "object map size mismatch: %llu vs %llu",
19288c2ecf20Sopenharmony_ci			 object_map_size, num_objects);
19298c2ecf20Sopenharmony_ci		ret = -EINVAL;
19308c2ecf20Sopenharmony_ci		goto out;
19318c2ecf20Sopenharmony_ci	}
19328c2ecf20Sopenharmony_ci
19338c2ecf20Sopenharmony_ci	if (offset_in_page(p) + object_map_bytes > reply_len) {
19348c2ecf20Sopenharmony_ci		ret = -EINVAL;
19358c2ecf20Sopenharmony_ci		goto out;
19368c2ecf20Sopenharmony_ci	}
19378c2ecf20Sopenharmony_ci
19388c2ecf20Sopenharmony_ci	rbd_dev->object_map = kvmalloc(object_map_bytes, GFP_KERNEL);
19398c2ecf20Sopenharmony_ci	if (!rbd_dev->object_map) {
19408c2ecf20Sopenharmony_ci		ret = -ENOMEM;
19418c2ecf20Sopenharmony_ci		goto out;
19428c2ecf20Sopenharmony_ci	}
19438c2ecf20Sopenharmony_ci
19448c2ecf20Sopenharmony_ci	rbd_dev->object_map_size = object_map_size;
19458c2ecf20Sopenharmony_ci	ceph_copy_from_page_vector(pages, rbd_dev->object_map,
19468c2ecf20Sopenharmony_ci				   offset_in_page(p), object_map_bytes);
19478c2ecf20Sopenharmony_ci
19488c2ecf20Sopenharmony_ciout:
19498c2ecf20Sopenharmony_ci	ceph_release_page_vector(pages, num_pages);
19508c2ecf20Sopenharmony_ci	return ret;
19518c2ecf20Sopenharmony_ci}
19528c2ecf20Sopenharmony_ci
19538c2ecf20Sopenharmony_cistatic void rbd_object_map_free(struct rbd_device *rbd_dev)
19548c2ecf20Sopenharmony_ci{
19558c2ecf20Sopenharmony_ci	kvfree(rbd_dev->object_map);
19568c2ecf20Sopenharmony_ci	rbd_dev->object_map = NULL;
19578c2ecf20Sopenharmony_ci	rbd_dev->object_map_size = 0;
19588c2ecf20Sopenharmony_ci}
19598c2ecf20Sopenharmony_ci
19608c2ecf20Sopenharmony_cistatic int rbd_object_map_load(struct rbd_device *rbd_dev)
19618c2ecf20Sopenharmony_ci{
19628c2ecf20Sopenharmony_ci	int ret;
19638c2ecf20Sopenharmony_ci
19648c2ecf20Sopenharmony_ci	ret = __rbd_object_map_load(rbd_dev);
19658c2ecf20Sopenharmony_ci	if (ret)
19668c2ecf20Sopenharmony_ci		return ret;
19678c2ecf20Sopenharmony_ci
19688c2ecf20Sopenharmony_ci	ret = rbd_dev_v2_get_flags(rbd_dev);
19698c2ecf20Sopenharmony_ci	if (ret) {
19708c2ecf20Sopenharmony_ci		rbd_object_map_free(rbd_dev);
19718c2ecf20Sopenharmony_ci		return ret;
19728c2ecf20Sopenharmony_ci	}
19738c2ecf20Sopenharmony_ci
19748c2ecf20Sopenharmony_ci	if (rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)
19758c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "object map is invalid");
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci	return 0;
19788c2ecf20Sopenharmony_ci}
19798c2ecf20Sopenharmony_ci
/*
 * Lock the object map and load it.  The lock is dropped again if the
 * load fails.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int rbd_object_map_open(struct rbd_device *rbd_dev)
{
	int err;

	err = rbd_object_map_lock(rbd_dev);
	if (err)
		return err;

	err = rbd_object_map_load(rbd_dev);
	if (err)
		rbd_object_map_unlock(rbd_dev);

	return err;
}
19968c2ecf20Sopenharmony_ci
/*
 * Counterpart of rbd_object_map_open(): discard the in-memory object
 * map first, then release the object map lock.
 */
static void rbd_object_map_close(struct rbd_device *rbd_dev)
{
	rbd_object_map_free(rbd_dev);
	rbd_object_map_unlock(rbd_dev);
}
20028c2ecf20Sopenharmony_ci
20038c2ecf20Sopenharmony_ci/*
20048c2ecf20Sopenharmony_ci * This function needs snap_id (or more precisely just something to
20058c2ecf20Sopenharmony_ci * distinguish between HEAD and snapshot object maps), new_state and
20068c2ecf20Sopenharmony_ci * current_state that were passed to rbd_object_map_update().
20078c2ecf20Sopenharmony_ci *
20088c2ecf20Sopenharmony_ci * To avoid allocating and stashing a context we piggyback on the OSD
20098c2ecf20Sopenharmony_ci * request.  A HEAD update has two ops (assert_locked).  For new_state
20108c2ecf20Sopenharmony_ci * and current_state we decode our own object_map_update op, encoded in
20118c2ecf20Sopenharmony_ci * rbd_cls_object_map_update().
20128c2ecf20Sopenharmony_ci */
20138c2ecf20Sopenharmony_cistatic int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
20148c2ecf20Sopenharmony_ci					struct ceph_osd_request *osd_req)
20158c2ecf20Sopenharmony_ci{
20168c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
20178c2ecf20Sopenharmony_ci	struct ceph_osd_data *osd_data;
20188c2ecf20Sopenharmony_ci	u64 objno;
20198c2ecf20Sopenharmony_ci	u8 state, new_state, current_state;
20208c2ecf20Sopenharmony_ci	bool has_current_state;
20218c2ecf20Sopenharmony_ci	void *p;
20228c2ecf20Sopenharmony_ci
20238c2ecf20Sopenharmony_ci	if (osd_req->r_result)
20248c2ecf20Sopenharmony_ci		return osd_req->r_result;
20258c2ecf20Sopenharmony_ci
20268c2ecf20Sopenharmony_ci	/*
20278c2ecf20Sopenharmony_ci	 * Nothing to do for a snapshot object map.
20288c2ecf20Sopenharmony_ci	 */
20298c2ecf20Sopenharmony_ci	if (osd_req->r_num_ops == 1)
20308c2ecf20Sopenharmony_ci		return 0;
20318c2ecf20Sopenharmony_ci
20328c2ecf20Sopenharmony_ci	/*
20338c2ecf20Sopenharmony_ci	 * Update in-memory HEAD object map.
20348c2ecf20Sopenharmony_ci	 */
20358c2ecf20Sopenharmony_ci	rbd_assert(osd_req->r_num_ops == 2);
20368c2ecf20Sopenharmony_ci	osd_data = osd_req_op_data(osd_req, 1, cls, request_data);
20378c2ecf20Sopenharmony_ci	rbd_assert(osd_data->type == CEPH_OSD_DATA_TYPE_PAGES);
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	p = page_address(osd_data->pages[0]);
20408c2ecf20Sopenharmony_ci	objno = ceph_decode_64(&p);
20418c2ecf20Sopenharmony_ci	rbd_assert(objno == obj_req->ex.oe_objno);
20428c2ecf20Sopenharmony_ci	rbd_assert(ceph_decode_64(&p) == objno + 1);
20438c2ecf20Sopenharmony_ci	new_state = ceph_decode_8(&p);
20448c2ecf20Sopenharmony_ci	has_current_state = ceph_decode_8(&p);
20458c2ecf20Sopenharmony_ci	if (has_current_state)
20468c2ecf20Sopenharmony_ci		current_state = ceph_decode_8(&p);
20478c2ecf20Sopenharmony_ci
20488c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev->object_map_lock);
20498c2ecf20Sopenharmony_ci	state = __rbd_object_map_get(rbd_dev, objno);
20508c2ecf20Sopenharmony_ci	if (!has_current_state || current_state == state ||
20518c2ecf20Sopenharmony_ci	    (current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN))
20528c2ecf20Sopenharmony_ci		__rbd_object_map_set(rbd_dev, objno, new_state);
20538c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev->object_map_lock);
20548c2ecf20Sopenharmony_ci
20558c2ecf20Sopenharmony_ci	return 0;
20568c2ecf20Sopenharmony_ci}
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_cistatic void rbd_object_map_callback(struct ceph_osd_request *osd_req)
20598c2ecf20Sopenharmony_ci{
20608c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
20618c2ecf20Sopenharmony_ci	int result;
20628c2ecf20Sopenharmony_ci
20638c2ecf20Sopenharmony_ci	dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
20648c2ecf20Sopenharmony_ci	     osd_req->r_result, obj_req);
20658c2ecf20Sopenharmony_ci
20668c2ecf20Sopenharmony_ci	result = rbd_object_map_update_finish(obj_req, osd_req);
20678c2ecf20Sopenharmony_ci	rbd_obj_handle_request(obj_req, result);
20688c2ecf20Sopenharmony_ci}
20698c2ecf20Sopenharmony_ci
20708c2ecf20Sopenharmony_cistatic bool update_needed(struct rbd_device *rbd_dev, u64 objno, u8 new_state)
20718c2ecf20Sopenharmony_ci{
20728c2ecf20Sopenharmony_ci	u8 state = rbd_object_map_get(rbd_dev, objno);
20738c2ecf20Sopenharmony_ci
20748c2ecf20Sopenharmony_ci	if (state == new_state ||
20758c2ecf20Sopenharmony_ci	    (new_state == OBJECT_PENDING && state == OBJECT_NONEXISTENT) ||
20768c2ecf20Sopenharmony_ci	    (new_state == OBJECT_NONEXISTENT && state != OBJECT_PENDING))
20778c2ecf20Sopenharmony_ci		return false;
20788c2ecf20Sopenharmony_ci
20798c2ecf20Sopenharmony_ci	return true;
20808c2ecf20Sopenharmony_ci}
20818c2ecf20Sopenharmony_ci
20828c2ecf20Sopenharmony_cistatic int rbd_cls_object_map_update(struct ceph_osd_request *req,
20838c2ecf20Sopenharmony_ci				     int which, u64 objno, u8 new_state,
20848c2ecf20Sopenharmony_ci				     const u8 *current_state)
20858c2ecf20Sopenharmony_ci{
20868c2ecf20Sopenharmony_ci	struct page **pages;
20878c2ecf20Sopenharmony_ci	void *p, *start;
20888c2ecf20Sopenharmony_ci	int ret;
20898c2ecf20Sopenharmony_ci
20908c2ecf20Sopenharmony_ci	ret = osd_req_op_cls_init(req, which, "rbd", "object_map_update");
20918c2ecf20Sopenharmony_ci	if (ret)
20928c2ecf20Sopenharmony_ci		return ret;
20938c2ecf20Sopenharmony_ci
20948c2ecf20Sopenharmony_ci	pages = ceph_alloc_page_vector(1, GFP_NOIO);
20958c2ecf20Sopenharmony_ci	if (IS_ERR(pages))
20968c2ecf20Sopenharmony_ci		return PTR_ERR(pages);
20978c2ecf20Sopenharmony_ci
20988c2ecf20Sopenharmony_ci	p = start = page_address(pages[0]);
20998c2ecf20Sopenharmony_ci	ceph_encode_64(&p, objno);
21008c2ecf20Sopenharmony_ci	ceph_encode_64(&p, objno + 1);
21018c2ecf20Sopenharmony_ci	ceph_encode_8(&p, new_state);
21028c2ecf20Sopenharmony_ci	if (current_state) {
21038c2ecf20Sopenharmony_ci		ceph_encode_8(&p, 1);
21048c2ecf20Sopenharmony_ci		ceph_encode_8(&p, *current_state);
21058c2ecf20Sopenharmony_ci	} else {
21068c2ecf20Sopenharmony_ci		ceph_encode_8(&p, 0);
21078c2ecf20Sopenharmony_ci	}
21088c2ecf20Sopenharmony_ci
21098c2ecf20Sopenharmony_ci	osd_req_op_cls_request_data_pages(req, which, pages, p - start, 0,
21108c2ecf20Sopenharmony_ci					  false, true);
21118c2ecf20Sopenharmony_ci	return 0;
21128c2ecf20Sopenharmony_ci}
21138c2ecf20Sopenharmony_ci
21148c2ecf20Sopenharmony_ci/*
21158c2ecf20Sopenharmony_ci * Return:
21168c2ecf20Sopenharmony_ci *   0 - object map update sent
21178c2ecf20Sopenharmony_ci *   1 - object map update isn't needed
21188c2ecf20Sopenharmony_ci *  <0 - error
21198c2ecf20Sopenharmony_ci */
21208c2ecf20Sopenharmony_cistatic int rbd_object_map_update(struct rbd_obj_request *obj_req, u64 snap_id,
21218c2ecf20Sopenharmony_ci				 u8 new_state, const u8 *current_state)
21228c2ecf20Sopenharmony_ci{
21238c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
21248c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
21258c2ecf20Sopenharmony_ci	struct ceph_osd_request *req;
21268c2ecf20Sopenharmony_ci	int num_ops = 1;
21278c2ecf20Sopenharmony_ci	int which = 0;
21288c2ecf20Sopenharmony_ci	int ret;
21298c2ecf20Sopenharmony_ci
21308c2ecf20Sopenharmony_ci	if (snap_id == CEPH_NOSNAP) {
21318c2ecf20Sopenharmony_ci		if (!update_needed(rbd_dev, obj_req->ex.oe_objno, new_state))
21328c2ecf20Sopenharmony_ci			return 1;
21338c2ecf20Sopenharmony_ci
21348c2ecf20Sopenharmony_ci		num_ops++; /* assert_locked */
21358c2ecf20Sopenharmony_ci	}
21368c2ecf20Sopenharmony_ci
21378c2ecf20Sopenharmony_ci	req = ceph_osdc_alloc_request(osdc, NULL, num_ops, false, GFP_NOIO);
21388c2ecf20Sopenharmony_ci	if (!req)
21398c2ecf20Sopenharmony_ci		return -ENOMEM;
21408c2ecf20Sopenharmony_ci
21418c2ecf20Sopenharmony_ci	list_add_tail(&req->r_private_item, &obj_req->osd_reqs);
21428c2ecf20Sopenharmony_ci	req->r_callback = rbd_object_map_callback;
21438c2ecf20Sopenharmony_ci	req->r_priv = obj_req;
21448c2ecf20Sopenharmony_ci
21458c2ecf20Sopenharmony_ci	rbd_object_map_name(rbd_dev, snap_id, &req->r_base_oid);
21468c2ecf20Sopenharmony_ci	ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc);
21478c2ecf20Sopenharmony_ci	req->r_flags = CEPH_OSD_FLAG_WRITE;
21488c2ecf20Sopenharmony_ci	ktime_get_real_ts64(&req->r_mtime);
21498c2ecf20Sopenharmony_ci
21508c2ecf20Sopenharmony_ci	if (snap_id == CEPH_NOSNAP) {
21518c2ecf20Sopenharmony_ci		/*
21528c2ecf20Sopenharmony_ci		 * Protect against possible race conditions during lock
21538c2ecf20Sopenharmony_ci		 * ownership transitions.
21548c2ecf20Sopenharmony_ci		 */
21558c2ecf20Sopenharmony_ci		ret = ceph_cls_assert_locked(req, which++, RBD_LOCK_NAME,
21568c2ecf20Sopenharmony_ci					     CEPH_CLS_LOCK_EXCLUSIVE, "", "");
21578c2ecf20Sopenharmony_ci		if (ret)
21588c2ecf20Sopenharmony_ci			return ret;
21598c2ecf20Sopenharmony_ci	}
21608c2ecf20Sopenharmony_ci
21618c2ecf20Sopenharmony_ci	ret = rbd_cls_object_map_update(req, which, obj_req->ex.oe_objno,
21628c2ecf20Sopenharmony_ci					new_state, current_state);
21638c2ecf20Sopenharmony_ci	if (ret)
21648c2ecf20Sopenharmony_ci		return ret;
21658c2ecf20Sopenharmony_ci
21668c2ecf20Sopenharmony_ci	ret = ceph_osdc_alloc_messages(req, GFP_NOIO);
21678c2ecf20Sopenharmony_ci	if (ret)
21688c2ecf20Sopenharmony_ci		return ret;
21698c2ecf20Sopenharmony_ci
21708c2ecf20Sopenharmony_ci	ceph_osdc_start_request(osdc, req, false);
21718c2ecf20Sopenharmony_ci	return 0;
21728c2ecf20Sopenharmony_ci}
21738c2ecf20Sopenharmony_ci
21748c2ecf20Sopenharmony_cistatic void prune_extents(struct ceph_file_extent *img_extents,
21758c2ecf20Sopenharmony_ci			  u32 *num_img_extents, u64 overlap)
21768c2ecf20Sopenharmony_ci{
21778c2ecf20Sopenharmony_ci	u32 cnt = *num_img_extents;
21788c2ecf20Sopenharmony_ci
21798c2ecf20Sopenharmony_ci	/* drop extents completely beyond the overlap */
21808c2ecf20Sopenharmony_ci	while (cnt && img_extents[cnt - 1].fe_off >= overlap)
21818c2ecf20Sopenharmony_ci		cnt--;
21828c2ecf20Sopenharmony_ci
21838c2ecf20Sopenharmony_ci	if (cnt) {
21848c2ecf20Sopenharmony_ci		struct ceph_file_extent *ex = &img_extents[cnt - 1];
21858c2ecf20Sopenharmony_ci
21868c2ecf20Sopenharmony_ci		/* trim final overlapping extent */
21878c2ecf20Sopenharmony_ci		if (ex->fe_off + ex->fe_len > overlap)
21888c2ecf20Sopenharmony_ci			ex->fe_len = overlap - ex->fe_off;
21898c2ecf20Sopenharmony_ci	}
21908c2ecf20Sopenharmony_ci
21918c2ecf20Sopenharmony_ci	*num_img_extents = cnt;
21928c2ecf20Sopenharmony_ci}
21938c2ecf20Sopenharmony_ci
21948c2ecf20Sopenharmony_ci/*
21958c2ecf20Sopenharmony_ci * Determine the byte range(s) covered by either just the object extent
21968c2ecf20Sopenharmony_ci * or the entire object in the parent image.
21978c2ecf20Sopenharmony_ci */
21988c2ecf20Sopenharmony_cistatic int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req,
21998c2ecf20Sopenharmony_ci				    bool entire)
22008c2ecf20Sopenharmony_ci{
22018c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
22028c2ecf20Sopenharmony_ci	int ret;
22038c2ecf20Sopenharmony_ci
22048c2ecf20Sopenharmony_ci	if (!rbd_dev->parent_overlap)
22058c2ecf20Sopenharmony_ci		return 0;
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_ci	ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno,
22088c2ecf20Sopenharmony_ci				  entire ? 0 : obj_req->ex.oe_off,
22098c2ecf20Sopenharmony_ci				  entire ? rbd_dev->layout.object_size :
22108c2ecf20Sopenharmony_ci							obj_req->ex.oe_len,
22118c2ecf20Sopenharmony_ci				  &obj_req->img_extents,
22128c2ecf20Sopenharmony_ci				  &obj_req->num_img_extents);
22138c2ecf20Sopenharmony_ci	if (ret)
22148c2ecf20Sopenharmony_ci		return ret;
22158c2ecf20Sopenharmony_ci
22168c2ecf20Sopenharmony_ci	prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
22178c2ecf20Sopenharmony_ci		      rbd_dev->parent_overlap);
22188c2ecf20Sopenharmony_ci	return 0;
22198c2ecf20Sopenharmony_ci}
22208c2ecf20Sopenharmony_ci
22218c2ecf20Sopenharmony_cistatic void rbd_osd_setup_data(struct ceph_osd_request *osd_req, int which)
22228c2ecf20Sopenharmony_ci{
22238c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
22248c2ecf20Sopenharmony_ci
22258c2ecf20Sopenharmony_ci	switch (obj_req->img_request->data_type) {
22268c2ecf20Sopenharmony_ci	case OBJ_REQUEST_BIO:
22278c2ecf20Sopenharmony_ci		osd_req_op_extent_osd_data_bio(osd_req, which,
22288c2ecf20Sopenharmony_ci					       &obj_req->bio_pos,
22298c2ecf20Sopenharmony_ci					       obj_req->ex.oe_len);
22308c2ecf20Sopenharmony_ci		break;
22318c2ecf20Sopenharmony_ci	case OBJ_REQUEST_BVECS:
22328c2ecf20Sopenharmony_ci	case OBJ_REQUEST_OWN_BVECS:
22338c2ecf20Sopenharmony_ci		rbd_assert(obj_req->bvec_pos.iter.bi_size ==
22348c2ecf20Sopenharmony_ci							obj_req->ex.oe_len);
22358c2ecf20Sopenharmony_ci		rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
22368c2ecf20Sopenharmony_ci		osd_req_op_extent_osd_data_bvec_pos(osd_req, which,
22378c2ecf20Sopenharmony_ci						    &obj_req->bvec_pos);
22388c2ecf20Sopenharmony_ci		break;
22398c2ecf20Sopenharmony_ci	default:
22408c2ecf20Sopenharmony_ci		BUG();
22418c2ecf20Sopenharmony_ci	}
22428c2ecf20Sopenharmony_ci}
22438c2ecf20Sopenharmony_ci
22448c2ecf20Sopenharmony_cistatic int rbd_osd_setup_stat(struct ceph_osd_request *osd_req, int which)
22458c2ecf20Sopenharmony_ci{
22468c2ecf20Sopenharmony_ci	struct page **pages;
22478c2ecf20Sopenharmony_ci
22488c2ecf20Sopenharmony_ci	/*
22498c2ecf20Sopenharmony_ci	 * The response data for a STAT call consists of:
22508c2ecf20Sopenharmony_ci	 *     le64 length;
22518c2ecf20Sopenharmony_ci	 *     struct {
22528c2ecf20Sopenharmony_ci	 *         le32 tv_sec;
22538c2ecf20Sopenharmony_ci	 *         le32 tv_nsec;
22548c2ecf20Sopenharmony_ci	 *     } mtime;
22558c2ecf20Sopenharmony_ci	 */
22568c2ecf20Sopenharmony_ci	pages = ceph_alloc_page_vector(1, GFP_NOIO);
22578c2ecf20Sopenharmony_ci	if (IS_ERR(pages))
22588c2ecf20Sopenharmony_ci		return PTR_ERR(pages);
22598c2ecf20Sopenharmony_ci
22608c2ecf20Sopenharmony_ci	osd_req_op_init(osd_req, which, CEPH_OSD_OP_STAT, 0);
22618c2ecf20Sopenharmony_ci	osd_req_op_raw_data_in_pages(osd_req, which, pages,
22628c2ecf20Sopenharmony_ci				     8 + sizeof(struct ceph_timespec),
22638c2ecf20Sopenharmony_ci				     0, false, true);
22648c2ecf20Sopenharmony_ci	return 0;
22658c2ecf20Sopenharmony_ci}
22668c2ecf20Sopenharmony_ci
22678c2ecf20Sopenharmony_cistatic int rbd_osd_setup_copyup(struct ceph_osd_request *osd_req, int which,
22688c2ecf20Sopenharmony_ci				u32 bytes)
22698c2ecf20Sopenharmony_ci{
22708c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
22718c2ecf20Sopenharmony_ci	int ret;
22728c2ecf20Sopenharmony_ci
22738c2ecf20Sopenharmony_ci	ret = osd_req_op_cls_init(osd_req, which, "rbd", "copyup");
22748c2ecf20Sopenharmony_ci	if (ret)
22758c2ecf20Sopenharmony_ci		return ret;
22768c2ecf20Sopenharmony_ci
22778c2ecf20Sopenharmony_ci	osd_req_op_cls_request_data_bvecs(osd_req, which, obj_req->copyup_bvecs,
22788c2ecf20Sopenharmony_ci					  obj_req->copyup_bvec_count, bytes);
22798c2ecf20Sopenharmony_ci	return 0;
22808c2ecf20Sopenharmony_ci}
22818c2ecf20Sopenharmony_ci
22828c2ecf20Sopenharmony_cistatic int rbd_obj_init_read(struct rbd_obj_request *obj_req)
22838c2ecf20Sopenharmony_ci{
22848c2ecf20Sopenharmony_ci	obj_req->read_state = RBD_OBJ_READ_START;
22858c2ecf20Sopenharmony_ci	return 0;
22868c2ecf20Sopenharmony_ci}
22878c2ecf20Sopenharmony_ci
22888c2ecf20Sopenharmony_cistatic void __rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req,
22898c2ecf20Sopenharmony_ci				      int which)
22908c2ecf20Sopenharmony_ci{
22918c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
22928c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
22938c2ecf20Sopenharmony_ci	u16 opcode;
22948c2ecf20Sopenharmony_ci
22958c2ecf20Sopenharmony_ci	if (!use_object_map(rbd_dev) ||
22968c2ecf20Sopenharmony_ci	    !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) {
22978c2ecf20Sopenharmony_ci		osd_req_op_alloc_hint_init(osd_req, which++,
22988c2ecf20Sopenharmony_ci					   rbd_dev->layout.object_size,
22998c2ecf20Sopenharmony_ci					   rbd_dev->layout.object_size,
23008c2ecf20Sopenharmony_ci					   rbd_dev->opts->alloc_hint_flags);
23018c2ecf20Sopenharmony_ci	}
23028c2ecf20Sopenharmony_ci
23038c2ecf20Sopenharmony_ci	if (rbd_obj_is_entire(obj_req))
23048c2ecf20Sopenharmony_ci		opcode = CEPH_OSD_OP_WRITEFULL;
23058c2ecf20Sopenharmony_ci	else
23068c2ecf20Sopenharmony_ci		opcode = CEPH_OSD_OP_WRITE;
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci	osd_req_op_extent_init(osd_req, which, opcode,
23098c2ecf20Sopenharmony_ci			       obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
23108c2ecf20Sopenharmony_ci	rbd_osd_setup_data(osd_req, which);
23118c2ecf20Sopenharmony_ci}
23128c2ecf20Sopenharmony_ci
23138c2ecf20Sopenharmony_cistatic int rbd_obj_init_write(struct rbd_obj_request *obj_req)
23148c2ecf20Sopenharmony_ci{
23158c2ecf20Sopenharmony_ci	int ret;
23168c2ecf20Sopenharmony_ci
23178c2ecf20Sopenharmony_ci	/* reverse map the entire object onto the parent */
23188c2ecf20Sopenharmony_ci	ret = rbd_obj_calc_img_extents(obj_req, true);
23198c2ecf20Sopenharmony_ci	if (ret)
23208c2ecf20Sopenharmony_ci		return ret;
23218c2ecf20Sopenharmony_ci
23228c2ecf20Sopenharmony_ci	obj_req->write_state = RBD_OBJ_WRITE_START;
23238c2ecf20Sopenharmony_ci	return 0;
23248c2ecf20Sopenharmony_ci}
23258c2ecf20Sopenharmony_ci
23268c2ecf20Sopenharmony_cistatic u16 truncate_or_zero_opcode(struct rbd_obj_request *obj_req)
23278c2ecf20Sopenharmony_ci{
23288c2ecf20Sopenharmony_ci	return rbd_obj_is_tail(obj_req) ? CEPH_OSD_OP_TRUNCATE :
23298c2ecf20Sopenharmony_ci					  CEPH_OSD_OP_ZERO;
23308c2ecf20Sopenharmony_ci}
23318c2ecf20Sopenharmony_ci
23328c2ecf20Sopenharmony_cistatic void __rbd_osd_setup_discard_ops(struct ceph_osd_request *osd_req,
23338c2ecf20Sopenharmony_ci					int which)
23348c2ecf20Sopenharmony_ci{
23358c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
23368c2ecf20Sopenharmony_ci
23378c2ecf20Sopenharmony_ci	if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) {
23388c2ecf20Sopenharmony_ci		rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
23398c2ecf20Sopenharmony_ci		osd_req_op_init(osd_req, which, CEPH_OSD_OP_DELETE, 0);
23408c2ecf20Sopenharmony_ci	} else {
23418c2ecf20Sopenharmony_ci		osd_req_op_extent_init(osd_req, which,
23428c2ecf20Sopenharmony_ci				       truncate_or_zero_opcode(obj_req),
23438c2ecf20Sopenharmony_ci				       obj_req->ex.oe_off, obj_req->ex.oe_len,
23448c2ecf20Sopenharmony_ci				       0, 0);
23458c2ecf20Sopenharmony_ci	}
23468c2ecf20Sopenharmony_ci}
23478c2ecf20Sopenharmony_ci
23488c2ecf20Sopenharmony_cistatic int rbd_obj_init_discard(struct rbd_obj_request *obj_req)
23498c2ecf20Sopenharmony_ci{
23508c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
23518c2ecf20Sopenharmony_ci	u64 off, next_off;
23528c2ecf20Sopenharmony_ci	int ret;
23538c2ecf20Sopenharmony_ci
23548c2ecf20Sopenharmony_ci	/*
23558c2ecf20Sopenharmony_ci	 * Align the range to alloc_size boundary and punt on discards
23568c2ecf20Sopenharmony_ci	 * that are too small to free up any space.
23578c2ecf20Sopenharmony_ci	 *
23588c2ecf20Sopenharmony_ci	 * alloc_size == object_size && is_tail() is a special case for
23598c2ecf20Sopenharmony_ci	 * filestore with filestore_punch_hole = false, needed to allow
23608c2ecf20Sopenharmony_ci	 * truncate (in addition to delete).
23618c2ecf20Sopenharmony_ci	 */
23628c2ecf20Sopenharmony_ci	if (rbd_dev->opts->alloc_size != rbd_dev->layout.object_size ||
23638c2ecf20Sopenharmony_ci	    !rbd_obj_is_tail(obj_req)) {
23648c2ecf20Sopenharmony_ci		off = round_up(obj_req->ex.oe_off, rbd_dev->opts->alloc_size);
23658c2ecf20Sopenharmony_ci		next_off = round_down(obj_req->ex.oe_off + obj_req->ex.oe_len,
23668c2ecf20Sopenharmony_ci				      rbd_dev->opts->alloc_size);
23678c2ecf20Sopenharmony_ci		if (off >= next_off)
23688c2ecf20Sopenharmony_ci			return 1;
23698c2ecf20Sopenharmony_ci
23708c2ecf20Sopenharmony_ci		dout("%s %p %llu~%llu -> %llu~%llu\n", __func__,
23718c2ecf20Sopenharmony_ci		     obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len,
23728c2ecf20Sopenharmony_ci		     off, next_off - off);
23738c2ecf20Sopenharmony_ci		obj_req->ex.oe_off = off;
23748c2ecf20Sopenharmony_ci		obj_req->ex.oe_len = next_off - off;
23758c2ecf20Sopenharmony_ci	}
23768c2ecf20Sopenharmony_ci
23778c2ecf20Sopenharmony_ci	/* reverse map the entire object onto the parent */
23788c2ecf20Sopenharmony_ci	ret = rbd_obj_calc_img_extents(obj_req, true);
23798c2ecf20Sopenharmony_ci	if (ret)
23808c2ecf20Sopenharmony_ci		return ret;
23818c2ecf20Sopenharmony_ci
23828c2ecf20Sopenharmony_ci	obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
23838c2ecf20Sopenharmony_ci	if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents)
23848c2ecf20Sopenharmony_ci		obj_req->flags |= RBD_OBJ_FLAG_DELETION;
23858c2ecf20Sopenharmony_ci
23868c2ecf20Sopenharmony_ci	obj_req->write_state = RBD_OBJ_WRITE_START;
23878c2ecf20Sopenharmony_ci	return 0;
23888c2ecf20Sopenharmony_ci}
23898c2ecf20Sopenharmony_ci
23908c2ecf20Sopenharmony_cistatic void __rbd_osd_setup_zeroout_ops(struct ceph_osd_request *osd_req,
23918c2ecf20Sopenharmony_ci					int which)
23928c2ecf20Sopenharmony_ci{
23938c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
23948c2ecf20Sopenharmony_ci	u16 opcode;
23958c2ecf20Sopenharmony_ci
23968c2ecf20Sopenharmony_ci	if (rbd_obj_is_entire(obj_req)) {
23978c2ecf20Sopenharmony_ci		if (obj_req->num_img_extents) {
23988c2ecf20Sopenharmony_ci			if (!(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
23998c2ecf20Sopenharmony_ci				osd_req_op_init(osd_req, which++,
24008c2ecf20Sopenharmony_ci						CEPH_OSD_OP_CREATE, 0);
24018c2ecf20Sopenharmony_ci			opcode = CEPH_OSD_OP_TRUNCATE;
24028c2ecf20Sopenharmony_ci		} else {
24038c2ecf20Sopenharmony_ci			rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION);
24048c2ecf20Sopenharmony_ci			osd_req_op_init(osd_req, which++,
24058c2ecf20Sopenharmony_ci					CEPH_OSD_OP_DELETE, 0);
24068c2ecf20Sopenharmony_ci			opcode = 0;
24078c2ecf20Sopenharmony_ci		}
24088c2ecf20Sopenharmony_ci	} else {
24098c2ecf20Sopenharmony_ci		opcode = truncate_or_zero_opcode(obj_req);
24108c2ecf20Sopenharmony_ci	}
24118c2ecf20Sopenharmony_ci
24128c2ecf20Sopenharmony_ci	if (opcode)
24138c2ecf20Sopenharmony_ci		osd_req_op_extent_init(osd_req, which, opcode,
24148c2ecf20Sopenharmony_ci				       obj_req->ex.oe_off, obj_req->ex.oe_len,
24158c2ecf20Sopenharmony_ci				       0, 0);
24168c2ecf20Sopenharmony_ci}
24178c2ecf20Sopenharmony_ci
24188c2ecf20Sopenharmony_cistatic int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req)
24198c2ecf20Sopenharmony_ci{
24208c2ecf20Sopenharmony_ci	int ret;
24218c2ecf20Sopenharmony_ci
24228c2ecf20Sopenharmony_ci	/* reverse map the entire object onto the parent */
24238c2ecf20Sopenharmony_ci	ret = rbd_obj_calc_img_extents(obj_req, true);
24248c2ecf20Sopenharmony_ci	if (ret)
24258c2ecf20Sopenharmony_ci		return ret;
24268c2ecf20Sopenharmony_ci
24278c2ecf20Sopenharmony_ci	if (!obj_req->num_img_extents) {
24288c2ecf20Sopenharmony_ci		obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT;
24298c2ecf20Sopenharmony_ci		if (rbd_obj_is_entire(obj_req))
24308c2ecf20Sopenharmony_ci			obj_req->flags |= RBD_OBJ_FLAG_DELETION;
24318c2ecf20Sopenharmony_ci	}
24328c2ecf20Sopenharmony_ci
24338c2ecf20Sopenharmony_ci	obj_req->write_state = RBD_OBJ_WRITE_START;
24348c2ecf20Sopenharmony_ci	return 0;
24358c2ecf20Sopenharmony_ci}
24368c2ecf20Sopenharmony_ci
24378c2ecf20Sopenharmony_cistatic int count_write_ops(struct rbd_obj_request *obj_req)
24388c2ecf20Sopenharmony_ci{
24398c2ecf20Sopenharmony_ci	struct rbd_img_request *img_req = obj_req->img_request;
24408c2ecf20Sopenharmony_ci
24418c2ecf20Sopenharmony_ci	switch (img_req->op_type) {
24428c2ecf20Sopenharmony_ci	case OBJ_OP_WRITE:
24438c2ecf20Sopenharmony_ci		if (!use_object_map(img_req->rbd_dev) ||
24448c2ecf20Sopenharmony_ci		    !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST))
24458c2ecf20Sopenharmony_ci			return 2; /* setallochint + write/writefull */
24468c2ecf20Sopenharmony_ci
24478c2ecf20Sopenharmony_ci		return 1; /* write/writefull */
24488c2ecf20Sopenharmony_ci	case OBJ_OP_DISCARD:
24498c2ecf20Sopenharmony_ci		return 1; /* delete/truncate/zero */
24508c2ecf20Sopenharmony_ci	case OBJ_OP_ZEROOUT:
24518c2ecf20Sopenharmony_ci		if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents &&
24528c2ecf20Sopenharmony_ci		    !(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED))
24538c2ecf20Sopenharmony_ci			return 2; /* create + truncate */
24548c2ecf20Sopenharmony_ci
24558c2ecf20Sopenharmony_ci		return 1; /* delete/truncate/zero */
24568c2ecf20Sopenharmony_ci	default:
24578c2ecf20Sopenharmony_ci		BUG();
24588c2ecf20Sopenharmony_ci	}
24598c2ecf20Sopenharmony_ci}
24608c2ecf20Sopenharmony_ci
24618c2ecf20Sopenharmony_cistatic void rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req,
24628c2ecf20Sopenharmony_ci				    int which)
24638c2ecf20Sopenharmony_ci{
24648c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req = osd_req->r_priv;
24658c2ecf20Sopenharmony_ci
24668c2ecf20Sopenharmony_ci	switch (obj_req->img_request->op_type) {
24678c2ecf20Sopenharmony_ci	case OBJ_OP_WRITE:
24688c2ecf20Sopenharmony_ci		__rbd_osd_setup_write_ops(osd_req, which);
24698c2ecf20Sopenharmony_ci		break;
24708c2ecf20Sopenharmony_ci	case OBJ_OP_DISCARD:
24718c2ecf20Sopenharmony_ci		__rbd_osd_setup_discard_ops(osd_req, which);
24728c2ecf20Sopenharmony_ci		break;
24738c2ecf20Sopenharmony_ci	case OBJ_OP_ZEROOUT:
24748c2ecf20Sopenharmony_ci		__rbd_osd_setup_zeroout_ops(osd_req, which);
24758c2ecf20Sopenharmony_ci		break;
24768c2ecf20Sopenharmony_ci	default:
24778c2ecf20Sopenharmony_ci		BUG();
24788c2ecf20Sopenharmony_ci	}
24798c2ecf20Sopenharmony_ci}
24808c2ecf20Sopenharmony_ci
24818c2ecf20Sopenharmony_ci/*
24828c2ecf20Sopenharmony_ci * Prune the list of object requests (adjust offset and/or length, drop
24838c2ecf20Sopenharmony_ci * redundant requests).  Prepare object request state machines and image
24848c2ecf20Sopenharmony_ci * request state machine for execution.
24858c2ecf20Sopenharmony_ci */
24868c2ecf20Sopenharmony_cistatic int __rbd_img_fill_request(struct rbd_img_request *img_req)
24878c2ecf20Sopenharmony_ci{
24888c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req, *next_obj_req;
24898c2ecf20Sopenharmony_ci	int ret;
24908c2ecf20Sopenharmony_ci
24918c2ecf20Sopenharmony_ci	for_each_obj_request_safe(img_req, obj_req, next_obj_req) {
24928c2ecf20Sopenharmony_ci		switch (img_req->op_type) {
24938c2ecf20Sopenharmony_ci		case OBJ_OP_READ:
24948c2ecf20Sopenharmony_ci			ret = rbd_obj_init_read(obj_req);
24958c2ecf20Sopenharmony_ci			break;
24968c2ecf20Sopenharmony_ci		case OBJ_OP_WRITE:
24978c2ecf20Sopenharmony_ci			ret = rbd_obj_init_write(obj_req);
24988c2ecf20Sopenharmony_ci			break;
24998c2ecf20Sopenharmony_ci		case OBJ_OP_DISCARD:
25008c2ecf20Sopenharmony_ci			ret = rbd_obj_init_discard(obj_req);
25018c2ecf20Sopenharmony_ci			break;
25028c2ecf20Sopenharmony_ci		case OBJ_OP_ZEROOUT:
25038c2ecf20Sopenharmony_ci			ret = rbd_obj_init_zeroout(obj_req);
25048c2ecf20Sopenharmony_ci			break;
25058c2ecf20Sopenharmony_ci		default:
25068c2ecf20Sopenharmony_ci			BUG();
25078c2ecf20Sopenharmony_ci		}
25088c2ecf20Sopenharmony_ci		if (ret < 0)
25098c2ecf20Sopenharmony_ci			return ret;
25108c2ecf20Sopenharmony_ci		if (ret > 0) {
25118c2ecf20Sopenharmony_ci			rbd_img_obj_request_del(img_req, obj_req);
25128c2ecf20Sopenharmony_ci			continue;
25138c2ecf20Sopenharmony_ci		}
25148c2ecf20Sopenharmony_ci	}
25158c2ecf20Sopenharmony_ci
25168c2ecf20Sopenharmony_ci	img_req->state = RBD_IMG_START;
25178c2ecf20Sopenharmony_ci	return 0;
25188c2ecf20Sopenharmony_ci}
25198c2ecf20Sopenharmony_ci
25208c2ecf20Sopenharmony_ciunion rbd_img_fill_iter {
25218c2ecf20Sopenharmony_ci	struct ceph_bio_iter	bio_iter;
25228c2ecf20Sopenharmony_ci	struct ceph_bvec_iter	bvec_iter;
25238c2ecf20Sopenharmony_ci};
25248c2ecf20Sopenharmony_ci
25258c2ecf20Sopenharmony_cistruct rbd_img_fill_ctx {
25268c2ecf20Sopenharmony_ci	enum obj_request_type	pos_type;
25278c2ecf20Sopenharmony_ci	union rbd_img_fill_iter	*pos;
25288c2ecf20Sopenharmony_ci	union rbd_img_fill_iter	iter;
25298c2ecf20Sopenharmony_ci	ceph_object_extent_fn_t	set_pos_fn;
25308c2ecf20Sopenharmony_ci	ceph_object_extent_fn_t	count_fn;
25318c2ecf20Sopenharmony_ci	ceph_object_extent_fn_t	copy_fn;
25328c2ecf20Sopenharmony_ci};
25338c2ecf20Sopenharmony_ci
25348c2ecf20Sopenharmony_cistatic struct ceph_object_extent *alloc_object_extent(void *arg)
25358c2ecf20Sopenharmony_ci{
25368c2ecf20Sopenharmony_ci	struct rbd_img_request *img_req = arg;
25378c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req;
25388c2ecf20Sopenharmony_ci
25398c2ecf20Sopenharmony_ci	obj_req = rbd_obj_request_create();
25408c2ecf20Sopenharmony_ci	if (!obj_req)
25418c2ecf20Sopenharmony_ci		return NULL;
25428c2ecf20Sopenharmony_ci
25438c2ecf20Sopenharmony_ci	rbd_img_obj_request_add(img_req, obj_req);
25448c2ecf20Sopenharmony_ci	return &obj_req->ex;
25458c2ecf20Sopenharmony_ci}
25468c2ecf20Sopenharmony_ci
25478c2ecf20Sopenharmony_ci/*
25488c2ecf20Sopenharmony_ci * While su != os && sc == 1 is technically not fancy (it's the same
25498c2ecf20Sopenharmony_ci * layout as su == os && sc == 1), we can't use the nocopy path for it
25508c2ecf20Sopenharmony_ci * because ->set_pos_fn() should be called only once per object.
25518c2ecf20Sopenharmony_ci * ceph_file_to_extents() invokes action_fn once per stripe unit, so
25528c2ecf20Sopenharmony_ci * treat su != os && sc == 1 as fancy.
25538c2ecf20Sopenharmony_ci */
25548c2ecf20Sopenharmony_cistatic bool rbd_layout_is_fancy(struct ceph_file_layout *l)
25558c2ecf20Sopenharmony_ci{
25568c2ecf20Sopenharmony_ci	return l->stripe_unit != l->object_size;
25578c2ecf20Sopenharmony_ci}
25588c2ecf20Sopenharmony_ci
25598c2ecf20Sopenharmony_cistatic int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
25608c2ecf20Sopenharmony_ci				       struct ceph_file_extent *img_extents,
25618c2ecf20Sopenharmony_ci				       u32 num_img_extents,
25628c2ecf20Sopenharmony_ci				       struct rbd_img_fill_ctx *fctx)
25638c2ecf20Sopenharmony_ci{
25648c2ecf20Sopenharmony_ci	u32 i;
25658c2ecf20Sopenharmony_ci	int ret;
25668c2ecf20Sopenharmony_ci
25678c2ecf20Sopenharmony_ci	img_req->data_type = fctx->pos_type;
25688c2ecf20Sopenharmony_ci
25698c2ecf20Sopenharmony_ci	/*
25708c2ecf20Sopenharmony_ci	 * Create object requests and set each object request's starting
25718c2ecf20Sopenharmony_ci	 * position in the provided bio (list) or bio_vec array.
25728c2ecf20Sopenharmony_ci	 */
25738c2ecf20Sopenharmony_ci	fctx->iter = *fctx->pos;
25748c2ecf20Sopenharmony_ci	for (i = 0; i < num_img_extents; i++) {
25758c2ecf20Sopenharmony_ci		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
25768c2ecf20Sopenharmony_ci					   img_extents[i].fe_off,
25778c2ecf20Sopenharmony_ci					   img_extents[i].fe_len,
25788c2ecf20Sopenharmony_ci					   &img_req->object_extents,
25798c2ecf20Sopenharmony_ci					   alloc_object_extent, img_req,
25808c2ecf20Sopenharmony_ci					   fctx->set_pos_fn, &fctx->iter);
25818c2ecf20Sopenharmony_ci		if (ret)
25828c2ecf20Sopenharmony_ci			return ret;
25838c2ecf20Sopenharmony_ci	}
25848c2ecf20Sopenharmony_ci
25858c2ecf20Sopenharmony_ci	return __rbd_img_fill_request(img_req);
25868c2ecf20Sopenharmony_ci}
25878c2ecf20Sopenharmony_ci
25888c2ecf20Sopenharmony_ci/*
25898c2ecf20Sopenharmony_ci * Map a list of image extents to a list of object extents, create the
25908c2ecf20Sopenharmony_ci * corresponding object requests (normally each to a different object,
25918c2ecf20Sopenharmony_ci * but not always) and add them to @img_req.  For each object request,
25928c2ecf20Sopenharmony_ci * set up its data descriptor to point to the corresponding chunk(s) of
25938c2ecf20Sopenharmony_ci * @fctx->pos data buffer.
25948c2ecf20Sopenharmony_ci *
25958c2ecf20Sopenharmony_ci * Because ceph_file_to_extents() will merge adjacent object extents
25968c2ecf20Sopenharmony_ci * together, each object request's data descriptor may point to multiple
25978c2ecf20Sopenharmony_ci * different chunks of @fctx->pos data buffer.
25988c2ecf20Sopenharmony_ci *
25998c2ecf20Sopenharmony_ci * @fctx->pos data buffer is assumed to be large enough.
26008c2ecf20Sopenharmony_ci */
26018c2ecf20Sopenharmony_cistatic int rbd_img_fill_request(struct rbd_img_request *img_req,
26028c2ecf20Sopenharmony_ci				struct ceph_file_extent *img_extents,
26038c2ecf20Sopenharmony_ci				u32 num_img_extents,
26048c2ecf20Sopenharmony_ci				struct rbd_img_fill_ctx *fctx)
26058c2ecf20Sopenharmony_ci{
26068c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
26078c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req;
26088c2ecf20Sopenharmony_ci	u32 i;
26098c2ecf20Sopenharmony_ci	int ret;
26108c2ecf20Sopenharmony_ci
26118c2ecf20Sopenharmony_ci	if (fctx->pos_type == OBJ_REQUEST_NODATA ||
26128c2ecf20Sopenharmony_ci	    !rbd_layout_is_fancy(&rbd_dev->layout))
26138c2ecf20Sopenharmony_ci		return rbd_img_fill_request_nocopy(img_req, img_extents,
26148c2ecf20Sopenharmony_ci						   num_img_extents, fctx);
26158c2ecf20Sopenharmony_ci
26168c2ecf20Sopenharmony_ci	img_req->data_type = OBJ_REQUEST_OWN_BVECS;
26178c2ecf20Sopenharmony_ci
26188c2ecf20Sopenharmony_ci	/*
26198c2ecf20Sopenharmony_ci	 * Create object requests and determine ->bvec_count for each object
26208c2ecf20Sopenharmony_ci	 * request.  Note that ->bvec_count sum over all object requests may
26218c2ecf20Sopenharmony_ci	 * be greater than the number of bio_vecs in the provided bio (list)
26228c2ecf20Sopenharmony_ci	 * or bio_vec array because when mapped, those bio_vecs can straddle
26238c2ecf20Sopenharmony_ci	 * stripe unit boundaries.
26248c2ecf20Sopenharmony_ci	 */
26258c2ecf20Sopenharmony_ci	fctx->iter = *fctx->pos;
26268c2ecf20Sopenharmony_ci	for (i = 0; i < num_img_extents; i++) {
26278c2ecf20Sopenharmony_ci		ret = ceph_file_to_extents(&rbd_dev->layout,
26288c2ecf20Sopenharmony_ci					   img_extents[i].fe_off,
26298c2ecf20Sopenharmony_ci					   img_extents[i].fe_len,
26308c2ecf20Sopenharmony_ci					   &img_req->object_extents,
26318c2ecf20Sopenharmony_ci					   alloc_object_extent, img_req,
26328c2ecf20Sopenharmony_ci					   fctx->count_fn, &fctx->iter);
26338c2ecf20Sopenharmony_ci		if (ret)
26348c2ecf20Sopenharmony_ci			return ret;
26358c2ecf20Sopenharmony_ci	}
26368c2ecf20Sopenharmony_ci
26378c2ecf20Sopenharmony_ci	for_each_obj_request(img_req, obj_req) {
26388c2ecf20Sopenharmony_ci		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
26398c2ecf20Sopenharmony_ci					      sizeof(*obj_req->bvec_pos.bvecs),
26408c2ecf20Sopenharmony_ci					      GFP_NOIO);
26418c2ecf20Sopenharmony_ci		if (!obj_req->bvec_pos.bvecs)
26428c2ecf20Sopenharmony_ci			return -ENOMEM;
26438c2ecf20Sopenharmony_ci	}
26448c2ecf20Sopenharmony_ci
26458c2ecf20Sopenharmony_ci	/*
26468c2ecf20Sopenharmony_ci	 * Fill in each object request's private bio_vec array, splitting and
26478c2ecf20Sopenharmony_ci	 * rearranging the provided bio_vecs in stripe unit chunks as needed.
26488c2ecf20Sopenharmony_ci	 */
26498c2ecf20Sopenharmony_ci	fctx->iter = *fctx->pos;
26508c2ecf20Sopenharmony_ci	for (i = 0; i < num_img_extents; i++) {
26518c2ecf20Sopenharmony_ci		ret = ceph_iterate_extents(&rbd_dev->layout,
26528c2ecf20Sopenharmony_ci					   img_extents[i].fe_off,
26538c2ecf20Sopenharmony_ci					   img_extents[i].fe_len,
26548c2ecf20Sopenharmony_ci					   &img_req->object_extents,
26558c2ecf20Sopenharmony_ci					   fctx->copy_fn, &fctx->iter);
26568c2ecf20Sopenharmony_ci		if (ret)
26578c2ecf20Sopenharmony_ci			return ret;
26588c2ecf20Sopenharmony_ci	}
26598c2ecf20Sopenharmony_ci
26608c2ecf20Sopenharmony_ci	return __rbd_img_fill_request(img_req);
26618c2ecf20Sopenharmony_ci}
26628c2ecf20Sopenharmony_ci
26638c2ecf20Sopenharmony_cistatic int rbd_img_fill_nodata(struct rbd_img_request *img_req,
26648c2ecf20Sopenharmony_ci			       u64 off, u64 len)
26658c2ecf20Sopenharmony_ci{
26668c2ecf20Sopenharmony_ci	struct ceph_file_extent ex = { off, len };
26678c2ecf20Sopenharmony_ci	union rbd_img_fill_iter dummy = {};
26688c2ecf20Sopenharmony_ci	struct rbd_img_fill_ctx fctx = {
26698c2ecf20Sopenharmony_ci		.pos_type = OBJ_REQUEST_NODATA,
26708c2ecf20Sopenharmony_ci		.pos = &dummy,
26718c2ecf20Sopenharmony_ci	};
26728c2ecf20Sopenharmony_ci
26738c2ecf20Sopenharmony_ci	return rbd_img_fill_request(img_req, &ex, 1, &fctx);
26748c2ecf20Sopenharmony_ci}
26758c2ecf20Sopenharmony_ci
26768c2ecf20Sopenharmony_cistatic void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
26778c2ecf20Sopenharmony_ci{
26788c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req =
26798c2ecf20Sopenharmony_ci	    container_of(ex, struct rbd_obj_request, ex);
26808c2ecf20Sopenharmony_ci	struct ceph_bio_iter *it = arg;
26818c2ecf20Sopenharmony_ci
26828c2ecf20Sopenharmony_ci	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
26838c2ecf20Sopenharmony_ci	obj_req->bio_pos = *it;
26848c2ecf20Sopenharmony_ci	ceph_bio_iter_advance(it, bytes);
26858c2ecf20Sopenharmony_ci}
26868c2ecf20Sopenharmony_ci
26878c2ecf20Sopenharmony_cistatic void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
26888c2ecf20Sopenharmony_ci{
26898c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req =
26908c2ecf20Sopenharmony_ci	    container_of(ex, struct rbd_obj_request, ex);
26918c2ecf20Sopenharmony_ci	struct ceph_bio_iter *it = arg;
26928c2ecf20Sopenharmony_ci
26938c2ecf20Sopenharmony_ci	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
26948c2ecf20Sopenharmony_ci	ceph_bio_iter_advance_step(it, bytes, ({
26958c2ecf20Sopenharmony_ci		obj_req->bvec_count++;
26968c2ecf20Sopenharmony_ci	}));
26978c2ecf20Sopenharmony_ci
26988c2ecf20Sopenharmony_ci}
26998c2ecf20Sopenharmony_ci
27008c2ecf20Sopenharmony_cistatic void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
27018c2ecf20Sopenharmony_ci{
27028c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req =
27038c2ecf20Sopenharmony_ci	    container_of(ex, struct rbd_obj_request, ex);
27048c2ecf20Sopenharmony_ci	struct ceph_bio_iter *it = arg;
27058c2ecf20Sopenharmony_ci
27068c2ecf20Sopenharmony_ci	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
27078c2ecf20Sopenharmony_ci	ceph_bio_iter_advance_step(it, bytes, ({
27088c2ecf20Sopenharmony_ci		obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
27098c2ecf20Sopenharmony_ci		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
27108c2ecf20Sopenharmony_ci	}));
27118c2ecf20Sopenharmony_ci}
27128c2ecf20Sopenharmony_ci
27138c2ecf20Sopenharmony_cistatic int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
27148c2ecf20Sopenharmony_ci				   struct ceph_file_extent *img_extents,
27158c2ecf20Sopenharmony_ci				   u32 num_img_extents,
27168c2ecf20Sopenharmony_ci				   struct ceph_bio_iter *bio_pos)
27178c2ecf20Sopenharmony_ci{
27188c2ecf20Sopenharmony_ci	struct rbd_img_fill_ctx fctx = {
27198c2ecf20Sopenharmony_ci		.pos_type = OBJ_REQUEST_BIO,
27208c2ecf20Sopenharmony_ci		.pos = (union rbd_img_fill_iter *)bio_pos,
27218c2ecf20Sopenharmony_ci		.set_pos_fn = set_bio_pos,
27228c2ecf20Sopenharmony_ci		.count_fn = count_bio_bvecs,
27238c2ecf20Sopenharmony_ci		.copy_fn = copy_bio_bvecs,
27248c2ecf20Sopenharmony_ci	};
27258c2ecf20Sopenharmony_ci
27268c2ecf20Sopenharmony_ci	return rbd_img_fill_request(img_req, img_extents, num_img_extents,
27278c2ecf20Sopenharmony_ci				    &fctx);
27288c2ecf20Sopenharmony_ci}
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_cistatic int rbd_img_fill_from_bio(struct rbd_img_request *img_req,
27318c2ecf20Sopenharmony_ci				 u64 off, u64 len, struct bio *bio)
27328c2ecf20Sopenharmony_ci{
27338c2ecf20Sopenharmony_ci	struct ceph_file_extent ex = { off, len };
27348c2ecf20Sopenharmony_ci	struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter };
27358c2ecf20Sopenharmony_ci
27368c2ecf20Sopenharmony_ci	return __rbd_img_fill_from_bio(img_req, &ex, 1, &it);
27378c2ecf20Sopenharmony_ci}
27388c2ecf20Sopenharmony_ci
27398c2ecf20Sopenharmony_cistatic void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
27408c2ecf20Sopenharmony_ci{
27418c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req =
27428c2ecf20Sopenharmony_ci	    container_of(ex, struct rbd_obj_request, ex);
27438c2ecf20Sopenharmony_ci	struct ceph_bvec_iter *it = arg;
27448c2ecf20Sopenharmony_ci
27458c2ecf20Sopenharmony_ci	obj_req->bvec_pos = *it;
27468c2ecf20Sopenharmony_ci	ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes);
27478c2ecf20Sopenharmony_ci	ceph_bvec_iter_advance(it, bytes);
27488c2ecf20Sopenharmony_ci}
27498c2ecf20Sopenharmony_ci
27508c2ecf20Sopenharmony_cistatic void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
27518c2ecf20Sopenharmony_ci{
27528c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req =
27538c2ecf20Sopenharmony_ci	    container_of(ex, struct rbd_obj_request, ex);
27548c2ecf20Sopenharmony_ci	struct ceph_bvec_iter *it = arg;
27558c2ecf20Sopenharmony_ci
27568c2ecf20Sopenharmony_ci	ceph_bvec_iter_advance_step(it, bytes, ({
27578c2ecf20Sopenharmony_ci		obj_req->bvec_count++;
27588c2ecf20Sopenharmony_ci	}));
27598c2ecf20Sopenharmony_ci}
27608c2ecf20Sopenharmony_ci
27618c2ecf20Sopenharmony_cistatic void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
27628c2ecf20Sopenharmony_ci{
27638c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req =
27648c2ecf20Sopenharmony_ci	    container_of(ex, struct rbd_obj_request, ex);
27658c2ecf20Sopenharmony_ci	struct ceph_bvec_iter *it = arg;
27668c2ecf20Sopenharmony_ci
27678c2ecf20Sopenharmony_ci	ceph_bvec_iter_advance_step(it, bytes, ({
27688c2ecf20Sopenharmony_ci		obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
27698c2ecf20Sopenharmony_ci		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
27708c2ecf20Sopenharmony_ci	}));
27718c2ecf20Sopenharmony_ci}
27728c2ecf20Sopenharmony_ci
27738c2ecf20Sopenharmony_cistatic int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
27748c2ecf20Sopenharmony_ci				     struct ceph_file_extent *img_extents,
27758c2ecf20Sopenharmony_ci				     u32 num_img_extents,
27768c2ecf20Sopenharmony_ci				     struct ceph_bvec_iter *bvec_pos)
27778c2ecf20Sopenharmony_ci{
27788c2ecf20Sopenharmony_ci	struct rbd_img_fill_ctx fctx = {
27798c2ecf20Sopenharmony_ci		.pos_type = OBJ_REQUEST_BVECS,
27808c2ecf20Sopenharmony_ci		.pos = (union rbd_img_fill_iter *)bvec_pos,
27818c2ecf20Sopenharmony_ci		.set_pos_fn = set_bvec_pos,
27828c2ecf20Sopenharmony_ci		.count_fn = count_bvecs,
27838c2ecf20Sopenharmony_ci		.copy_fn = copy_bvecs,
27848c2ecf20Sopenharmony_ci	};
27858c2ecf20Sopenharmony_ci
27868c2ecf20Sopenharmony_ci	return rbd_img_fill_request(img_req, img_extents, num_img_extents,
27878c2ecf20Sopenharmony_ci				    &fctx);
27888c2ecf20Sopenharmony_ci}
27898c2ecf20Sopenharmony_ci
27908c2ecf20Sopenharmony_cistatic int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
27918c2ecf20Sopenharmony_ci				   struct ceph_file_extent *img_extents,
27928c2ecf20Sopenharmony_ci				   u32 num_img_extents,
27938c2ecf20Sopenharmony_ci				   struct bio_vec *bvecs)
27948c2ecf20Sopenharmony_ci{
27958c2ecf20Sopenharmony_ci	struct ceph_bvec_iter it = {
27968c2ecf20Sopenharmony_ci		.bvecs = bvecs,
27978c2ecf20Sopenharmony_ci		.iter = { .bi_size = ceph_file_extents_bytes(img_extents,
27988c2ecf20Sopenharmony_ci							     num_img_extents) },
27998c2ecf20Sopenharmony_ci	};
28008c2ecf20Sopenharmony_ci
28018c2ecf20Sopenharmony_ci	return __rbd_img_fill_from_bvecs(img_req, img_extents, num_img_extents,
28028c2ecf20Sopenharmony_ci					 &it);
28038c2ecf20Sopenharmony_ci}
28048c2ecf20Sopenharmony_ci
28058c2ecf20Sopenharmony_cistatic void rbd_img_handle_request_work(struct work_struct *work)
28068c2ecf20Sopenharmony_ci{
28078c2ecf20Sopenharmony_ci	struct rbd_img_request *img_req =
28088c2ecf20Sopenharmony_ci	    container_of(work, struct rbd_img_request, work);
28098c2ecf20Sopenharmony_ci
28108c2ecf20Sopenharmony_ci	rbd_img_handle_request(img_req, img_req->work_result);
28118c2ecf20Sopenharmony_ci}
28128c2ecf20Sopenharmony_ci
28138c2ecf20Sopenharmony_cistatic void rbd_img_schedule(struct rbd_img_request *img_req, int result)
28148c2ecf20Sopenharmony_ci{
28158c2ecf20Sopenharmony_ci	INIT_WORK(&img_req->work, rbd_img_handle_request_work);
28168c2ecf20Sopenharmony_ci	img_req->work_result = result;
28178c2ecf20Sopenharmony_ci	queue_work(rbd_wq, &img_req->work);
28188c2ecf20Sopenharmony_ci}
28198c2ecf20Sopenharmony_ci
28208c2ecf20Sopenharmony_cistatic bool rbd_obj_may_exist(struct rbd_obj_request *obj_req)
28218c2ecf20Sopenharmony_ci{
28228c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
28238c2ecf20Sopenharmony_ci
28248c2ecf20Sopenharmony_ci	if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) {
28258c2ecf20Sopenharmony_ci		obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
28268c2ecf20Sopenharmony_ci		return true;
28278c2ecf20Sopenharmony_ci	}
28288c2ecf20Sopenharmony_ci
28298c2ecf20Sopenharmony_ci	dout("%s %p objno %llu assuming dne\n", __func__, obj_req,
28308c2ecf20Sopenharmony_ci	     obj_req->ex.oe_objno);
28318c2ecf20Sopenharmony_ci	return false;
28328c2ecf20Sopenharmony_ci}
28338c2ecf20Sopenharmony_ci
28348c2ecf20Sopenharmony_cistatic int rbd_obj_read_object(struct rbd_obj_request *obj_req)
28358c2ecf20Sopenharmony_ci{
28368c2ecf20Sopenharmony_ci	struct ceph_osd_request *osd_req;
28378c2ecf20Sopenharmony_ci	int ret;
28388c2ecf20Sopenharmony_ci
28398c2ecf20Sopenharmony_ci	osd_req = __rbd_obj_add_osd_request(obj_req, NULL, 1);
28408c2ecf20Sopenharmony_ci	if (IS_ERR(osd_req))
28418c2ecf20Sopenharmony_ci		return PTR_ERR(osd_req);
28428c2ecf20Sopenharmony_ci
28438c2ecf20Sopenharmony_ci	osd_req_op_extent_init(osd_req, 0, CEPH_OSD_OP_READ,
28448c2ecf20Sopenharmony_ci			       obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
28458c2ecf20Sopenharmony_ci	rbd_osd_setup_data(osd_req, 0);
28468c2ecf20Sopenharmony_ci	rbd_osd_format_read(osd_req);
28478c2ecf20Sopenharmony_ci
28488c2ecf20Sopenharmony_ci	ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
28498c2ecf20Sopenharmony_ci	if (ret)
28508c2ecf20Sopenharmony_ci		return ret;
28518c2ecf20Sopenharmony_ci
28528c2ecf20Sopenharmony_ci	rbd_osd_submit(osd_req);
28538c2ecf20Sopenharmony_ci	return 0;
28548c2ecf20Sopenharmony_ci}
28558c2ecf20Sopenharmony_ci
28568c2ecf20Sopenharmony_cistatic int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
28578c2ecf20Sopenharmony_ci{
28588c2ecf20Sopenharmony_ci	struct rbd_img_request *img_req = obj_req->img_request;
28598c2ecf20Sopenharmony_ci	struct rbd_device *parent = img_req->rbd_dev->parent;
28608c2ecf20Sopenharmony_ci	struct rbd_img_request *child_img_req;
28618c2ecf20Sopenharmony_ci	int ret;
28628c2ecf20Sopenharmony_ci
28638c2ecf20Sopenharmony_ci	child_img_req = kmem_cache_alloc(rbd_img_request_cache, GFP_NOIO);
28648c2ecf20Sopenharmony_ci	if (!child_img_req)
28658c2ecf20Sopenharmony_ci		return -ENOMEM;
28668c2ecf20Sopenharmony_ci
28678c2ecf20Sopenharmony_ci	rbd_img_request_init(child_img_req, parent, OBJ_OP_READ);
28688c2ecf20Sopenharmony_ci	__set_bit(IMG_REQ_CHILD, &child_img_req->flags);
28698c2ecf20Sopenharmony_ci	child_img_req->obj_request = obj_req;
28708c2ecf20Sopenharmony_ci
28718c2ecf20Sopenharmony_ci	down_read(&parent->header_rwsem);
28728c2ecf20Sopenharmony_ci	rbd_img_capture_header(child_img_req);
28738c2ecf20Sopenharmony_ci	up_read(&parent->header_rwsem);
28748c2ecf20Sopenharmony_ci
28758c2ecf20Sopenharmony_ci	dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req,
28768c2ecf20Sopenharmony_ci	     obj_req);
28778c2ecf20Sopenharmony_ci
28788c2ecf20Sopenharmony_ci	if (!rbd_img_is_write(img_req)) {
28798c2ecf20Sopenharmony_ci		switch (img_req->data_type) {
28808c2ecf20Sopenharmony_ci		case OBJ_REQUEST_BIO:
28818c2ecf20Sopenharmony_ci			ret = __rbd_img_fill_from_bio(child_img_req,
28828c2ecf20Sopenharmony_ci						      obj_req->img_extents,
28838c2ecf20Sopenharmony_ci						      obj_req->num_img_extents,
28848c2ecf20Sopenharmony_ci						      &obj_req->bio_pos);
28858c2ecf20Sopenharmony_ci			break;
28868c2ecf20Sopenharmony_ci		case OBJ_REQUEST_BVECS:
28878c2ecf20Sopenharmony_ci		case OBJ_REQUEST_OWN_BVECS:
28888c2ecf20Sopenharmony_ci			ret = __rbd_img_fill_from_bvecs(child_img_req,
28898c2ecf20Sopenharmony_ci						      obj_req->img_extents,
28908c2ecf20Sopenharmony_ci						      obj_req->num_img_extents,
28918c2ecf20Sopenharmony_ci						      &obj_req->bvec_pos);
28928c2ecf20Sopenharmony_ci			break;
28938c2ecf20Sopenharmony_ci		default:
28948c2ecf20Sopenharmony_ci			BUG();
28958c2ecf20Sopenharmony_ci		}
28968c2ecf20Sopenharmony_ci	} else {
28978c2ecf20Sopenharmony_ci		ret = rbd_img_fill_from_bvecs(child_img_req,
28988c2ecf20Sopenharmony_ci					      obj_req->img_extents,
28998c2ecf20Sopenharmony_ci					      obj_req->num_img_extents,
29008c2ecf20Sopenharmony_ci					      obj_req->copyup_bvecs);
29018c2ecf20Sopenharmony_ci	}
29028c2ecf20Sopenharmony_ci	if (ret) {
29038c2ecf20Sopenharmony_ci		rbd_img_request_destroy(child_img_req);
29048c2ecf20Sopenharmony_ci		return ret;
29058c2ecf20Sopenharmony_ci	}
29068c2ecf20Sopenharmony_ci
29078c2ecf20Sopenharmony_ci	/* avoid parent chain recursion */
29088c2ecf20Sopenharmony_ci	rbd_img_schedule(child_img_req, 0);
29098c2ecf20Sopenharmony_ci	return 0;
29108c2ecf20Sopenharmony_ci}
29118c2ecf20Sopenharmony_ci
29128c2ecf20Sopenharmony_cistatic bool rbd_obj_advance_read(struct rbd_obj_request *obj_req, int *result)
29138c2ecf20Sopenharmony_ci{
29148c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
29158c2ecf20Sopenharmony_ci	int ret;
29168c2ecf20Sopenharmony_ci
29178c2ecf20Sopenharmony_ciagain:
29188c2ecf20Sopenharmony_ci	switch (obj_req->read_state) {
29198c2ecf20Sopenharmony_ci	case RBD_OBJ_READ_START:
29208c2ecf20Sopenharmony_ci		rbd_assert(!*result);
29218c2ecf20Sopenharmony_ci
29228c2ecf20Sopenharmony_ci		if (!rbd_obj_may_exist(obj_req)) {
29238c2ecf20Sopenharmony_ci			*result = -ENOENT;
29248c2ecf20Sopenharmony_ci			obj_req->read_state = RBD_OBJ_READ_OBJECT;
29258c2ecf20Sopenharmony_ci			goto again;
29268c2ecf20Sopenharmony_ci		}
29278c2ecf20Sopenharmony_ci
29288c2ecf20Sopenharmony_ci		ret = rbd_obj_read_object(obj_req);
29298c2ecf20Sopenharmony_ci		if (ret) {
29308c2ecf20Sopenharmony_ci			*result = ret;
29318c2ecf20Sopenharmony_ci			return true;
29328c2ecf20Sopenharmony_ci		}
29338c2ecf20Sopenharmony_ci		obj_req->read_state = RBD_OBJ_READ_OBJECT;
29348c2ecf20Sopenharmony_ci		return false;
29358c2ecf20Sopenharmony_ci	case RBD_OBJ_READ_OBJECT:
29368c2ecf20Sopenharmony_ci		if (*result == -ENOENT && rbd_dev->parent_overlap) {
29378c2ecf20Sopenharmony_ci			/* reverse map this object extent onto the parent */
29388c2ecf20Sopenharmony_ci			ret = rbd_obj_calc_img_extents(obj_req, false);
29398c2ecf20Sopenharmony_ci			if (ret) {
29408c2ecf20Sopenharmony_ci				*result = ret;
29418c2ecf20Sopenharmony_ci				return true;
29428c2ecf20Sopenharmony_ci			}
29438c2ecf20Sopenharmony_ci			if (obj_req->num_img_extents) {
29448c2ecf20Sopenharmony_ci				ret = rbd_obj_read_from_parent(obj_req);
29458c2ecf20Sopenharmony_ci				if (ret) {
29468c2ecf20Sopenharmony_ci					*result = ret;
29478c2ecf20Sopenharmony_ci					return true;
29488c2ecf20Sopenharmony_ci				}
29498c2ecf20Sopenharmony_ci				obj_req->read_state = RBD_OBJ_READ_PARENT;
29508c2ecf20Sopenharmony_ci				return false;
29518c2ecf20Sopenharmony_ci			}
29528c2ecf20Sopenharmony_ci		}
29538c2ecf20Sopenharmony_ci
29548c2ecf20Sopenharmony_ci		/*
29558c2ecf20Sopenharmony_ci		 * -ENOENT means a hole in the image -- zero-fill the entire
29568c2ecf20Sopenharmony_ci		 * length of the request.  A short read also implies zero-fill
29578c2ecf20Sopenharmony_ci		 * to the end of the request.
29588c2ecf20Sopenharmony_ci		 */
29598c2ecf20Sopenharmony_ci		if (*result == -ENOENT) {
29608c2ecf20Sopenharmony_ci			rbd_obj_zero_range(obj_req, 0, obj_req->ex.oe_len);
29618c2ecf20Sopenharmony_ci			*result = 0;
29628c2ecf20Sopenharmony_ci		} else if (*result >= 0) {
29638c2ecf20Sopenharmony_ci			if (*result < obj_req->ex.oe_len)
29648c2ecf20Sopenharmony_ci				rbd_obj_zero_range(obj_req, *result,
29658c2ecf20Sopenharmony_ci						obj_req->ex.oe_len - *result);
29668c2ecf20Sopenharmony_ci			else
29678c2ecf20Sopenharmony_ci				rbd_assert(*result == obj_req->ex.oe_len);
29688c2ecf20Sopenharmony_ci			*result = 0;
29698c2ecf20Sopenharmony_ci		}
29708c2ecf20Sopenharmony_ci		return true;
29718c2ecf20Sopenharmony_ci	case RBD_OBJ_READ_PARENT:
29728c2ecf20Sopenharmony_ci		/*
29738c2ecf20Sopenharmony_ci		 * The parent image is read only up to the overlap -- zero-fill
29748c2ecf20Sopenharmony_ci		 * from the overlap to the end of the request.
29758c2ecf20Sopenharmony_ci		 */
29768c2ecf20Sopenharmony_ci		if (!*result) {
29778c2ecf20Sopenharmony_ci			u32 obj_overlap = rbd_obj_img_extents_bytes(obj_req);
29788c2ecf20Sopenharmony_ci
29798c2ecf20Sopenharmony_ci			if (obj_overlap < obj_req->ex.oe_len)
29808c2ecf20Sopenharmony_ci				rbd_obj_zero_range(obj_req, obj_overlap,
29818c2ecf20Sopenharmony_ci					    obj_req->ex.oe_len - obj_overlap);
29828c2ecf20Sopenharmony_ci		}
29838c2ecf20Sopenharmony_ci		return true;
29848c2ecf20Sopenharmony_ci	default:
29858c2ecf20Sopenharmony_ci		BUG();
29868c2ecf20Sopenharmony_ci	}
29878c2ecf20Sopenharmony_ci}
29888c2ecf20Sopenharmony_ci
29898c2ecf20Sopenharmony_cistatic bool rbd_obj_write_is_noop(struct rbd_obj_request *obj_req)
29908c2ecf20Sopenharmony_ci{
29918c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
29928c2ecf20Sopenharmony_ci
29938c2ecf20Sopenharmony_ci	if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno))
29948c2ecf20Sopenharmony_ci		obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST;
29958c2ecf20Sopenharmony_ci
29968c2ecf20Sopenharmony_ci	if (!(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST) &&
29978c2ecf20Sopenharmony_ci	    (obj_req->flags & RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT)) {
29988c2ecf20Sopenharmony_ci		dout("%s %p noop for nonexistent\n", __func__, obj_req);
29998c2ecf20Sopenharmony_ci		return true;
30008c2ecf20Sopenharmony_ci	}
30018c2ecf20Sopenharmony_ci
30028c2ecf20Sopenharmony_ci	return false;
30038c2ecf20Sopenharmony_ci}
30048c2ecf20Sopenharmony_ci
30058c2ecf20Sopenharmony_ci/*
30068c2ecf20Sopenharmony_ci * Return:
30078c2ecf20Sopenharmony_ci *   0 - object map update sent
30088c2ecf20Sopenharmony_ci *   1 - object map update isn't needed
30098c2ecf20Sopenharmony_ci *  <0 - error
30108c2ecf20Sopenharmony_ci */
30118c2ecf20Sopenharmony_cistatic int rbd_obj_write_pre_object_map(struct rbd_obj_request *obj_req)
30128c2ecf20Sopenharmony_ci{
30138c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
30148c2ecf20Sopenharmony_ci	u8 new_state;
30158c2ecf20Sopenharmony_ci
30168c2ecf20Sopenharmony_ci	if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
30178c2ecf20Sopenharmony_ci		return 1;
30188c2ecf20Sopenharmony_ci
30198c2ecf20Sopenharmony_ci	if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
30208c2ecf20Sopenharmony_ci		new_state = OBJECT_PENDING;
30218c2ecf20Sopenharmony_ci	else
30228c2ecf20Sopenharmony_ci		new_state = OBJECT_EXISTS;
30238c2ecf20Sopenharmony_ci
30248c2ecf20Sopenharmony_ci	return rbd_object_map_update(obj_req, CEPH_NOSNAP, new_state, NULL);
30258c2ecf20Sopenharmony_ci}
30268c2ecf20Sopenharmony_ci
30278c2ecf20Sopenharmony_cistatic int rbd_obj_write_object(struct rbd_obj_request *obj_req)
30288c2ecf20Sopenharmony_ci{
30298c2ecf20Sopenharmony_ci	struct ceph_osd_request *osd_req;
30308c2ecf20Sopenharmony_ci	int num_ops = count_write_ops(obj_req);
30318c2ecf20Sopenharmony_ci	int which = 0;
30328c2ecf20Sopenharmony_ci	int ret;
30338c2ecf20Sopenharmony_ci
30348c2ecf20Sopenharmony_ci	if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)
30358c2ecf20Sopenharmony_ci		num_ops++; /* stat */
30368c2ecf20Sopenharmony_ci
30378c2ecf20Sopenharmony_ci	osd_req = rbd_obj_add_osd_request(obj_req, num_ops);
30388c2ecf20Sopenharmony_ci	if (IS_ERR(osd_req))
30398c2ecf20Sopenharmony_ci		return PTR_ERR(osd_req);
30408c2ecf20Sopenharmony_ci
30418c2ecf20Sopenharmony_ci	if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
30428c2ecf20Sopenharmony_ci		ret = rbd_osd_setup_stat(osd_req, which++);
30438c2ecf20Sopenharmony_ci		if (ret)
30448c2ecf20Sopenharmony_ci			return ret;
30458c2ecf20Sopenharmony_ci	}
30468c2ecf20Sopenharmony_ci
30478c2ecf20Sopenharmony_ci	rbd_osd_setup_write_ops(osd_req, which);
30488c2ecf20Sopenharmony_ci	rbd_osd_format_write(osd_req);
30498c2ecf20Sopenharmony_ci
30508c2ecf20Sopenharmony_ci	ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
30518c2ecf20Sopenharmony_ci	if (ret)
30528c2ecf20Sopenharmony_ci		return ret;
30538c2ecf20Sopenharmony_ci
30548c2ecf20Sopenharmony_ci	rbd_osd_submit(osd_req);
30558c2ecf20Sopenharmony_ci	return 0;
30568c2ecf20Sopenharmony_ci}
30578c2ecf20Sopenharmony_ci
30588c2ecf20Sopenharmony_ci/*
30598c2ecf20Sopenharmony_ci * copyup_bvecs pages are never highmem pages
30608c2ecf20Sopenharmony_ci */
30618c2ecf20Sopenharmony_cistatic bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
30628c2ecf20Sopenharmony_ci{
30638c2ecf20Sopenharmony_ci	struct ceph_bvec_iter it = {
30648c2ecf20Sopenharmony_ci		.bvecs = bvecs,
30658c2ecf20Sopenharmony_ci		.iter = { .bi_size = bytes },
30668c2ecf20Sopenharmony_ci	};
30678c2ecf20Sopenharmony_ci
30688c2ecf20Sopenharmony_ci	ceph_bvec_iter_advance_step(&it, bytes, ({
30698c2ecf20Sopenharmony_ci		if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0,
30708c2ecf20Sopenharmony_ci			       bv.bv_len))
30718c2ecf20Sopenharmony_ci			return false;
30728c2ecf20Sopenharmony_ci	}));
30738c2ecf20Sopenharmony_ci	return true;
30748c2ecf20Sopenharmony_ci}
30758c2ecf20Sopenharmony_ci
30768c2ecf20Sopenharmony_ci#define MODS_ONLY	U32_MAX
30778c2ecf20Sopenharmony_ci
30788c2ecf20Sopenharmony_cistatic int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req,
30798c2ecf20Sopenharmony_ci				      u32 bytes)
30808c2ecf20Sopenharmony_ci{
30818c2ecf20Sopenharmony_ci	struct ceph_osd_request *osd_req;
30828c2ecf20Sopenharmony_ci	int ret;
30838c2ecf20Sopenharmony_ci
30848c2ecf20Sopenharmony_ci	dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
30858c2ecf20Sopenharmony_ci	rbd_assert(bytes > 0 && bytes != MODS_ONLY);
30868c2ecf20Sopenharmony_ci
30878c2ecf20Sopenharmony_ci	osd_req = __rbd_obj_add_osd_request(obj_req, &rbd_empty_snapc, 1);
30888c2ecf20Sopenharmony_ci	if (IS_ERR(osd_req))
30898c2ecf20Sopenharmony_ci		return PTR_ERR(osd_req);
30908c2ecf20Sopenharmony_ci
30918c2ecf20Sopenharmony_ci	ret = rbd_osd_setup_copyup(osd_req, 0, bytes);
30928c2ecf20Sopenharmony_ci	if (ret)
30938c2ecf20Sopenharmony_ci		return ret;
30948c2ecf20Sopenharmony_ci
30958c2ecf20Sopenharmony_ci	rbd_osd_format_write(osd_req);
30968c2ecf20Sopenharmony_ci
30978c2ecf20Sopenharmony_ci	ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
30988c2ecf20Sopenharmony_ci	if (ret)
30998c2ecf20Sopenharmony_ci		return ret;
31008c2ecf20Sopenharmony_ci
31018c2ecf20Sopenharmony_ci	rbd_osd_submit(osd_req);
31028c2ecf20Sopenharmony_ci	return 0;
31038c2ecf20Sopenharmony_ci}
31048c2ecf20Sopenharmony_ci
31058c2ecf20Sopenharmony_cistatic int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req,
31068c2ecf20Sopenharmony_ci					u32 bytes)
31078c2ecf20Sopenharmony_ci{
31088c2ecf20Sopenharmony_ci	struct ceph_osd_request *osd_req;
31098c2ecf20Sopenharmony_ci	int num_ops = count_write_ops(obj_req);
31108c2ecf20Sopenharmony_ci	int which = 0;
31118c2ecf20Sopenharmony_ci	int ret;
31128c2ecf20Sopenharmony_ci
31138c2ecf20Sopenharmony_ci	dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
31148c2ecf20Sopenharmony_ci
31158c2ecf20Sopenharmony_ci	if (bytes != MODS_ONLY)
31168c2ecf20Sopenharmony_ci		num_ops++; /* copyup */
31178c2ecf20Sopenharmony_ci
31188c2ecf20Sopenharmony_ci	osd_req = rbd_obj_add_osd_request(obj_req, num_ops);
31198c2ecf20Sopenharmony_ci	if (IS_ERR(osd_req))
31208c2ecf20Sopenharmony_ci		return PTR_ERR(osd_req);
31218c2ecf20Sopenharmony_ci
31228c2ecf20Sopenharmony_ci	if (bytes != MODS_ONLY) {
31238c2ecf20Sopenharmony_ci		ret = rbd_osd_setup_copyup(osd_req, which++, bytes);
31248c2ecf20Sopenharmony_ci		if (ret)
31258c2ecf20Sopenharmony_ci			return ret;
31268c2ecf20Sopenharmony_ci	}
31278c2ecf20Sopenharmony_ci
31288c2ecf20Sopenharmony_ci	rbd_osd_setup_write_ops(osd_req, which);
31298c2ecf20Sopenharmony_ci	rbd_osd_format_write(osd_req);
31308c2ecf20Sopenharmony_ci
31318c2ecf20Sopenharmony_ci	ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO);
31328c2ecf20Sopenharmony_ci	if (ret)
31338c2ecf20Sopenharmony_ci		return ret;
31348c2ecf20Sopenharmony_ci
31358c2ecf20Sopenharmony_ci	rbd_osd_submit(osd_req);
31368c2ecf20Sopenharmony_ci	return 0;
31378c2ecf20Sopenharmony_ci}
31388c2ecf20Sopenharmony_ci
31398c2ecf20Sopenharmony_cistatic int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
31408c2ecf20Sopenharmony_ci{
31418c2ecf20Sopenharmony_ci	u32 i;
31428c2ecf20Sopenharmony_ci
31438c2ecf20Sopenharmony_ci	rbd_assert(!obj_req->copyup_bvecs);
31448c2ecf20Sopenharmony_ci	obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap);
31458c2ecf20Sopenharmony_ci	obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count,
31468c2ecf20Sopenharmony_ci					sizeof(*obj_req->copyup_bvecs),
31478c2ecf20Sopenharmony_ci					GFP_NOIO);
31488c2ecf20Sopenharmony_ci	if (!obj_req->copyup_bvecs)
31498c2ecf20Sopenharmony_ci		return -ENOMEM;
31508c2ecf20Sopenharmony_ci
31518c2ecf20Sopenharmony_ci	for (i = 0; i < obj_req->copyup_bvec_count; i++) {
31528c2ecf20Sopenharmony_ci		unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
31538c2ecf20Sopenharmony_ci
31548c2ecf20Sopenharmony_ci		obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
31558c2ecf20Sopenharmony_ci		if (!obj_req->copyup_bvecs[i].bv_page)
31568c2ecf20Sopenharmony_ci			return -ENOMEM;
31578c2ecf20Sopenharmony_ci
31588c2ecf20Sopenharmony_ci		obj_req->copyup_bvecs[i].bv_offset = 0;
31598c2ecf20Sopenharmony_ci		obj_req->copyup_bvecs[i].bv_len = len;
31608c2ecf20Sopenharmony_ci		obj_overlap -= len;
31618c2ecf20Sopenharmony_ci	}
31628c2ecf20Sopenharmony_ci
31638c2ecf20Sopenharmony_ci	rbd_assert(!obj_overlap);
31648c2ecf20Sopenharmony_ci	return 0;
31658c2ecf20Sopenharmony_ci}
31668c2ecf20Sopenharmony_ci
31678c2ecf20Sopenharmony_ci/*
31688c2ecf20Sopenharmony_ci * The target object doesn't exist.  Read the data for the entire
31698c2ecf20Sopenharmony_ci * target object up to the overlap point (if any) from the parent,
31708c2ecf20Sopenharmony_ci * so we can use it for a copyup.
31718c2ecf20Sopenharmony_ci */
31728c2ecf20Sopenharmony_cistatic int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req)
31738c2ecf20Sopenharmony_ci{
31748c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
31758c2ecf20Sopenharmony_ci	int ret;
31768c2ecf20Sopenharmony_ci
31778c2ecf20Sopenharmony_ci	rbd_assert(obj_req->num_img_extents);
31788c2ecf20Sopenharmony_ci	prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
31798c2ecf20Sopenharmony_ci		      rbd_dev->parent_overlap);
31808c2ecf20Sopenharmony_ci	if (!obj_req->num_img_extents) {
31818c2ecf20Sopenharmony_ci		/*
31828c2ecf20Sopenharmony_ci		 * The overlap has become 0 (most likely because the
31838c2ecf20Sopenharmony_ci		 * image has been flattened).  Re-submit the original write
31848c2ecf20Sopenharmony_ci		 * request -- pass MODS_ONLY since the copyup isn't needed
31858c2ecf20Sopenharmony_ci		 * anymore.
31868c2ecf20Sopenharmony_ci		 */
31878c2ecf20Sopenharmony_ci		return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY);
31888c2ecf20Sopenharmony_ci	}
31898c2ecf20Sopenharmony_ci
31908c2ecf20Sopenharmony_ci	ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
31918c2ecf20Sopenharmony_ci	if (ret)
31928c2ecf20Sopenharmony_ci		return ret;
31938c2ecf20Sopenharmony_ci
31948c2ecf20Sopenharmony_ci	return rbd_obj_read_from_parent(obj_req);
31958c2ecf20Sopenharmony_ci}
31968c2ecf20Sopenharmony_ci
31978c2ecf20Sopenharmony_cistatic void rbd_obj_copyup_object_maps(struct rbd_obj_request *obj_req)
31988c2ecf20Sopenharmony_ci{
31998c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
32008c2ecf20Sopenharmony_ci	struct ceph_snap_context *snapc = obj_req->img_request->snapc;
32018c2ecf20Sopenharmony_ci	u8 new_state;
32028c2ecf20Sopenharmony_ci	u32 i;
32038c2ecf20Sopenharmony_ci	int ret;
32048c2ecf20Sopenharmony_ci
32058c2ecf20Sopenharmony_ci	rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
32068c2ecf20Sopenharmony_ci
32078c2ecf20Sopenharmony_ci	if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
32088c2ecf20Sopenharmony_ci		return;
32098c2ecf20Sopenharmony_ci
32108c2ecf20Sopenharmony_ci	if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
32118c2ecf20Sopenharmony_ci		return;
32128c2ecf20Sopenharmony_ci
32138c2ecf20Sopenharmony_ci	for (i = 0; i < snapc->num_snaps; i++) {
32148c2ecf20Sopenharmony_ci		if ((rbd_dev->header.features & RBD_FEATURE_FAST_DIFF) &&
32158c2ecf20Sopenharmony_ci		    i + 1 < snapc->num_snaps)
32168c2ecf20Sopenharmony_ci			new_state = OBJECT_EXISTS_CLEAN;
32178c2ecf20Sopenharmony_ci		else
32188c2ecf20Sopenharmony_ci			new_state = OBJECT_EXISTS;
32198c2ecf20Sopenharmony_ci
32208c2ecf20Sopenharmony_ci		ret = rbd_object_map_update(obj_req, snapc->snaps[i],
32218c2ecf20Sopenharmony_ci					    new_state, NULL);
32228c2ecf20Sopenharmony_ci		if (ret < 0) {
32238c2ecf20Sopenharmony_ci			obj_req->pending.result = ret;
32248c2ecf20Sopenharmony_ci			return;
32258c2ecf20Sopenharmony_ci		}
32268c2ecf20Sopenharmony_ci
32278c2ecf20Sopenharmony_ci		rbd_assert(!ret);
32288c2ecf20Sopenharmony_ci		obj_req->pending.num_pending++;
32298c2ecf20Sopenharmony_ci	}
32308c2ecf20Sopenharmony_ci}
32318c2ecf20Sopenharmony_ci
32328c2ecf20Sopenharmony_cistatic void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req)
32338c2ecf20Sopenharmony_ci{
32348c2ecf20Sopenharmony_ci	u32 bytes = rbd_obj_img_extents_bytes(obj_req);
32358c2ecf20Sopenharmony_ci	int ret;
32368c2ecf20Sopenharmony_ci
32378c2ecf20Sopenharmony_ci	rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
32388c2ecf20Sopenharmony_ci
32398c2ecf20Sopenharmony_ci	/*
32408c2ecf20Sopenharmony_ci	 * Only send non-zero copyup data to save some I/O and network
32418c2ecf20Sopenharmony_ci	 * bandwidth -- zero copyup data is equivalent to the object not
32428c2ecf20Sopenharmony_ci	 * existing.
32438c2ecf20Sopenharmony_ci	 */
32448c2ecf20Sopenharmony_ci	if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
32458c2ecf20Sopenharmony_ci		bytes = 0;
32468c2ecf20Sopenharmony_ci
32478c2ecf20Sopenharmony_ci	if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
32488c2ecf20Sopenharmony_ci		/*
32498c2ecf20Sopenharmony_ci		 * Send a copyup request with an empty snapshot context to
32508c2ecf20Sopenharmony_ci		 * deep-copyup the object through all existing snapshots.
32518c2ecf20Sopenharmony_ci		 * A second request with the current snapshot context will be
32528c2ecf20Sopenharmony_ci		 * sent for the actual modification.
32538c2ecf20Sopenharmony_ci		 */
32548c2ecf20Sopenharmony_ci		ret = rbd_obj_copyup_empty_snapc(obj_req, bytes);
32558c2ecf20Sopenharmony_ci		if (ret) {
32568c2ecf20Sopenharmony_ci			obj_req->pending.result = ret;
32578c2ecf20Sopenharmony_ci			return;
32588c2ecf20Sopenharmony_ci		}
32598c2ecf20Sopenharmony_ci
32608c2ecf20Sopenharmony_ci		obj_req->pending.num_pending++;
32618c2ecf20Sopenharmony_ci		bytes = MODS_ONLY;
32628c2ecf20Sopenharmony_ci	}
32638c2ecf20Sopenharmony_ci
32648c2ecf20Sopenharmony_ci	ret = rbd_obj_copyup_current_snapc(obj_req, bytes);
32658c2ecf20Sopenharmony_ci	if (ret) {
32668c2ecf20Sopenharmony_ci		obj_req->pending.result = ret;
32678c2ecf20Sopenharmony_ci		return;
32688c2ecf20Sopenharmony_ci	}
32698c2ecf20Sopenharmony_ci
32708c2ecf20Sopenharmony_ci	obj_req->pending.num_pending++;
32718c2ecf20Sopenharmony_ci}
32728c2ecf20Sopenharmony_ci
32738c2ecf20Sopenharmony_cistatic bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result)
32748c2ecf20Sopenharmony_ci{
32758c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
32768c2ecf20Sopenharmony_ci	int ret;
32778c2ecf20Sopenharmony_ci
32788c2ecf20Sopenharmony_ciagain:
32798c2ecf20Sopenharmony_ci	switch (obj_req->copyup_state) {
32808c2ecf20Sopenharmony_ci	case RBD_OBJ_COPYUP_START:
32818c2ecf20Sopenharmony_ci		rbd_assert(!*result);
32828c2ecf20Sopenharmony_ci
32838c2ecf20Sopenharmony_ci		ret = rbd_obj_copyup_read_parent(obj_req);
32848c2ecf20Sopenharmony_ci		if (ret) {
32858c2ecf20Sopenharmony_ci			*result = ret;
32868c2ecf20Sopenharmony_ci			return true;
32878c2ecf20Sopenharmony_ci		}
32888c2ecf20Sopenharmony_ci		if (obj_req->num_img_extents)
32898c2ecf20Sopenharmony_ci			obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT;
32908c2ecf20Sopenharmony_ci		else
32918c2ecf20Sopenharmony_ci			obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
32928c2ecf20Sopenharmony_ci		return false;
32938c2ecf20Sopenharmony_ci	case RBD_OBJ_COPYUP_READ_PARENT:
32948c2ecf20Sopenharmony_ci		if (*result)
32958c2ecf20Sopenharmony_ci			return true;
32968c2ecf20Sopenharmony_ci
32978c2ecf20Sopenharmony_ci		if (is_zero_bvecs(obj_req->copyup_bvecs,
32988c2ecf20Sopenharmony_ci				  rbd_obj_img_extents_bytes(obj_req))) {
32998c2ecf20Sopenharmony_ci			dout("%s %p detected zeros\n", __func__, obj_req);
33008c2ecf20Sopenharmony_ci			obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS;
33018c2ecf20Sopenharmony_ci		}
33028c2ecf20Sopenharmony_ci
33038c2ecf20Sopenharmony_ci		rbd_obj_copyup_object_maps(obj_req);
33048c2ecf20Sopenharmony_ci		if (!obj_req->pending.num_pending) {
33058c2ecf20Sopenharmony_ci			*result = obj_req->pending.result;
33068c2ecf20Sopenharmony_ci			obj_req->copyup_state = RBD_OBJ_COPYUP_OBJECT_MAPS;
33078c2ecf20Sopenharmony_ci			goto again;
33088c2ecf20Sopenharmony_ci		}
33098c2ecf20Sopenharmony_ci		obj_req->copyup_state = __RBD_OBJ_COPYUP_OBJECT_MAPS;
33108c2ecf20Sopenharmony_ci		return false;
33118c2ecf20Sopenharmony_ci	case __RBD_OBJ_COPYUP_OBJECT_MAPS:
33128c2ecf20Sopenharmony_ci		if (!pending_result_dec(&obj_req->pending, result))
33138c2ecf20Sopenharmony_ci			return false;
33148c2ecf20Sopenharmony_ci		fallthrough;
33158c2ecf20Sopenharmony_ci	case RBD_OBJ_COPYUP_OBJECT_MAPS:
33168c2ecf20Sopenharmony_ci		if (*result) {
33178c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "snap object map update failed: %d",
33188c2ecf20Sopenharmony_ci				 *result);
33198c2ecf20Sopenharmony_ci			return true;
33208c2ecf20Sopenharmony_ci		}
33218c2ecf20Sopenharmony_ci
33228c2ecf20Sopenharmony_ci		rbd_obj_copyup_write_object(obj_req);
33238c2ecf20Sopenharmony_ci		if (!obj_req->pending.num_pending) {
33248c2ecf20Sopenharmony_ci			*result = obj_req->pending.result;
33258c2ecf20Sopenharmony_ci			obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
33268c2ecf20Sopenharmony_ci			goto again;
33278c2ecf20Sopenharmony_ci		}
33288c2ecf20Sopenharmony_ci		obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT;
33298c2ecf20Sopenharmony_ci		return false;
33308c2ecf20Sopenharmony_ci	case __RBD_OBJ_COPYUP_WRITE_OBJECT:
33318c2ecf20Sopenharmony_ci		if (!pending_result_dec(&obj_req->pending, result))
33328c2ecf20Sopenharmony_ci			return false;
33338c2ecf20Sopenharmony_ci		fallthrough;
33348c2ecf20Sopenharmony_ci	case RBD_OBJ_COPYUP_WRITE_OBJECT:
33358c2ecf20Sopenharmony_ci		return true;
33368c2ecf20Sopenharmony_ci	default:
33378c2ecf20Sopenharmony_ci		BUG();
33388c2ecf20Sopenharmony_ci	}
33398c2ecf20Sopenharmony_ci}
33408c2ecf20Sopenharmony_ci
33418c2ecf20Sopenharmony_ci/*
33428c2ecf20Sopenharmony_ci * Return:
33438c2ecf20Sopenharmony_ci *   0 - object map update sent
33448c2ecf20Sopenharmony_ci *   1 - object map update isn't needed
33458c2ecf20Sopenharmony_ci *  <0 - error
33468c2ecf20Sopenharmony_ci */
33478c2ecf20Sopenharmony_cistatic int rbd_obj_write_post_object_map(struct rbd_obj_request *obj_req)
33488c2ecf20Sopenharmony_ci{
33498c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
33508c2ecf20Sopenharmony_ci	u8 current_state = OBJECT_PENDING;
33518c2ecf20Sopenharmony_ci
33528c2ecf20Sopenharmony_ci	if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
33538c2ecf20Sopenharmony_ci		return 1;
33548c2ecf20Sopenharmony_ci
33558c2ecf20Sopenharmony_ci	if (!(obj_req->flags & RBD_OBJ_FLAG_DELETION))
33568c2ecf20Sopenharmony_ci		return 1;
33578c2ecf20Sopenharmony_ci
33588c2ecf20Sopenharmony_ci	return rbd_object_map_update(obj_req, CEPH_NOSNAP, OBJECT_NONEXISTENT,
33598c2ecf20Sopenharmony_ci				     &current_state);
33608c2ecf20Sopenharmony_ci}
33618c2ecf20Sopenharmony_ci
33628c2ecf20Sopenharmony_cistatic bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
33638c2ecf20Sopenharmony_ci{
33648c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
33658c2ecf20Sopenharmony_ci	int ret;
33668c2ecf20Sopenharmony_ci
33678c2ecf20Sopenharmony_ciagain:
33688c2ecf20Sopenharmony_ci	switch (obj_req->write_state) {
33698c2ecf20Sopenharmony_ci	case RBD_OBJ_WRITE_START:
33708c2ecf20Sopenharmony_ci		rbd_assert(!*result);
33718c2ecf20Sopenharmony_ci
33728c2ecf20Sopenharmony_ci		rbd_obj_set_copyup_enabled(obj_req);
33738c2ecf20Sopenharmony_ci		if (rbd_obj_write_is_noop(obj_req))
33748c2ecf20Sopenharmony_ci			return true;
33758c2ecf20Sopenharmony_ci
33768c2ecf20Sopenharmony_ci		ret = rbd_obj_write_pre_object_map(obj_req);
33778c2ecf20Sopenharmony_ci		if (ret < 0) {
33788c2ecf20Sopenharmony_ci			*result = ret;
33798c2ecf20Sopenharmony_ci			return true;
33808c2ecf20Sopenharmony_ci		}
33818c2ecf20Sopenharmony_ci		obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP;
33828c2ecf20Sopenharmony_ci		if (ret > 0)
33838c2ecf20Sopenharmony_ci			goto again;
33848c2ecf20Sopenharmony_ci		return false;
33858c2ecf20Sopenharmony_ci	case RBD_OBJ_WRITE_PRE_OBJECT_MAP:
33868c2ecf20Sopenharmony_ci		if (*result) {
33878c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "pre object map update failed: %d",
33888c2ecf20Sopenharmony_ci				 *result);
33898c2ecf20Sopenharmony_ci			return true;
33908c2ecf20Sopenharmony_ci		}
33918c2ecf20Sopenharmony_ci		ret = rbd_obj_write_object(obj_req);
33928c2ecf20Sopenharmony_ci		if (ret) {
33938c2ecf20Sopenharmony_ci			*result = ret;
33948c2ecf20Sopenharmony_ci			return true;
33958c2ecf20Sopenharmony_ci		}
33968c2ecf20Sopenharmony_ci		obj_req->write_state = RBD_OBJ_WRITE_OBJECT;
33978c2ecf20Sopenharmony_ci		return false;
33988c2ecf20Sopenharmony_ci	case RBD_OBJ_WRITE_OBJECT:
33998c2ecf20Sopenharmony_ci		if (*result == -ENOENT) {
34008c2ecf20Sopenharmony_ci			if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
34018c2ecf20Sopenharmony_ci				*result = 0;
34028c2ecf20Sopenharmony_ci				obj_req->copyup_state = RBD_OBJ_COPYUP_START;
34038c2ecf20Sopenharmony_ci				obj_req->write_state = __RBD_OBJ_WRITE_COPYUP;
34048c2ecf20Sopenharmony_ci				goto again;
34058c2ecf20Sopenharmony_ci			}
34068c2ecf20Sopenharmony_ci			/*
34078c2ecf20Sopenharmony_ci			 * On a non-existent object:
34088c2ecf20Sopenharmony_ci			 *   delete - -ENOENT, truncate/zero - 0
34098c2ecf20Sopenharmony_ci			 */
34108c2ecf20Sopenharmony_ci			if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
34118c2ecf20Sopenharmony_ci				*result = 0;
34128c2ecf20Sopenharmony_ci		}
34138c2ecf20Sopenharmony_ci		if (*result)
34148c2ecf20Sopenharmony_ci			return true;
34158c2ecf20Sopenharmony_ci
34168c2ecf20Sopenharmony_ci		obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
34178c2ecf20Sopenharmony_ci		goto again;
34188c2ecf20Sopenharmony_ci	case __RBD_OBJ_WRITE_COPYUP:
34198c2ecf20Sopenharmony_ci		if (!rbd_obj_advance_copyup(obj_req, result))
34208c2ecf20Sopenharmony_ci			return false;
34218c2ecf20Sopenharmony_ci		fallthrough;
34228c2ecf20Sopenharmony_ci	case RBD_OBJ_WRITE_COPYUP:
34238c2ecf20Sopenharmony_ci		if (*result) {
34248c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "copyup failed: %d", *result);
34258c2ecf20Sopenharmony_ci			return true;
34268c2ecf20Sopenharmony_ci		}
34278c2ecf20Sopenharmony_ci		ret = rbd_obj_write_post_object_map(obj_req);
34288c2ecf20Sopenharmony_ci		if (ret < 0) {
34298c2ecf20Sopenharmony_ci			*result = ret;
34308c2ecf20Sopenharmony_ci			return true;
34318c2ecf20Sopenharmony_ci		}
34328c2ecf20Sopenharmony_ci		obj_req->write_state = RBD_OBJ_WRITE_POST_OBJECT_MAP;
34338c2ecf20Sopenharmony_ci		if (ret > 0)
34348c2ecf20Sopenharmony_ci			goto again;
34358c2ecf20Sopenharmony_ci		return false;
34368c2ecf20Sopenharmony_ci	case RBD_OBJ_WRITE_POST_OBJECT_MAP:
34378c2ecf20Sopenharmony_ci		if (*result)
34388c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "post object map update failed: %d",
34398c2ecf20Sopenharmony_ci				 *result);
34408c2ecf20Sopenharmony_ci		return true;
34418c2ecf20Sopenharmony_ci	default:
34428c2ecf20Sopenharmony_ci		BUG();
34438c2ecf20Sopenharmony_ci	}
34448c2ecf20Sopenharmony_ci}
34458c2ecf20Sopenharmony_ci
34468c2ecf20Sopenharmony_ci/*
34478c2ecf20Sopenharmony_ci * Return true if @obj_req is completed.
34488c2ecf20Sopenharmony_ci */
34498c2ecf20Sopenharmony_cistatic bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req,
34508c2ecf20Sopenharmony_ci				     int *result)
34518c2ecf20Sopenharmony_ci{
34528c2ecf20Sopenharmony_ci	struct rbd_img_request *img_req = obj_req->img_request;
34538c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
34548c2ecf20Sopenharmony_ci	bool done;
34558c2ecf20Sopenharmony_ci
34568c2ecf20Sopenharmony_ci	mutex_lock(&obj_req->state_mutex);
34578c2ecf20Sopenharmony_ci	if (!rbd_img_is_write(img_req))
34588c2ecf20Sopenharmony_ci		done = rbd_obj_advance_read(obj_req, result);
34598c2ecf20Sopenharmony_ci	else
34608c2ecf20Sopenharmony_ci		done = rbd_obj_advance_write(obj_req, result);
34618c2ecf20Sopenharmony_ci	mutex_unlock(&obj_req->state_mutex);
34628c2ecf20Sopenharmony_ci
34638c2ecf20Sopenharmony_ci	if (done && *result) {
34648c2ecf20Sopenharmony_ci		rbd_assert(*result < 0);
34658c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "%s at objno %llu %llu~%llu result %d",
34668c2ecf20Sopenharmony_ci			 obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
34678c2ecf20Sopenharmony_ci			 obj_req->ex.oe_off, obj_req->ex.oe_len, *result);
34688c2ecf20Sopenharmony_ci	}
34698c2ecf20Sopenharmony_ci	return done;
34708c2ecf20Sopenharmony_ci}
34718c2ecf20Sopenharmony_ci
34728c2ecf20Sopenharmony_ci/*
34738c2ecf20Sopenharmony_ci * This is open-coded in rbd_img_handle_request() to avoid parent chain
34748c2ecf20Sopenharmony_ci * recursion.
34758c2ecf20Sopenharmony_ci */
34768c2ecf20Sopenharmony_cistatic void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result)
34778c2ecf20Sopenharmony_ci{
34788c2ecf20Sopenharmony_ci	if (__rbd_obj_handle_request(obj_req, &result))
34798c2ecf20Sopenharmony_ci		rbd_img_handle_request(obj_req->img_request, result);
34808c2ecf20Sopenharmony_ci}
34818c2ecf20Sopenharmony_ci
34828c2ecf20Sopenharmony_cistatic bool need_exclusive_lock(struct rbd_img_request *img_req)
34838c2ecf20Sopenharmony_ci{
34848c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
34858c2ecf20Sopenharmony_ci
34868c2ecf20Sopenharmony_ci	if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK))
34878c2ecf20Sopenharmony_ci		return false;
34888c2ecf20Sopenharmony_ci
34898c2ecf20Sopenharmony_ci	if (rbd_is_ro(rbd_dev))
34908c2ecf20Sopenharmony_ci		return false;
34918c2ecf20Sopenharmony_ci
34928c2ecf20Sopenharmony_ci	rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
34938c2ecf20Sopenharmony_ci	if (rbd_dev->opts->lock_on_read ||
34948c2ecf20Sopenharmony_ci	    (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP))
34958c2ecf20Sopenharmony_ci		return true;
34968c2ecf20Sopenharmony_ci
34978c2ecf20Sopenharmony_ci	return rbd_img_is_write(img_req);
34988c2ecf20Sopenharmony_ci}
34998c2ecf20Sopenharmony_ci
35008c2ecf20Sopenharmony_cistatic bool rbd_lock_add_request(struct rbd_img_request *img_req)
35018c2ecf20Sopenharmony_ci{
35028c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
35038c2ecf20Sopenharmony_ci	bool locked;
35048c2ecf20Sopenharmony_ci
35058c2ecf20Sopenharmony_ci	lockdep_assert_held(&rbd_dev->lock_rwsem);
35068c2ecf20Sopenharmony_ci	locked = rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED;
35078c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev->lock_lists_lock);
35088c2ecf20Sopenharmony_ci	rbd_assert(list_empty(&img_req->lock_item));
35098c2ecf20Sopenharmony_ci	if (!locked)
35108c2ecf20Sopenharmony_ci		list_add_tail(&img_req->lock_item, &rbd_dev->acquiring_list);
35118c2ecf20Sopenharmony_ci	else
35128c2ecf20Sopenharmony_ci		list_add_tail(&img_req->lock_item, &rbd_dev->running_list);
35138c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev->lock_lists_lock);
35148c2ecf20Sopenharmony_ci	return locked;
35158c2ecf20Sopenharmony_ci}
35168c2ecf20Sopenharmony_ci
35178c2ecf20Sopenharmony_cistatic void rbd_lock_del_request(struct rbd_img_request *img_req)
35188c2ecf20Sopenharmony_ci{
35198c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
35208c2ecf20Sopenharmony_ci	bool need_wakeup = false;
35218c2ecf20Sopenharmony_ci
35228c2ecf20Sopenharmony_ci	lockdep_assert_held(&rbd_dev->lock_rwsem);
35238c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev->lock_lists_lock);
35248c2ecf20Sopenharmony_ci	if (!list_empty(&img_req->lock_item)) {
35258c2ecf20Sopenharmony_ci		list_del_init(&img_req->lock_item);
35268c2ecf20Sopenharmony_ci		need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING &&
35278c2ecf20Sopenharmony_ci			       list_empty(&rbd_dev->running_list));
35288c2ecf20Sopenharmony_ci	}
35298c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev->lock_lists_lock);
35308c2ecf20Sopenharmony_ci	if (need_wakeup)
35318c2ecf20Sopenharmony_ci		complete(&rbd_dev->releasing_wait);
35328c2ecf20Sopenharmony_ci}
35338c2ecf20Sopenharmony_ci
35348c2ecf20Sopenharmony_cistatic int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
35358c2ecf20Sopenharmony_ci{
35368c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
35378c2ecf20Sopenharmony_ci
35388c2ecf20Sopenharmony_ci	if (!need_exclusive_lock(img_req))
35398c2ecf20Sopenharmony_ci		return 1;
35408c2ecf20Sopenharmony_ci
35418c2ecf20Sopenharmony_ci	if (rbd_lock_add_request(img_req))
35428c2ecf20Sopenharmony_ci		return 1;
35438c2ecf20Sopenharmony_ci
35448c2ecf20Sopenharmony_ci	if (rbd_dev->opts->exclusive) {
35458c2ecf20Sopenharmony_ci		WARN_ON(1); /* lock got released? */
35468c2ecf20Sopenharmony_ci		return -EROFS;
35478c2ecf20Sopenharmony_ci	}
35488c2ecf20Sopenharmony_ci
35498c2ecf20Sopenharmony_ci	/*
35508c2ecf20Sopenharmony_ci	 * Note the use of mod_delayed_work() in rbd_acquire_lock()
35518c2ecf20Sopenharmony_ci	 * and cancel_delayed_work() in wake_lock_waiters().
35528c2ecf20Sopenharmony_ci	 */
35538c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev);
35548c2ecf20Sopenharmony_ci	queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
35558c2ecf20Sopenharmony_ci	return 0;
35568c2ecf20Sopenharmony_ci}
35578c2ecf20Sopenharmony_ci
35588c2ecf20Sopenharmony_cistatic void rbd_img_object_requests(struct rbd_img_request *img_req)
35598c2ecf20Sopenharmony_ci{
35608c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
35618c2ecf20Sopenharmony_ci	struct rbd_obj_request *obj_req;
35628c2ecf20Sopenharmony_ci
35638c2ecf20Sopenharmony_ci	rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
35648c2ecf20Sopenharmony_ci	rbd_assert(!need_exclusive_lock(img_req) ||
35658c2ecf20Sopenharmony_ci		   __rbd_is_lock_owner(rbd_dev));
35668c2ecf20Sopenharmony_ci
35678c2ecf20Sopenharmony_ci	if (rbd_img_is_write(img_req)) {
35688c2ecf20Sopenharmony_ci		rbd_assert(!img_req->snapc);
35698c2ecf20Sopenharmony_ci		down_read(&rbd_dev->header_rwsem);
35708c2ecf20Sopenharmony_ci		img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc);
35718c2ecf20Sopenharmony_ci		up_read(&rbd_dev->header_rwsem);
35728c2ecf20Sopenharmony_ci	}
35738c2ecf20Sopenharmony_ci
35748c2ecf20Sopenharmony_ci	for_each_obj_request(img_req, obj_req) {
35758c2ecf20Sopenharmony_ci		int result = 0;
35768c2ecf20Sopenharmony_ci
35778c2ecf20Sopenharmony_ci		if (__rbd_obj_handle_request(obj_req, &result)) {
35788c2ecf20Sopenharmony_ci			if (result) {
35798c2ecf20Sopenharmony_ci				img_req->pending.result = result;
35808c2ecf20Sopenharmony_ci				return;
35818c2ecf20Sopenharmony_ci			}
35828c2ecf20Sopenharmony_ci		} else {
35838c2ecf20Sopenharmony_ci			img_req->pending.num_pending++;
35848c2ecf20Sopenharmony_ci		}
35858c2ecf20Sopenharmony_ci	}
35868c2ecf20Sopenharmony_ci}
35878c2ecf20Sopenharmony_ci
35888c2ecf20Sopenharmony_cistatic bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
35898c2ecf20Sopenharmony_ci{
35908c2ecf20Sopenharmony_ci	int ret;
35918c2ecf20Sopenharmony_ci
35928c2ecf20Sopenharmony_ciagain:
35938c2ecf20Sopenharmony_ci	switch (img_req->state) {
35948c2ecf20Sopenharmony_ci	case RBD_IMG_START:
35958c2ecf20Sopenharmony_ci		rbd_assert(!*result);
35968c2ecf20Sopenharmony_ci
35978c2ecf20Sopenharmony_ci		ret = rbd_img_exclusive_lock(img_req);
35988c2ecf20Sopenharmony_ci		if (ret < 0) {
35998c2ecf20Sopenharmony_ci			*result = ret;
36008c2ecf20Sopenharmony_ci			return true;
36018c2ecf20Sopenharmony_ci		}
36028c2ecf20Sopenharmony_ci		img_req->state = RBD_IMG_EXCLUSIVE_LOCK;
36038c2ecf20Sopenharmony_ci		if (ret > 0)
36048c2ecf20Sopenharmony_ci			goto again;
36058c2ecf20Sopenharmony_ci		return false;
36068c2ecf20Sopenharmony_ci	case RBD_IMG_EXCLUSIVE_LOCK:
36078c2ecf20Sopenharmony_ci		if (*result)
36088c2ecf20Sopenharmony_ci			return true;
36098c2ecf20Sopenharmony_ci
36108c2ecf20Sopenharmony_ci		rbd_img_object_requests(img_req);
36118c2ecf20Sopenharmony_ci		if (!img_req->pending.num_pending) {
36128c2ecf20Sopenharmony_ci			*result = img_req->pending.result;
36138c2ecf20Sopenharmony_ci			img_req->state = RBD_IMG_OBJECT_REQUESTS;
36148c2ecf20Sopenharmony_ci			goto again;
36158c2ecf20Sopenharmony_ci		}
36168c2ecf20Sopenharmony_ci		img_req->state = __RBD_IMG_OBJECT_REQUESTS;
36178c2ecf20Sopenharmony_ci		return false;
36188c2ecf20Sopenharmony_ci	case __RBD_IMG_OBJECT_REQUESTS:
36198c2ecf20Sopenharmony_ci		if (!pending_result_dec(&img_req->pending, result))
36208c2ecf20Sopenharmony_ci			return false;
36218c2ecf20Sopenharmony_ci		fallthrough;
36228c2ecf20Sopenharmony_ci	case RBD_IMG_OBJECT_REQUESTS:
36238c2ecf20Sopenharmony_ci		return true;
36248c2ecf20Sopenharmony_ci	default:
36258c2ecf20Sopenharmony_ci		BUG();
36268c2ecf20Sopenharmony_ci	}
36278c2ecf20Sopenharmony_ci}
36288c2ecf20Sopenharmony_ci
36298c2ecf20Sopenharmony_ci/*
36308c2ecf20Sopenharmony_ci * Return true if @img_req is completed.
36318c2ecf20Sopenharmony_ci */
36328c2ecf20Sopenharmony_cistatic bool __rbd_img_handle_request(struct rbd_img_request *img_req,
36338c2ecf20Sopenharmony_ci				     int *result)
36348c2ecf20Sopenharmony_ci{
36358c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_req->rbd_dev;
36368c2ecf20Sopenharmony_ci	bool done;
36378c2ecf20Sopenharmony_ci
36388c2ecf20Sopenharmony_ci	if (need_exclusive_lock(img_req)) {
36398c2ecf20Sopenharmony_ci		down_read(&rbd_dev->lock_rwsem);
36408c2ecf20Sopenharmony_ci		mutex_lock(&img_req->state_mutex);
36418c2ecf20Sopenharmony_ci		done = rbd_img_advance(img_req, result);
36428c2ecf20Sopenharmony_ci		if (done)
36438c2ecf20Sopenharmony_ci			rbd_lock_del_request(img_req);
36448c2ecf20Sopenharmony_ci		mutex_unlock(&img_req->state_mutex);
36458c2ecf20Sopenharmony_ci		up_read(&rbd_dev->lock_rwsem);
36468c2ecf20Sopenharmony_ci	} else {
36478c2ecf20Sopenharmony_ci		mutex_lock(&img_req->state_mutex);
36488c2ecf20Sopenharmony_ci		done = rbd_img_advance(img_req, result);
36498c2ecf20Sopenharmony_ci		mutex_unlock(&img_req->state_mutex);
36508c2ecf20Sopenharmony_ci	}
36518c2ecf20Sopenharmony_ci
36528c2ecf20Sopenharmony_ci	if (done && *result) {
36538c2ecf20Sopenharmony_ci		rbd_assert(*result < 0);
36548c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "%s%s result %d",
36558c2ecf20Sopenharmony_ci		      test_bit(IMG_REQ_CHILD, &img_req->flags) ? "child " : "",
36568c2ecf20Sopenharmony_ci		      obj_op_name(img_req->op_type), *result);
36578c2ecf20Sopenharmony_ci	}
36588c2ecf20Sopenharmony_ci	return done;
36598c2ecf20Sopenharmony_ci}
36608c2ecf20Sopenharmony_ci
36618c2ecf20Sopenharmony_cistatic void rbd_img_handle_request(struct rbd_img_request *img_req, int result)
36628c2ecf20Sopenharmony_ci{
36638c2ecf20Sopenharmony_ciagain:
36648c2ecf20Sopenharmony_ci	if (!__rbd_img_handle_request(img_req, &result))
36658c2ecf20Sopenharmony_ci		return;
36668c2ecf20Sopenharmony_ci
36678c2ecf20Sopenharmony_ci	if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
36688c2ecf20Sopenharmony_ci		struct rbd_obj_request *obj_req = img_req->obj_request;
36698c2ecf20Sopenharmony_ci
36708c2ecf20Sopenharmony_ci		rbd_img_request_destroy(img_req);
36718c2ecf20Sopenharmony_ci		if (__rbd_obj_handle_request(obj_req, &result)) {
36728c2ecf20Sopenharmony_ci			img_req = obj_req->img_request;
36738c2ecf20Sopenharmony_ci			goto again;
36748c2ecf20Sopenharmony_ci		}
36758c2ecf20Sopenharmony_ci	} else {
36768c2ecf20Sopenharmony_ci		struct request *rq = blk_mq_rq_from_pdu(img_req);
36778c2ecf20Sopenharmony_ci
36788c2ecf20Sopenharmony_ci		rbd_img_request_destroy(img_req);
36798c2ecf20Sopenharmony_ci		blk_mq_end_request(rq, errno_to_blk_status(result));
36808c2ecf20Sopenharmony_ci	}
36818c2ecf20Sopenharmony_ci}
36828c2ecf20Sopenharmony_ci
36838c2ecf20Sopenharmony_cistatic const struct rbd_client_id rbd_empty_cid;
36848c2ecf20Sopenharmony_ci
36858c2ecf20Sopenharmony_cistatic bool rbd_cid_equal(const struct rbd_client_id *lhs,
36868c2ecf20Sopenharmony_ci			  const struct rbd_client_id *rhs)
36878c2ecf20Sopenharmony_ci{
36888c2ecf20Sopenharmony_ci	return lhs->gid == rhs->gid && lhs->handle == rhs->handle;
36898c2ecf20Sopenharmony_ci}
36908c2ecf20Sopenharmony_ci
36918c2ecf20Sopenharmony_cistatic struct rbd_client_id rbd_get_cid(struct rbd_device *rbd_dev)
36928c2ecf20Sopenharmony_ci{
36938c2ecf20Sopenharmony_ci	struct rbd_client_id cid;
36948c2ecf20Sopenharmony_ci
36958c2ecf20Sopenharmony_ci	mutex_lock(&rbd_dev->watch_mutex);
36968c2ecf20Sopenharmony_ci	cid.gid = ceph_client_gid(rbd_dev->rbd_client->client);
36978c2ecf20Sopenharmony_ci	cid.handle = rbd_dev->watch_cookie;
36988c2ecf20Sopenharmony_ci	mutex_unlock(&rbd_dev->watch_mutex);
36998c2ecf20Sopenharmony_ci	return cid;
37008c2ecf20Sopenharmony_ci}
37018c2ecf20Sopenharmony_ci
37028c2ecf20Sopenharmony_ci/*
37038c2ecf20Sopenharmony_ci * lock_rwsem must be held for write
37048c2ecf20Sopenharmony_ci */
37058c2ecf20Sopenharmony_cistatic void rbd_set_owner_cid(struct rbd_device *rbd_dev,
37068c2ecf20Sopenharmony_ci			      const struct rbd_client_id *cid)
37078c2ecf20Sopenharmony_ci{
37088c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p %llu-%llu -> %llu-%llu\n", __func__, rbd_dev,
37098c2ecf20Sopenharmony_ci	     rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle,
37108c2ecf20Sopenharmony_ci	     cid->gid, cid->handle);
37118c2ecf20Sopenharmony_ci	rbd_dev->owner_cid = *cid; /* struct */
37128c2ecf20Sopenharmony_ci}
37138c2ecf20Sopenharmony_ci
37148c2ecf20Sopenharmony_cistatic void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
37158c2ecf20Sopenharmony_ci{
37168c2ecf20Sopenharmony_ci	mutex_lock(&rbd_dev->watch_mutex);
37178c2ecf20Sopenharmony_ci	sprintf(buf, "%s %llu", RBD_LOCK_COOKIE_PREFIX, rbd_dev->watch_cookie);
37188c2ecf20Sopenharmony_ci	mutex_unlock(&rbd_dev->watch_mutex);
37198c2ecf20Sopenharmony_ci}
37208c2ecf20Sopenharmony_ci
37218c2ecf20Sopenharmony_cistatic void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
37228c2ecf20Sopenharmony_ci{
37238c2ecf20Sopenharmony_ci	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
37248c2ecf20Sopenharmony_ci
37258c2ecf20Sopenharmony_ci	rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
37268c2ecf20Sopenharmony_ci	strcpy(rbd_dev->lock_cookie, cookie);
37278c2ecf20Sopenharmony_ci	rbd_set_owner_cid(rbd_dev, &cid);
37288c2ecf20Sopenharmony_ci	queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
37298c2ecf20Sopenharmony_ci}
37308c2ecf20Sopenharmony_ci
37318c2ecf20Sopenharmony_ci/*
37328c2ecf20Sopenharmony_ci * lock_rwsem must be held for write
37338c2ecf20Sopenharmony_ci */
37348c2ecf20Sopenharmony_cistatic int rbd_lock(struct rbd_device *rbd_dev)
37358c2ecf20Sopenharmony_ci{
37368c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
37378c2ecf20Sopenharmony_ci	char cookie[32];
37388c2ecf20Sopenharmony_ci	int ret;
37398c2ecf20Sopenharmony_ci
37408c2ecf20Sopenharmony_ci	WARN_ON(__rbd_is_lock_owner(rbd_dev) ||
37418c2ecf20Sopenharmony_ci		rbd_dev->lock_cookie[0] != '\0');
37428c2ecf20Sopenharmony_ci
37438c2ecf20Sopenharmony_ci	format_lock_cookie(rbd_dev, cookie);
37448c2ecf20Sopenharmony_ci	ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
37458c2ecf20Sopenharmony_ci			    RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
37468c2ecf20Sopenharmony_ci			    RBD_LOCK_TAG, "", 0);
37478c2ecf20Sopenharmony_ci	if (ret && ret != -EEXIST)
37488c2ecf20Sopenharmony_ci		return ret;
37498c2ecf20Sopenharmony_ci
37508c2ecf20Sopenharmony_ci	__rbd_lock(rbd_dev, cookie);
37518c2ecf20Sopenharmony_ci	return 0;
37528c2ecf20Sopenharmony_ci}
37538c2ecf20Sopenharmony_ci
37548c2ecf20Sopenharmony_ci/*
37558c2ecf20Sopenharmony_ci * lock_rwsem must be held for write
37568c2ecf20Sopenharmony_ci */
37578c2ecf20Sopenharmony_cistatic void rbd_unlock(struct rbd_device *rbd_dev)
37588c2ecf20Sopenharmony_ci{
37598c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
37608c2ecf20Sopenharmony_ci	int ret;
37618c2ecf20Sopenharmony_ci
37628c2ecf20Sopenharmony_ci	WARN_ON(!__rbd_is_lock_owner(rbd_dev) ||
37638c2ecf20Sopenharmony_ci		rbd_dev->lock_cookie[0] == '\0');
37648c2ecf20Sopenharmony_ci
37658c2ecf20Sopenharmony_ci	ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
37668c2ecf20Sopenharmony_ci			      RBD_LOCK_NAME, rbd_dev->lock_cookie);
37678c2ecf20Sopenharmony_ci	if (ret && ret != -ENOENT)
37688c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to unlock header: %d", ret);
37698c2ecf20Sopenharmony_ci
37708c2ecf20Sopenharmony_ci	/* treat errors as the image is unlocked */
37718c2ecf20Sopenharmony_ci	rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
37728c2ecf20Sopenharmony_ci	rbd_dev->lock_cookie[0] = '\0';
37738c2ecf20Sopenharmony_ci	rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
37748c2ecf20Sopenharmony_ci	queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work);
37758c2ecf20Sopenharmony_ci}
37768c2ecf20Sopenharmony_ci
37778c2ecf20Sopenharmony_cistatic int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
37788c2ecf20Sopenharmony_ci				enum rbd_notify_op notify_op,
37798c2ecf20Sopenharmony_ci				struct page ***preply_pages,
37808c2ecf20Sopenharmony_ci				size_t *preply_len)
37818c2ecf20Sopenharmony_ci{
37828c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
37838c2ecf20Sopenharmony_ci	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
37848c2ecf20Sopenharmony_ci	char buf[4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN];
37858c2ecf20Sopenharmony_ci	int buf_size = sizeof(buf);
37868c2ecf20Sopenharmony_ci	void *p = buf;
37878c2ecf20Sopenharmony_ci
37888c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op);
37898c2ecf20Sopenharmony_ci
37908c2ecf20Sopenharmony_ci	/* encode *LockPayload NotifyMessage (op + ClientId) */
37918c2ecf20Sopenharmony_ci	ceph_start_encoding(&p, 2, 1, buf_size - CEPH_ENCODING_START_BLK_LEN);
37928c2ecf20Sopenharmony_ci	ceph_encode_32(&p, notify_op);
37938c2ecf20Sopenharmony_ci	ceph_encode_64(&p, cid.gid);
37948c2ecf20Sopenharmony_ci	ceph_encode_64(&p, cid.handle);
37958c2ecf20Sopenharmony_ci
37968c2ecf20Sopenharmony_ci	return ceph_osdc_notify(osdc, &rbd_dev->header_oid,
37978c2ecf20Sopenharmony_ci				&rbd_dev->header_oloc, buf, buf_size,
37988c2ecf20Sopenharmony_ci				RBD_NOTIFY_TIMEOUT, preply_pages, preply_len);
37998c2ecf20Sopenharmony_ci}
38008c2ecf20Sopenharmony_ci
38018c2ecf20Sopenharmony_cistatic void rbd_notify_op_lock(struct rbd_device *rbd_dev,
38028c2ecf20Sopenharmony_ci			       enum rbd_notify_op notify_op)
38038c2ecf20Sopenharmony_ci{
38048c2ecf20Sopenharmony_ci	__rbd_notify_op_lock(rbd_dev, notify_op, NULL, NULL);
38058c2ecf20Sopenharmony_ci}
38068c2ecf20Sopenharmony_ci
38078c2ecf20Sopenharmony_cistatic void rbd_notify_acquired_lock(struct work_struct *work)
38088c2ecf20Sopenharmony_ci{
38098c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = container_of(work, struct rbd_device,
38108c2ecf20Sopenharmony_ci						  acquired_lock_work);
38118c2ecf20Sopenharmony_ci
38128c2ecf20Sopenharmony_ci	rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_ACQUIRED_LOCK);
38138c2ecf20Sopenharmony_ci}
38148c2ecf20Sopenharmony_ci
38158c2ecf20Sopenharmony_cistatic void rbd_notify_released_lock(struct work_struct *work)
38168c2ecf20Sopenharmony_ci{
38178c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = container_of(work, struct rbd_device,
38188c2ecf20Sopenharmony_ci						  released_lock_work);
38198c2ecf20Sopenharmony_ci
38208c2ecf20Sopenharmony_ci	rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_RELEASED_LOCK);
38218c2ecf20Sopenharmony_ci}
38228c2ecf20Sopenharmony_ci
38238c2ecf20Sopenharmony_cistatic int rbd_request_lock(struct rbd_device *rbd_dev)
38248c2ecf20Sopenharmony_ci{
38258c2ecf20Sopenharmony_ci	struct page **reply_pages;
38268c2ecf20Sopenharmony_ci	size_t reply_len;
38278c2ecf20Sopenharmony_ci	bool lock_owner_responded = false;
38288c2ecf20Sopenharmony_ci	int ret;
38298c2ecf20Sopenharmony_ci
38308c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
38318c2ecf20Sopenharmony_ci
38328c2ecf20Sopenharmony_ci	ret = __rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_REQUEST_LOCK,
38338c2ecf20Sopenharmony_ci				   &reply_pages, &reply_len);
38348c2ecf20Sopenharmony_ci	if (ret && ret != -ETIMEDOUT) {
38358c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to request lock: %d", ret);
38368c2ecf20Sopenharmony_ci		goto out;
38378c2ecf20Sopenharmony_ci	}
38388c2ecf20Sopenharmony_ci
38398c2ecf20Sopenharmony_ci	if (reply_len > 0 && reply_len <= PAGE_SIZE) {
38408c2ecf20Sopenharmony_ci		void *p = page_address(reply_pages[0]);
38418c2ecf20Sopenharmony_ci		void *const end = p + reply_len;
38428c2ecf20Sopenharmony_ci		u32 n;
38438c2ecf20Sopenharmony_ci
38448c2ecf20Sopenharmony_ci		ceph_decode_32_safe(&p, end, n, e_inval); /* num_acks */
38458c2ecf20Sopenharmony_ci		while (n--) {
38468c2ecf20Sopenharmony_ci			u8 struct_v;
38478c2ecf20Sopenharmony_ci			u32 len;
38488c2ecf20Sopenharmony_ci
38498c2ecf20Sopenharmony_ci			ceph_decode_need(&p, end, 8 + 8, e_inval);
38508c2ecf20Sopenharmony_ci			p += 8 + 8; /* skip gid and cookie */
38518c2ecf20Sopenharmony_ci
38528c2ecf20Sopenharmony_ci			ceph_decode_32_safe(&p, end, len, e_inval);
38538c2ecf20Sopenharmony_ci			if (!len)
38548c2ecf20Sopenharmony_ci				continue;
38558c2ecf20Sopenharmony_ci
38568c2ecf20Sopenharmony_ci			if (lock_owner_responded) {
38578c2ecf20Sopenharmony_ci				rbd_warn(rbd_dev,
38588c2ecf20Sopenharmony_ci					 "duplicate lock owners detected");
38598c2ecf20Sopenharmony_ci				ret = -EIO;
38608c2ecf20Sopenharmony_ci				goto out;
38618c2ecf20Sopenharmony_ci			}
38628c2ecf20Sopenharmony_ci
38638c2ecf20Sopenharmony_ci			lock_owner_responded = true;
38648c2ecf20Sopenharmony_ci			ret = ceph_start_decoding(&p, end, 1, "ResponseMessage",
38658c2ecf20Sopenharmony_ci						  &struct_v, &len);
38668c2ecf20Sopenharmony_ci			if (ret) {
38678c2ecf20Sopenharmony_ci				rbd_warn(rbd_dev,
38688c2ecf20Sopenharmony_ci					 "failed to decode ResponseMessage: %d",
38698c2ecf20Sopenharmony_ci					 ret);
38708c2ecf20Sopenharmony_ci				goto e_inval;
38718c2ecf20Sopenharmony_ci			}
38728c2ecf20Sopenharmony_ci
38738c2ecf20Sopenharmony_ci			ret = ceph_decode_32(&p);
38748c2ecf20Sopenharmony_ci		}
38758c2ecf20Sopenharmony_ci	}
38768c2ecf20Sopenharmony_ci
38778c2ecf20Sopenharmony_ci	if (!lock_owner_responded) {
38788c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "no lock owners detected");
38798c2ecf20Sopenharmony_ci		ret = -ETIMEDOUT;
38808c2ecf20Sopenharmony_ci	}
38818c2ecf20Sopenharmony_ci
38828c2ecf20Sopenharmony_ciout:
38838c2ecf20Sopenharmony_ci	ceph_release_page_vector(reply_pages, calc_pages_for(0, reply_len));
38848c2ecf20Sopenharmony_ci	return ret;
38858c2ecf20Sopenharmony_ci
38868c2ecf20Sopenharmony_cie_inval:
38878c2ecf20Sopenharmony_ci	ret = -EINVAL;
38888c2ecf20Sopenharmony_ci	goto out;
38898c2ecf20Sopenharmony_ci}
38908c2ecf20Sopenharmony_ci
38918c2ecf20Sopenharmony_ci/*
38928c2ecf20Sopenharmony_ci * Either image request state machine(s) or rbd_add_acquire_lock()
38938c2ecf20Sopenharmony_ci * (i.e. "rbd map").
38948c2ecf20Sopenharmony_ci */
38958c2ecf20Sopenharmony_cistatic void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
38968c2ecf20Sopenharmony_ci{
38978c2ecf20Sopenharmony_ci	struct rbd_img_request *img_req;
38988c2ecf20Sopenharmony_ci
38998c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result);
39008c2ecf20Sopenharmony_ci	lockdep_assert_held_write(&rbd_dev->lock_rwsem);
39018c2ecf20Sopenharmony_ci
39028c2ecf20Sopenharmony_ci	cancel_delayed_work(&rbd_dev->lock_dwork);
39038c2ecf20Sopenharmony_ci	if (!completion_done(&rbd_dev->acquire_wait)) {
39048c2ecf20Sopenharmony_ci		rbd_assert(list_empty(&rbd_dev->acquiring_list) &&
39058c2ecf20Sopenharmony_ci			   list_empty(&rbd_dev->running_list));
39068c2ecf20Sopenharmony_ci		rbd_dev->acquire_err = result;
39078c2ecf20Sopenharmony_ci		complete_all(&rbd_dev->acquire_wait);
39088c2ecf20Sopenharmony_ci		return;
39098c2ecf20Sopenharmony_ci	}
39108c2ecf20Sopenharmony_ci
39118c2ecf20Sopenharmony_ci	while (!list_empty(&rbd_dev->acquiring_list)) {
39128c2ecf20Sopenharmony_ci		img_req = list_first_entry(&rbd_dev->acquiring_list,
39138c2ecf20Sopenharmony_ci					   struct rbd_img_request, lock_item);
39148c2ecf20Sopenharmony_ci		mutex_lock(&img_req->state_mutex);
39158c2ecf20Sopenharmony_ci		rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK);
39168c2ecf20Sopenharmony_ci		if (!result)
39178c2ecf20Sopenharmony_ci			list_move_tail(&img_req->lock_item,
39188c2ecf20Sopenharmony_ci				       &rbd_dev->running_list);
39198c2ecf20Sopenharmony_ci		else
39208c2ecf20Sopenharmony_ci			list_del_init(&img_req->lock_item);
39218c2ecf20Sopenharmony_ci		rbd_img_schedule(img_req, result);
39228c2ecf20Sopenharmony_ci		mutex_unlock(&img_req->state_mutex);
39238c2ecf20Sopenharmony_ci	}
39248c2ecf20Sopenharmony_ci}
39258c2ecf20Sopenharmony_ci
39268c2ecf20Sopenharmony_cistatic bool locker_equal(const struct ceph_locker *lhs,
39278c2ecf20Sopenharmony_ci			 const struct ceph_locker *rhs)
39288c2ecf20Sopenharmony_ci{
39298c2ecf20Sopenharmony_ci	return lhs->id.name.type == rhs->id.name.type &&
39308c2ecf20Sopenharmony_ci	       lhs->id.name.num == rhs->id.name.num &&
39318c2ecf20Sopenharmony_ci	       !strcmp(lhs->id.cookie, rhs->id.cookie) &&
39328c2ecf20Sopenharmony_ci	       ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
39338c2ecf20Sopenharmony_ci}
39348c2ecf20Sopenharmony_ci
/* Free a single locker as returned by get_lock_owner_info(); NULL is fine. */
static void free_locker(struct ceph_locker *locker)
{
	if (!locker)
		return;

	ceph_free_lockers(locker, 1);
}
39408c2ecf20Sopenharmony_ci
39418c2ecf20Sopenharmony_cistatic struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
39428c2ecf20Sopenharmony_ci{
39438c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
39448c2ecf20Sopenharmony_ci	struct ceph_locker *lockers;
39458c2ecf20Sopenharmony_ci	u32 num_lockers;
39468c2ecf20Sopenharmony_ci	u8 lock_type;
39478c2ecf20Sopenharmony_ci	char *lock_tag;
39488c2ecf20Sopenharmony_ci	int ret;
39498c2ecf20Sopenharmony_ci
39508c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
39518c2ecf20Sopenharmony_ci
39528c2ecf20Sopenharmony_ci	ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
39538c2ecf20Sopenharmony_ci				 &rbd_dev->header_oloc, RBD_LOCK_NAME,
39548c2ecf20Sopenharmony_ci				 &lock_type, &lock_tag, &lockers, &num_lockers);
39558c2ecf20Sopenharmony_ci	if (ret) {
39568c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
39578c2ecf20Sopenharmony_ci		return ERR_PTR(ret);
39588c2ecf20Sopenharmony_ci	}
39598c2ecf20Sopenharmony_ci
39608c2ecf20Sopenharmony_ci	if (num_lockers == 0) {
39618c2ecf20Sopenharmony_ci		dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
39628c2ecf20Sopenharmony_ci		lockers = NULL;
39638c2ecf20Sopenharmony_ci		goto out;
39648c2ecf20Sopenharmony_ci	}
39658c2ecf20Sopenharmony_ci
39668c2ecf20Sopenharmony_ci	if (strcmp(lock_tag, RBD_LOCK_TAG)) {
39678c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
39688c2ecf20Sopenharmony_ci			 lock_tag);
39698c2ecf20Sopenharmony_ci		goto err_busy;
39708c2ecf20Sopenharmony_ci	}
39718c2ecf20Sopenharmony_ci
39728c2ecf20Sopenharmony_ci	if (lock_type == CEPH_CLS_LOCK_SHARED) {
39738c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "shared lock type detected");
39748c2ecf20Sopenharmony_ci		goto err_busy;
39758c2ecf20Sopenharmony_ci	}
39768c2ecf20Sopenharmony_ci
39778c2ecf20Sopenharmony_ci	WARN_ON(num_lockers != 1);
39788c2ecf20Sopenharmony_ci	if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
39798c2ecf20Sopenharmony_ci		    strlen(RBD_LOCK_COOKIE_PREFIX))) {
39808c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
39818c2ecf20Sopenharmony_ci			 lockers[0].id.cookie);
39828c2ecf20Sopenharmony_ci		goto err_busy;
39838c2ecf20Sopenharmony_ci	}
39848c2ecf20Sopenharmony_ci
39858c2ecf20Sopenharmony_ciout:
39868c2ecf20Sopenharmony_ci	kfree(lock_tag);
39878c2ecf20Sopenharmony_ci	return lockers;
39888c2ecf20Sopenharmony_ci
39898c2ecf20Sopenharmony_cierr_busy:
39908c2ecf20Sopenharmony_ci	kfree(lock_tag);
39918c2ecf20Sopenharmony_ci	ceph_free_lockers(lockers, num_lockers);
39928c2ecf20Sopenharmony_ci	return ERR_PTR(-EBUSY);
39938c2ecf20Sopenharmony_ci}
39948c2ecf20Sopenharmony_ci
39958c2ecf20Sopenharmony_cistatic int find_watcher(struct rbd_device *rbd_dev,
39968c2ecf20Sopenharmony_ci			const struct ceph_locker *locker)
39978c2ecf20Sopenharmony_ci{
39988c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
39998c2ecf20Sopenharmony_ci	struct ceph_watch_item *watchers;
40008c2ecf20Sopenharmony_ci	u32 num_watchers;
40018c2ecf20Sopenharmony_ci	u64 cookie;
40028c2ecf20Sopenharmony_ci	int i;
40038c2ecf20Sopenharmony_ci	int ret;
40048c2ecf20Sopenharmony_ci
40058c2ecf20Sopenharmony_ci	ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
40068c2ecf20Sopenharmony_ci				      &rbd_dev->header_oloc, &watchers,
40078c2ecf20Sopenharmony_ci				      &num_watchers);
40088c2ecf20Sopenharmony_ci	if (ret) {
40098c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
40108c2ecf20Sopenharmony_ci		return ret;
40118c2ecf20Sopenharmony_ci	}
40128c2ecf20Sopenharmony_ci
40138c2ecf20Sopenharmony_ci	sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
40148c2ecf20Sopenharmony_ci	for (i = 0; i < num_watchers; i++) {
40158c2ecf20Sopenharmony_ci		/*
40168c2ecf20Sopenharmony_ci		 * Ignore addr->type while comparing.  This mimics
40178c2ecf20Sopenharmony_ci		 * entity_addr_t::get_legacy_str() + strcmp().
40188c2ecf20Sopenharmony_ci		 */
40198c2ecf20Sopenharmony_ci		if (ceph_addr_equal_no_type(&watchers[i].addr,
40208c2ecf20Sopenharmony_ci					    &locker->info.addr) &&
40218c2ecf20Sopenharmony_ci		    watchers[i].cookie == cookie) {
40228c2ecf20Sopenharmony_ci			struct rbd_client_id cid = {
40238c2ecf20Sopenharmony_ci				.gid = le64_to_cpu(watchers[i].name.num),
40248c2ecf20Sopenharmony_ci				.handle = cookie,
40258c2ecf20Sopenharmony_ci			};
40268c2ecf20Sopenharmony_ci
40278c2ecf20Sopenharmony_ci			dout("%s rbd_dev %p found cid %llu-%llu\n", __func__,
40288c2ecf20Sopenharmony_ci			     rbd_dev, cid.gid, cid.handle);
40298c2ecf20Sopenharmony_ci			rbd_set_owner_cid(rbd_dev, &cid);
40308c2ecf20Sopenharmony_ci			ret = 1;
40318c2ecf20Sopenharmony_ci			goto out;
40328c2ecf20Sopenharmony_ci		}
40338c2ecf20Sopenharmony_ci	}
40348c2ecf20Sopenharmony_ci
40358c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p no watchers\n", __func__, rbd_dev);
40368c2ecf20Sopenharmony_ci	ret = 0;
40378c2ecf20Sopenharmony_ciout:
40388c2ecf20Sopenharmony_ci	kfree(watchers);
40398c2ecf20Sopenharmony_ci	return ret;
40408c2ecf20Sopenharmony_ci}
40418c2ecf20Sopenharmony_ci
40428c2ecf20Sopenharmony_ci/*
40438c2ecf20Sopenharmony_ci * lock_rwsem must be held for write
40448c2ecf20Sopenharmony_ci */
40458c2ecf20Sopenharmony_cistatic int rbd_try_lock(struct rbd_device *rbd_dev)
40468c2ecf20Sopenharmony_ci{
40478c2ecf20Sopenharmony_ci	struct ceph_client *client = rbd_dev->rbd_client->client;
40488c2ecf20Sopenharmony_ci	struct ceph_locker *locker, *refreshed_locker;
40498c2ecf20Sopenharmony_ci	int ret;
40508c2ecf20Sopenharmony_ci
40518c2ecf20Sopenharmony_ci	for (;;) {
40528c2ecf20Sopenharmony_ci		locker = refreshed_locker = NULL;
40538c2ecf20Sopenharmony_ci
40548c2ecf20Sopenharmony_ci		ret = rbd_lock(rbd_dev);
40558c2ecf20Sopenharmony_ci		if (!ret)
40568c2ecf20Sopenharmony_ci			goto out;
40578c2ecf20Sopenharmony_ci		if (ret != -EBUSY) {
40588c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "failed to lock header: %d", ret);
40598c2ecf20Sopenharmony_ci			goto out;
40608c2ecf20Sopenharmony_ci		}
40618c2ecf20Sopenharmony_ci
40628c2ecf20Sopenharmony_ci		/* determine if the current lock holder is still alive */
40638c2ecf20Sopenharmony_ci		locker = get_lock_owner_info(rbd_dev);
40648c2ecf20Sopenharmony_ci		if (IS_ERR(locker)) {
40658c2ecf20Sopenharmony_ci			ret = PTR_ERR(locker);
40668c2ecf20Sopenharmony_ci			locker = NULL;
40678c2ecf20Sopenharmony_ci			goto out;
40688c2ecf20Sopenharmony_ci		}
40698c2ecf20Sopenharmony_ci		if (!locker)
40708c2ecf20Sopenharmony_ci			goto again;
40718c2ecf20Sopenharmony_ci
40728c2ecf20Sopenharmony_ci		ret = find_watcher(rbd_dev, locker);
40738c2ecf20Sopenharmony_ci		if (ret)
40748c2ecf20Sopenharmony_ci			goto out; /* request lock or error */
40758c2ecf20Sopenharmony_ci
40768c2ecf20Sopenharmony_ci		refreshed_locker = get_lock_owner_info(rbd_dev);
40778c2ecf20Sopenharmony_ci		if (IS_ERR(refreshed_locker)) {
40788c2ecf20Sopenharmony_ci			ret = PTR_ERR(refreshed_locker);
40798c2ecf20Sopenharmony_ci			refreshed_locker = NULL;
40808c2ecf20Sopenharmony_ci			goto out;
40818c2ecf20Sopenharmony_ci		}
40828c2ecf20Sopenharmony_ci		if (!refreshed_locker ||
40838c2ecf20Sopenharmony_ci		    !locker_equal(locker, refreshed_locker))
40848c2ecf20Sopenharmony_ci			goto again;
40858c2ecf20Sopenharmony_ci
40868c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
40878c2ecf20Sopenharmony_ci			 ENTITY_NAME(locker->id.name));
40888c2ecf20Sopenharmony_ci
40898c2ecf20Sopenharmony_ci		ret = ceph_monc_blocklist_add(&client->monc,
40908c2ecf20Sopenharmony_ci					      &locker->info.addr);
40918c2ecf20Sopenharmony_ci		if (ret) {
40928c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
40938c2ecf20Sopenharmony_ci				 ENTITY_NAME(locker->id.name), ret);
40948c2ecf20Sopenharmony_ci			goto out;
40958c2ecf20Sopenharmony_ci		}
40968c2ecf20Sopenharmony_ci
40978c2ecf20Sopenharmony_ci		ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
40988c2ecf20Sopenharmony_ci					  &rbd_dev->header_oloc, RBD_LOCK_NAME,
40998c2ecf20Sopenharmony_ci					  locker->id.cookie, &locker->id.name);
41008c2ecf20Sopenharmony_ci		if (ret && ret != -ENOENT) {
41018c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "failed to break header lock: %d",
41028c2ecf20Sopenharmony_ci				 ret);
41038c2ecf20Sopenharmony_ci			goto out;
41048c2ecf20Sopenharmony_ci		}
41058c2ecf20Sopenharmony_ci
41068c2ecf20Sopenharmony_ciagain:
41078c2ecf20Sopenharmony_ci		free_locker(refreshed_locker);
41088c2ecf20Sopenharmony_ci		free_locker(locker);
41098c2ecf20Sopenharmony_ci	}
41108c2ecf20Sopenharmony_ci
41118c2ecf20Sopenharmony_ciout:
41128c2ecf20Sopenharmony_ci	free_locker(refreshed_locker);
41138c2ecf20Sopenharmony_ci	free_locker(locker);
41148c2ecf20Sopenharmony_ci	return ret;
41158c2ecf20Sopenharmony_ci}
41168c2ecf20Sopenharmony_ci
41178c2ecf20Sopenharmony_cistatic int rbd_post_acquire_action(struct rbd_device *rbd_dev)
41188c2ecf20Sopenharmony_ci{
41198c2ecf20Sopenharmony_ci	int ret;
41208c2ecf20Sopenharmony_ci
41218c2ecf20Sopenharmony_ci	ret = rbd_dev_refresh(rbd_dev);
41228c2ecf20Sopenharmony_ci	if (ret)
41238c2ecf20Sopenharmony_ci		return ret;
41248c2ecf20Sopenharmony_ci
41258c2ecf20Sopenharmony_ci	if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) {
41268c2ecf20Sopenharmony_ci		ret = rbd_object_map_open(rbd_dev);
41278c2ecf20Sopenharmony_ci		if (ret)
41288c2ecf20Sopenharmony_ci			return ret;
41298c2ecf20Sopenharmony_ci	}
41308c2ecf20Sopenharmony_ci
41318c2ecf20Sopenharmony_ci	return 0;
41328c2ecf20Sopenharmony_ci}
41338c2ecf20Sopenharmony_ci
41348c2ecf20Sopenharmony_ci/*
41358c2ecf20Sopenharmony_ci * Return:
41368c2ecf20Sopenharmony_ci *   0 - lock acquired
41378c2ecf20Sopenharmony_ci *   1 - caller should call rbd_request_lock()
41388c2ecf20Sopenharmony_ci *  <0 - error
41398c2ecf20Sopenharmony_ci */
41408c2ecf20Sopenharmony_cistatic int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
41418c2ecf20Sopenharmony_ci{
41428c2ecf20Sopenharmony_ci	int ret;
41438c2ecf20Sopenharmony_ci
41448c2ecf20Sopenharmony_ci	down_read(&rbd_dev->lock_rwsem);
41458c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev,
41468c2ecf20Sopenharmony_ci	     rbd_dev->lock_state);
41478c2ecf20Sopenharmony_ci	if (__rbd_is_lock_owner(rbd_dev)) {
41488c2ecf20Sopenharmony_ci		up_read(&rbd_dev->lock_rwsem);
41498c2ecf20Sopenharmony_ci		return 0;
41508c2ecf20Sopenharmony_ci	}
41518c2ecf20Sopenharmony_ci
41528c2ecf20Sopenharmony_ci	up_read(&rbd_dev->lock_rwsem);
41538c2ecf20Sopenharmony_ci	down_write(&rbd_dev->lock_rwsem);
41548c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev,
41558c2ecf20Sopenharmony_ci	     rbd_dev->lock_state);
41568c2ecf20Sopenharmony_ci	if (__rbd_is_lock_owner(rbd_dev)) {
41578c2ecf20Sopenharmony_ci		up_write(&rbd_dev->lock_rwsem);
41588c2ecf20Sopenharmony_ci		return 0;
41598c2ecf20Sopenharmony_ci	}
41608c2ecf20Sopenharmony_ci
41618c2ecf20Sopenharmony_ci	ret = rbd_try_lock(rbd_dev);
41628c2ecf20Sopenharmony_ci	if (ret < 0) {
41638c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
41648c2ecf20Sopenharmony_ci		goto out;
41658c2ecf20Sopenharmony_ci	}
41668c2ecf20Sopenharmony_ci	if (ret > 0) {
41678c2ecf20Sopenharmony_ci		up_write(&rbd_dev->lock_rwsem);
41688c2ecf20Sopenharmony_ci		return ret;
41698c2ecf20Sopenharmony_ci	}
41708c2ecf20Sopenharmony_ci
41718c2ecf20Sopenharmony_ci	rbd_assert(rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED);
41728c2ecf20Sopenharmony_ci	rbd_assert(list_empty(&rbd_dev->running_list));
41738c2ecf20Sopenharmony_ci
41748c2ecf20Sopenharmony_ci	ret = rbd_post_acquire_action(rbd_dev);
41758c2ecf20Sopenharmony_ci	if (ret) {
41768c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "post-acquire action failed: %d", ret);
41778c2ecf20Sopenharmony_ci		/*
41788c2ecf20Sopenharmony_ci		 * Can't stay in RBD_LOCK_STATE_LOCKED because
41798c2ecf20Sopenharmony_ci		 * rbd_lock_add_request() would let the request through,
41808c2ecf20Sopenharmony_ci		 * assuming that e.g. object map is locked and loaded.
41818c2ecf20Sopenharmony_ci		 */
41828c2ecf20Sopenharmony_ci		rbd_unlock(rbd_dev);
41838c2ecf20Sopenharmony_ci	}
41848c2ecf20Sopenharmony_ci
41858c2ecf20Sopenharmony_ciout:
41868c2ecf20Sopenharmony_ci	wake_lock_waiters(rbd_dev, ret);
41878c2ecf20Sopenharmony_ci	up_write(&rbd_dev->lock_rwsem);
41888c2ecf20Sopenharmony_ci	return ret;
41898c2ecf20Sopenharmony_ci}
41908c2ecf20Sopenharmony_ci
41918c2ecf20Sopenharmony_cistatic void rbd_acquire_lock(struct work_struct *work)
41928c2ecf20Sopenharmony_ci{
41938c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
41948c2ecf20Sopenharmony_ci					    struct rbd_device, lock_dwork);
41958c2ecf20Sopenharmony_ci	int ret;
41968c2ecf20Sopenharmony_ci
41978c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
41988c2ecf20Sopenharmony_ciagain:
41998c2ecf20Sopenharmony_ci	ret = rbd_try_acquire_lock(rbd_dev);
42008c2ecf20Sopenharmony_ci	if (ret <= 0) {
42018c2ecf20Sopenharmony_ci		dout("%s rbd_dev %p ret %d - done\n", __func__, rbd_dev, ret);
42028c2ecf20Sopenharmony_ci		return;
42038c2ecf20Sopenharmony_ci	}
42048c2ecf20Sopenharmony_ci
42058c2ecf20Sopenharmony_ci	ret = rbd_request_lock(rbd_dev);
42068c2ecf20Sopenharmony_ci	if (ret == -ETIMEDOUT) {
42078c2ecf20Sopenharmony_ci		goto again; /* treat this as a dead client */
42088c2ecf20Sopenharmony_ci	} else if (ret == -EROFS) {
42098c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "peer will not release lock");
42108c2ecf20Sopenharmony_ci		down_write(&rbd_dev->lock_rwsem);
42118c2ecf20Sopenharmony_ci		wake_lock_waiters(rbd_dev, ret);
42128c2ecf20Sopenharmony_ci		up_write(&rbd_dev->lock_rwsem);
42138c2ecf20Sopenharmony_ci	} else if (ret < 0) {
42148c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "error requesting lock: %d", ret);
42158c2ecf20Sopenharmony_ci		mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
42168c2ecf20Sopenharmony_ci				 RBD_RETRY_DELAY);
42178c2ecf20Sopenharmony_ci	} else {
42188c2ecf20Sopenharmony_ci		/*
42198c2ecf20Sopenharmony_ci		 * lock owner acked, but resend if we don't see them
42208c2ecf20Sopenharmony_ci		 * release the lock
42218c2ecf20Sopenharmony_ci		 */
42228c2ecf20Sopenharmony_ci		dout("%s rbd_dev %p requeuing lock_dwork\n", __func__,
42238c2ecf20Sopenharmony_ci		     rbd_dev);
42248c2ecf20Sopenharmony_ci		mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork,
42258c2ecf20Sopenharmony_ci		    msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC));
42268c2ecf20Sopenharmony_ci	}
42278c2ecf20Sopenharmony_ci}
42288c2ecf20Sopenharmony_ci
42298c2ecf20Sopenharmony_cistatic bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
42308c2ecf20Sopenharmony_ci{
42318c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
42328c2ecf20Sopenharmony_ci	lockdep_assert_held_write(&rbd_dev->lock_rwsem);
42338c2ecf20Sopenharmony_ci
42348c2ecf20Sopenharmony_ci	if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
42358c2ecf20Sopenharmony_ci		return false;
42368c2ecf20Sopenharmony_ci
42378c2ecf20Sopenharmony_ci	/*
42388c2ecf20Sopenharmony_ci	 * Ensure that all in-flight IO is flushed.
42398c2ecf20Sopenharmony_ci	 */
42408c2ecf20Sopenharmony_ci	rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
42418c2ecf20Sopenharmony_ci	rbd_assert(!completion_done(&rbd_dev->releasing_wait));
42428c2ecf20Sopenharmony_ci	if (list_empty(&rbd_dev->running_list))
42438c2ecf20Sopenharmony_ci		return true;
42448c2ecf20Sopenharmony_ci
42458c2ecf20Sopenharmony_ci	up_write(&rbd_dev->lock_rwsem);
42468c2ecf20Sopenharmony_ci	wait_for_completion(&rbd_dev->releasing_wait);
42478c2ecf20Sopenharmony_ci
42488c2ecf20Sopenharmony_ci	down_write(&rbd_dev->lock_rwsem);
42498c2ecf20Sopenharmony_ci	if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
42508c2ecf20Sopenharmony_ci		return false;
42518c2ecf20Sopenharmony_ci
42528c2ecf20Sopenharmony_ci	rbd_assert(list_empty(&rbd_dev->running_list));
42538c2ecf20Sopenharmony_ci	return true;
42548c2ecf20Sopenharmony_ci}
42558c2ecf20Sopenharmony_ci
42568c2ecf20Sopenharmony_cistatic void rbd_pre_release_action(struct rbd_device *rbd_dev)
42578c2ecf20Sopenharmony_ci{
42588c2ecf20Sopenharmony_ci	if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)
42598c2ecf20Sopenharmony_ci		rbd_object_map_close(rbd_dev);
42608c2ecf20Sopenharmony_ci}
42618c2ecf20Sopenharmony_ci
42628c2ecf20Sopenharmony_cistatic void __rbd_release_lock(struct rbd_device *rbd_dev)
42638c2ecf20Sopenharmony_ci{
42648c2ecf20Sopenharmony_ci	rbd_assert(list_empty(&rbd_dev->running_list));
42658c2ecf20Sopenharmony_ci
42668c2ecf20Sopenharmony_ci	rbd_pre_release_action(rbd_dev);
42678c2ecf20Sopenharmony_ci	rbd_unlock(rbd_dev);
42688c2ecf20Sopenharmony_ci}
42698c2ecf20Sopenharmony_ci
42708c2ecf20Sopenharmony_ci/*
42718c2ecf20Sopenharmony_ci * lock_rwsem must be held for write
42728c2ecf20Sopenharmony_ci */
42738c2ecf20Sopenharmony_cistatic void rbd_release_lock(struct rbd_device *rbd_dev)
42748c2ecf20Sopenharmony_ci{
42758c2ecf20Sopenharmony_ci	if (!rbd_quiesce_lock(rbd_dev))
42768c2ecf20Sopenharmony_ci		return;
42778c2ecf20Sopenharmony_ci
42788c2ecf20Sopenharmony_ci	__rbd_release_lock(rbd_dev);
42798c2ecf20Sopenharmony_ci
42808c2ecf20Sopenharmony_ci	/*
42818c2ecf20Sopenharmony_ci	 * Give others a chance to grab the lock - we would re-acquire
42828c2ecf20Sopenharmony_ci	 * almost immediately if we got new IO while draining the running
42838c2ecf20Sopenharmony_ci	 * list otherwise.  We need to ack our own notifications, so this
42848c2ecf20Sopenharmony_ci	 * lock_dwork will be requeued from rbd_handle_released_lock() by
42858c2ecf20Sopenharmony_ci	 * way of maybe_kick_acquire().
42868c2ecf20Sopenharmony_ci	 */
42878c2ecf20Sopenharmony_ci	cancel_delayed_work(&rbd_dev->lock_dwork);
42888c2ecf20Sopenharmony_ci}
42898c2ecf20Sopenharmony_ci
42908c2ecf20Sopenharmony_cistatic void rbd_release_lock_work(struct work_struct *work)
42918c2ecf20Sopenharmony_ci{
42928c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = container_of(work, struct rbd_device,
42938c2ecf20Sopenharmony_ci						  unlock_work);
42948c2ecf20Sopenharmony_ci
42958c2ecf20Sopenharmony_ci	down_write(&rbd_dev->lock_rwsem);
42968c2ecf20Sopenharmony_ci	rbd_release_lock(rbd_dev);
42978c2ecf20Sopenharmony_ci	up_write(&rbd_dev->lock_rwsem);
42988c2ecf20Sopenharmony_ci}
42998c2ecf20Sopenharmony_ci
43008c2ecf20Sopenharmony_cistatic void maybe_kick_acquire(struct rbd_device *rbd_dev)
43018c2ecf20Sopenharmony_ci{
43028c2ecf20Sopenharmony_ci	bool have_requests;
43038c2ecf20Sopenharmony_ci
43048c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
43058c2ecf20Sopenharmony_ci	if (__rbd_is_lock_owner(rbd_dev))
43068c2ecf20Sopenharmony_ci		return;
43078c2ecf20Sopenharmony_ci
43088c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev->lock_lists_lock);
43098c2ecf20Sopenharmony_ci	have_requests = !list_empty(&rbd_dev->acquiring_list);
43108c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev->lock_lists_lock);
43118c2ecf20Sopenharmony_ci	if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) {
43128c2ecf20Sopenharmony_ci		dout("%s rbd_dev %p kicking lock_dwork\n", __func__, rbd_dev);
43138c2ecf20Sopenharmony_ci		mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
43148c2ecf20Sopenharmony_ci	}
43158c2ecf20Sopenharmony_ci}
43168c2ecf20Sopenharmony_ci
43178c2ecf20Sopenharmony_cistatic void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
43188c2ecf20Sopenharmony_ci				     void **p)
43198c2ecf20Sopenharmony_ci{
43208c2ecf20Sopenharmony_ci	struct rbd_client_id cid = { 0 };
43218c2ecf20Sopenharmony_ci
43228c2ecf20Sopenharmony_ci	if (struct_v >= 2) {
43238c2ecf20Sopenharmony_ci		cid.gid = ceph_decode_64(p);
43248c2ecf20Sopenharmony_ci		cid.handle = ceph_decode_64(p);
43258c2ecf20Sopenharmony_ci	}
43268c2ecf20Sopenharmony_ci
43278c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
43288c2ecf20Sopenharmony_ci	     cid.handle);
43298c2ecf20Sopenharmony_ci	if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
43308c2ecf20Sopenharmony_ci		down_write(&rbd_dev->lock_rwsem);
43318c2ecf20Sopenharmony_ci		if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
43328c2ecf20Sopenharmony_ci			dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
43338c2ecf20Sopenharmony_ci			     __func__, rbd_dev, cid.gid, cid.handle);
43348c2ecf20Sopenharmony_ci		} else {
43358c2ecf20Sopenharmony_ci			rbd_set_owner_cid(rbd_dev, &cid);
43368c2ecf20Sopenharmony_ci		}
43378c2ecf20Sopenharmony_ci		downgrade_write(&rbd_dev->lock_rwsem);
43388c2ecf20Sopenharmony_ci	} else {
43398c2ecf20Sopenharmony_ci		down_read(&rbd_dev->lock_rwsem);
43408c2ecf20Sopenharmony_ci	}
43418c2ecf20Sopenharmony_ci
43428c2ecf20Sopenharmony_ci	maybe_kick_acquire(rbd_dev);
43438c2ecf20Sopenharmony_ci	up_read(&rbd_dev->lock_rwsem);
43448c2ecf20Sopenharmony_ci}
43458c2ecf20Sopenharmony_ci
43468c2ecf20Sopenharmony_cistatic void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
43478c2ecf20Sopenharmony_ci				     void **p)
43488c2ecf20Sopenharmony_ci{
43498c2ecf20Sopenharmony_ci	struct rbd_client_id cid = { 0 };
43508c2ecf20Sopenharmony_ci
43518c2ecf20Sopenharmony_ci	if (struct_v >= 2) {
43528c2ecf20Sopenharmony_ci		cid.gid = ceph_decode_64(p);
43538c2ecf20Sopenharmony_ci		cid.handle = ceph_decode_64(p);
43548c2ecf20Sopenharmony_ci	}
43558c2ecf20Sopenharmony_ci
43568c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
43578c2ecf20Sopenharmony_ci	     cid.handle);
43588c2ecf20Sopenharmony_ci	if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
43598c2ecf20Sopenharmony_ci		down_write(&rbd_dev->lock_rwsem);
43608c2ecf20Sopenharmony_ci		if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
43618c2ecf20Sopenharmony_ci			dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
43628c2ecf20Sopenharmony_ci			     __func__, rbd_dev, cid.gid, cid.handle,
43638c2ecf20Sopenharmony_ci			     rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
43648c2ecf20Sopenharmony_ci		} else {
43658c2ecf20Sopenharmony_ci			rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
43668c2ecf20Sopenharmony_ci		}
43678c2ecf20Sopenharmony_ci		downgrade_write(&rbd_dev->lock_rwsem);
43688c2ecf20Sopenharmony_ci	} else {
43698c2ecf20Sopenharmony_ci		down_read(&rbd_dev->lock_rwsem);
43708c2ecf20Sopenharmony_ci	}
43718c2ecf20Sopenharmony_ci
43728c2ecf20Sopenharmony_ci	maybe_kick_acquire(rbd_dev);
43738c2ecf20Sopenharmony_ci	up_read(&rbd_dev->lock_rwsem);
43748c2ecf20Sopenharmony_ci}
43758c2ecf20Sopenharmony_ci
43768c2ecf20Sopenharmony_ci/*
43778c2ecf20Sopenharmony_ci * Returns result for ResponseMessage to be encoded (<= 0), or 1 if no
43788c2ecf20Sopenharmony_ci * ResponseMessage is needed.
43798c2ecf20Sopenharmony_ci */
43808c2ecf20Sopenharmony_cistatic int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v,
43818c2ecf20Sopenharmony_ci				   void **p)
43828c2ecf20Sopenharmony_ci{
43838c2ecf20Sopenharmony_ci	struct rbd_client_id my_cid = rbd_get_cid(rbd_dev);
43848c2ecf20Sopenharmony_ci	struct rbd_client_id cid = { 0 };
43858c2ecf20Sopenharmony_ci	int result = 1;
43868c2ecf20Sopenharmony_ci
43878c2ecf20Sopenharmony_ci	if (struct_v >= 2) {
43888c2ecf20Sopenharmony_ci		cid.gid = ceph_decode_64(p);
43898c2ecf20Sopenharmony_ci		cid.handle = ceph_decode_64(p);
43908c2ecf20Sopenharmony_ci	}
43918c2ecf20Sopenharmony_ci
43928c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid,
43938c2ecf20Sopenharmony_ci	     cid.handle);
43948c2ecf20Sopenharmony_ci	if (rbd_cid_equal(&cid, &my_cid))
43958c2ecf20Sopenharmony_ci		return result;
43968c2ecf20Sopenharmony_ci
43978c2ecf20Sopenharmony_ci	down_read(&rbd_dev->lock_rwsem);
43988c2ecf20Sopenharmony_ci	if (__rbd_is_lock_owner(rbd_dev)) {
43998c2ecf20Sopenharmony_ci		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED &&
44008c2ecf20Sopenharmony_ci		    rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid))
44018c2ecf20Sopenharmony_ci			goto out_unlock;
44028c2ecf20Sopenharmony_ci
44038c2ecf20Sopenharmony_ci		/*
44048c2ecf20Sopenharmony_ci		 * encode ResponseMessage(0) so the peer can detect
44058c2ecf20Sopenharmony_ci		 * a missing owner
44068c2ecf20Sopenharmony_ci		 */
44078c2ecf20Sopenharmony_ci		result = 0;
44088c2ecf20Sopenharmony_ci
44098c2ecf20Sopenharmony_ci		if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) {
44108c2ecf20Sopenharmony_ci			if (!rbd_dev->opts->exclusive) {
44118c2ecf20Sopenharmony_ci				dout("%s rbd_dev %p queueing unlock_work\n",
44128c2ecf20Sopenharmony_ci				     __func__, rbd_dev);
44138c2ecf20Sopenharmony_ci				queue_work(rbd_dev->task_wq,
44148c2ecf20Sopenharmony_ci					   &rbd_dev->unlock_work);
44158c2ecf20Sopenharmony_ci			} else {
44168c2ecf20Sopenharmony_ci				/* refuse to release the lock */
44178c2ecf20Sopenharmony_ci				result = -EROFS;
44188c2ecf20Sopenharmony_ci			}
44198c2ecf20Sopenharmony_ci		}
44208c2ecf20Sopenharmony_ci	}
44218c2ecf20Sopenharmony_ci
44228c2ecf20Sopenharmony_ciout_unlock:
44238c2ecf20Sopenharmony_ci	up_read(&rbd_dev->lock_rwsem);
44248c2ecf20Sopenharmony_ci	return result;
44258c2ecf20Sopenharmony_ci}
44268c2ecf20Sopenharmony_ci
44278c2ecf20Sopenharmony_cistatic void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
44288c2ecf20Sopenharmony_ci				     u64 notify_id, u64 cookie, s32 *result)
44298c2ecf20Sopenharmony_ci{
44308c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
44318c2ecf20Sopenharmony_ci	char buf[4 + CEPH_ENCODING_START_BLK_LEN];
44328c2ecf20Sopenharmony_ci	int buf_size = sizeof(buf);
44338c2ecf20Sopenharmony_ci	int ret;
44348c2ecf20Sopenharmony_ci
44358c2ecf20Sopenharmony_ci	if (result) {
44368c2ecf20Sopenharmony_ci		void *p = buf;
44378c2ecf20Sopenharmony_ci
44388c2ecf20Sopenharmony_ci		/* encode ResponseMessage */
44398c2ecf20Sopenharmony_ci		ceph_start_encoding(&p, 1, 1,
44408c2ecf20Sopenharmony_ci				    buf_size - CEPH_ENCODING_START_BLK_LEN);
44418c2ecf20Sopenharmony_ci		ceph_encode_32(&p, *result);
44428c2ecf20Sopenharmony_ci	} else {
44438c2ecf20Sopenharmony_ci		buf_size = 0;
44448c2ecf20Sopenharmony_ci	}
44458c2ecf20Sopenharmony_ci
44468c2ecf20Sopenharmony_ci	ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid,
44478c2ecf20Sopenharmony_ci				   &rbd_dev->header_oloc, notify_id, cookie,
44488c2ecf20Sopenharmony_ci				   buf, buf_size);
44498c2ecf20Sopenharmony_ci	if (ret)
44508c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "acknowledge_notify failed: %d", ret);
44518c2ecf20Sopenharmony_ci}
44528c2ecf20Sopenharmony_ci
44538c2ecf20Sopenharmony_cistatic void rbd_acknowledge_notify(struct rbd_device *rbd_dev, u64 notify_id,
44548c2ecf20Sopenharmony_ci				   u64 cookie)
44558c2ecf20Sopenharmony_ci{
44568c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
44578c2ecf20Sopenharmony_ci	__rbd_acknowledge_notify(rbd_dev, notify_id, cookie, NULL);
44588c2ecf20Sopenharmony_ci}
44598c2ecf20Sopenharmony_ci
44608c2ecf20Sopenharmony_cistatic void rbd_acknowledge_notify_result(struct rbd_device *rbd_dev,
44618c2ecf20Sopenharmony_ci					  u64 notify_id, u64 cookie, s32 result)
44628c2ecf20Sopenharmony_ci{
44638c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result);
44648c2ecf20Sopenharmony_ci	__rbd_acknowledge_notify(rbd_dev, notify_id, cookie, &result);
44658c2ecf20Sopenharmony_ci}
44668c2ecf20Sopenharmony_ci
44678c2ecf20Sopenharmony_cistatic void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie,
44688c2ecf20Sopenharmony_ci			 u64 notifier_id, void *data, size_t data_len)
44698c2ecf20Sopenharmony_ci{
44708c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = arg;
44718c2ecf20Sopenharmony_ci	void *p = data;
44728c2ecf20Sopenharmony_ci	void *const end = p + data_len;
44738c2ecf20Sopenharmony_ci	u8 struct_v = 0;
44748c2ecf20Sopenharmony_ci	u32 len;
44758c2ecf20Sopenharmony_ci	u32 notify_op;
44768c2ecf20Sopenharmony_ci	int ret;
44778c2ecf20Sopenharmony_ci
44788c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p cookie %llu notify_id %llu data_len %zu\n",
44798c2ecf20Sopenharmony_ci	     __func__, rbd_dev, cookie, notify_id, data_len);
44808c2ecf20Sopenharmony_ci	if (data_len) {
44818c2ecf20Sopenharmony_ci		ret = ceph_start_decoding(&p, end, 1, "NotifyMessage",
44828c2ecf20Sopenharmony_ci					  &struct_v, &len);
44838c2ecf20Sopenharmony_ci		if (ret) {
44848c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "failed to decode NotifyMessage: %d",
44858c2ecf20Sopenharmony_ci				 ret);
44868c2ecf20Sopenharmony_ci			return;
44878c2ecf20Sopenharmony_ci		}
44888c2ecf20Sopenharmony_ci
44898c2ecf20Sopenharmony_ci		notify_op = ceph_decode_32(&p);
44908c2ecf20Sopenharmony_ci	} else {
44918c2ecf20Sopenharmony_ci		/* legacy notification for header updates */
44928c2ecf20Sopenharmony_ci		notify_op = RBD_NOTIFY_OP_HEADER_UPDATE;
44938c2ecf20Sopenharmony_ci		len = 0;
44948c2ecf20Sopenharmony_ci	}
44958c2ecf20Sopenharmony_ci
44968c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p notify_op %u\n", __func__, rbd_dev, notify_op);
44978c2ecf20Sopenharmony_ci	switch (notify_op) {
44988c2ecf20Sopenharmony_ci	case RBD_NOTIFY_OP_ACQUIRED_LOCK:
44998c2ecf20Sopenharmony_ci		rbd_handle_acquired_lock(rbd_dev, struct_v, &p);
45008c2ecf20Sopenharmony_ci		rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
45018c2ecf20Sopenharmony_ci		break;
45028c2ecf20Sopenharmony_ci	case RBD_NOTIFY_OP_RELEASED_LOCK:
45038c2ecf20Sopenharmony_ci		rbd_handle_released_lock(rbd_dev, struct_v, &p);
45048c2ecf20Sopenharmony_ci		rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
45058c2ecf20Sopenharmony_ci		break;
45068c2ecf20Sopenharmony_ci	case RBD_NOTIFY_OP_REQUEST_LOCK:
45078c2ecf20Sopenharmony_ci		ret = rbd_handle_request_lock(rbd_dev, struct_v, &p);
45088c2ecf20Sopenharmony_ci		if (ret <= 0)
45098c2ecf20Sopenharmony_ci			rbd_acknowledge_notify_result(rbd_dev, notify_id,
45108c2ecf20Sopenharmony_ci						      cookie, ret);
45118c2ecf20Sopenharmony_ci		else
45128c2ecf20Sopenharmony_ci			rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
45138c2ecf20Sopenharmony_ci		break;
45148c2ecf20Sopenharmony_ci	case RBD_NOTIFY_OP_HEADER_UPDATE:
45158c2ecf20Sopenharmony_ci		ret = rbd_dev_refresh(rbd_dev);
45168c2ecf20Sopenharmony_ci		if (ret)
45178c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "refresh failed: %d", ret);
45188c2ecf20Sopenharmony_ci
45198c2ecf20Sopenharmony_ci		rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
45208c2ecf20Sopenharmony_ci		break;
45218c2ecf20Sopenharmony_ci	default:
45228c2ecf20Sopenharmony_ci		if (rbd_is_lock_owner(rbd_dev))
45238c2ecf20Sopenharmony_ci			rbd_acknowledge_notify_result(rbd_dev, notify_id,
45248c2ecf20Sopenharmony_ci						      cookie, -EOPNOTSUPP);
45258c2ecf20Sopenharmony_ci		else
45268c2ecf20Sopenharmony_ci			rbd_acknowledge_notify(rbd_dev, notify_id, cookie);
45278c2ecf20Sopenharmony_ci		break;
45288c2ecf20Sopenharmony_ci	}
45298c2ecf20Sopenharmony_ci}
45308c2ecf20Sopenharmony_ci
45318c2ecf20Sopenharmony_cistatic void __rbd_unregister_watch(struct rbd_device *rbd_dev);
45328c2ecf20Sopenharmony_ci
45338c2ecf20Sopenharmony_cistatic void rbd_watch_errcb(void *arg, u64 cookie, int err)
45348c2ecf20Sopenharmony_ci{
45358c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = arg;
45368c2ecf20Sopenharmony_ci
45378c2ecf20Sopenharmony_ci	rbd_warn(rbd_dev, "encountered watch error: %d", err);
45388c2ecf20Sopenharmony_ci
45398c2ecf20Sopenharmony_ci	down_write(&rbd_dev->lock_rwsem);
45408c2ecf20Sopenharmony_ci	rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
45418c2ecf20Sopenharmony_ci	up_write(&rbd_dev->lock_rwsem);
45428c2ecf20Sopenharmony_ci
45438c2ecf20Sopenharmony_ci	mutex_lock(&rbd_dev->watch_mutex);
45448c2ecf20Sopenharmony_ci	if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) {
45458c2ecf20Sopenharmony_ci		__rbd_unregister_watch(rbd_dev);
45468c2ecf20Sopenharmony_ci		rbd_dev->watch_state = RBD_WATCH_STATE_ERROR;
45478c2ecf20Sopenharmony_ci
45488c2ecf20Sopenharmony_ci		queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0);
45498c2ecf20Sopenharmony_ci	}
45508c2ecf20Sopenharmony_ci	mutex_unlock(&rbd_dev->watch_mutex);
45518c2ecf20Sopenharmony_ci}
45528c2ecf20Sopenharmony_ci
45538c2ecf20Sopenharmony_ci/*
45548c2ecf20Sopenharmony_ci * watch_mutex must be locked
45558c2ecf20Sopenharmony_ci */
45568c2ecf20Sopenharmony_cistatic int __rbd_register_watch(struct rbd_device *rbd_dev)
45578c2ecf20Sopenharmony_ci{
45588c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
45598c2ecf20Sopenharmony_ci	struct ceph_osd_linger_request *handle;
45608c2ecf20Sopenharmony_ci
45618c2ecf20Sopenharmony_ci	rbd_assert(!rbd_dev->watch_handle);
45628c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
45638c2ecf20Sopenharmony_ci
45648c2ecf20Sopenharmony_ci	handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid,
45658c2ecf20Sopenharmony_ci				 &rbd_dev->header_oloc, rbd_watch_cb,
45668c2ecf20Sopenharmony_ci				 rbd_watch_errcb, rbd_dev);
45678c2ecf20Sopenharmony_ci	if (IS_ERR(handle))
45688c2ecf20Sopenharmony_ci		return PTR_ERR(handle);
45698c2ecf20Sopenharmony_ci
45708c2ecf20Sopenharmony_ci	rbd_dev->watch_handle = handle;
45718c2ecf20Sopenharmony_ci	return 0;
45728c2ecf20Sopenharmony_ci}
45738c2ecf20Sopenharmony_ci
45748c2ecf20Sopenharmony_ci/*
45758c2ecf20Sopenharmony_ci * watch_mutex must be locked
45768c2ecf20Sopenharmony_ci */
45778c2ecf20Sopenharmony_cistatic void __rbd_unregister_watch(struct rbd_device *rbd_dev)
45788c2ecf20Sopenharmony_ci{
45798c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
45808c2ecf20Sopenharmony_ci	int ret;
45818c2ecf20Sopenharmony_ci
45828c2ecf20Sopenharmony_ci	rbd_assert(rbd_dev->watch_handle);
45838c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
45848c2ecf20Sopenharmony_ci
45858c2ecf20Sopenharmony_ci	ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle);
45868c2ecf20Sopenharmony_ci	if (ret)
45878c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to unwatch: %d", ret);
45888c2ecf20Sopenharmony_ci
45898c2ecf20Sopenharmony_ci	rbd_dev->watch_handle = NULL;
45908c2ecf20Sopenharmony_ci}
45918c2ecf20Sopenharmony_ci
45928c2ecf20Sopenharmony_cistatic int rbd_register_watch(struct rbd_device *rbd_dev)
45938c2ecf20Sopenharmony_ci{
45948c2ecf20Sopenharmony_ci	int ret;
45958c2ecf20Sopenharmony_ci
45968c2ecf20Sopenharmony_ci	mutex_lock(&rbd_dev->watch_mutex);
45978c2ecf20Sopenharmony_ci	rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED);
45988c2ecf20Sopenharmony_ci	ret = __rbd_register_watch(rbd_dev);
45998c2ecf20Sopenharmony_ci	if (ret)
46008c2ecf20Sopenharmony_ci		goto out;
46018c2ecf20Sopenharmony_ci
46028c2ecf20Sopenharmony_ci	rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
46038c2ecf20Sopenharmony_ci	rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
46048c2ecf20Sopenharmony_ci
46058c2ecf20Sopenharmony_ciout:
46068c2ecf20Sopenharmony_ci	mutex_unlock(&rbd_dev->watch_mutex);
46078c2ecf20Sopenharmony_ci	return ret;
46088c2ecf20Sopenharmony_ci}
46098c2ecf20Sopenharmony_ci
46108c2ecf20Sopenharmony_cistatic void cancel_tasks_sync(struct rbd_device *rbd_dev)
46118c2ecf20Sopenharmony_ci{
46128c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
46138c2ecf20Sopenharmony_ci
46148c2ecf20Sopenharmony_ci	cancel_work_sync(&rbd_dev->acquired_lock_work);
46158c2ecf20Sopenharmony_ci	cancel_work_sync(&rbd_dev->released_lock_work);
46168c2ecf20Sopenharmony_ci	cancel_delayed_work_sync(&rbd_dev->lock_dwork);
46178c2ecf20Sopenharmony_ci	cancel_work_sync(&rbd_dev->unlock_work);
46188c2ecf20Sopenharmony_ci}
46198c2ecf20Sopenharmony_ci
46208c2ecf20Sopenharmony_ci/*
46218c2ecf20Sopenharmony_ci * header_rwsem must not be held to avoid a deadlock with
46228c2ecf20Sopenharmony_ci * rbd_dev_refresh() when flushing notifies.
46238c2ecf20Sopenharmony_ci */
46248c2ecf20Sopenharmony_cistatic void rbd_unregister_watch(struct rbd_device *rbd_dev)
46258c2ecf20Sopenharmony_ci{
46268c2ecf20Sopenharmony_ci	cancel_tasks_sync(rbd_dev);
46278c2ecf20Sopenharmony_ci
46288c2ecf20Sopenharmony_ci	mutex_lock(&rbd_dev->watch_mutex);
46298c2ecf20Sopenharmony_ci	if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED)
46308c2ecf20Sopenharmony_ci		__rbd_unregister_watch(rbd_dev);
46318c2ecf20Sopenharmony_ci	rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
46328c2ecf20Sopenharmony_ci	mutex_unlock(&rbd_dev->watch_mutex);
46338c2ecf20Sopenharmony_ci
46348c2ecf20Sopenharmony_ci	cancel_delayed_work_sync(&rbd_dev->watch_dwork);
46358c2ecf20Sopenharmony_ci	ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
46368c2ecf20Sopenharmony_ci}
46378c2ecf20Sopenharmony_ci
46388c2ecf20Sopenharmony_ci/*
46398c2ecf20Sopenharmony_ci * lock_rwsem must be held for write
46408c2ecf20Sopenharmony_ci */
46418c2ecf20Sopenharmony_cistatic void rbd_reacquire_lock(struct rbd_device *rbd_dev)
46428c2ecf20Sopenharmony_ci{
46438c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
46448c2ecf20Sopenharmony_ci	char cookie[32];
46458c2ecf20Sopenharmony_ci	int ret;
46468c2ecf20Sopenharmony_ci
46478c2ecf20Sopenharmony_ci	if (!rbd_quiesce_lock(rbd_dev))
46488c2ecf20Sopenharmony_ci		return;
46498c2ecf20Sopenharmony_ci
46508c2ecf20Sopenharmony_ci	format_lock_cookie(rbd_dev, cookie);
46518c2ecf20Sopenharmony_ci	ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid,
46528c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, RBD_LOCK_NAME,
46538c2ecf20Sopenharmony_ci				  CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie,
46548c2ecf20Sopenharmony_ci				  RBD_LOCK_TAG, cookie);
46558c2ecf20Sopenharmony_ci	if (ret) {
46568c2ecf20Sopenharmony_ci		if (ret != -EOPNOTSUPP)
46578c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "failed to update lock cookie: %d",
46588c2ecf20Sopenharmony_ci				 ret);
46598c2ecf20Sopenharmony_ci
46608c2ecf20Sopenharmony_ci		/*
46618c2ecf20Sopenharmony_ci		 * Lock cookie cannot be updated on older OSDs, so do
46628c2ecf20Sopenharmony_ci		 * a manual release and queue an acquire.
46638c2ecf20Sopenharmony_ci		 */
46648c2ecf20Sopenharmony_ci		__rbd_release_lock(rbd_dev);
46658c2ecf20Sopenharmony_ci		queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
46668c2ecf20Sopenharmony_ci	} else {
46678c2ecf20Sopenharmony_ci		__rbd_lock(rbd_dev, cookie);
46688c2ecf20Sopenharmony_ci		wake_lock_waiters(rbd_dev, 0);
46698c2ecf20Sopenharmony_ci	}
46708c2ecf20Sopenharmony_ci}
46718c2ecf20Sopenharmony_ci
46728c2ecf20Sopenharmony_cistatic void rbd_reregister_watch(struct work_struct *work)
46738c2ecf20Sopenharmony_ci{
46748c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
46758c2ecf20Sopenharmony_ci					    struct rbd_device, watch_dwork);
46768c2ecf20Sopenharmony_ci	int ret;
46778c2ecf20Sopenharmony_ci
46788c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
46798c2ecf20Sopenharmony_ci
46808c2ecf20Sopenharmony_ci	mutex_lock(&rbd_dev->watch_mutex);
46818c2ecf20Sopenharmony_ci	if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
46828c2ecf20Sopenharmony_ci		mutex_unlock(&rbd_dev->watch_mutex);
46838c2ecf20Sopenharmony_ci		return;
46848c2ecf20Sopenharmony_ci	}
46858c2ecf20Sopenharmony_ci
46868c2ecf20Sopenharmony_ci	ret = __rbd_register_watch(rbd_dev);
46878c2ecf20Sopenharmony_ci	if (ret) {
46888c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
46898c2ecf20Sopenharmony_ci		if (ret != -EBLOCKLISTED && ret != -ENOENT) {
46908c2ecf20Sopenharmony_ci			queue_delayed_work(rbd_dev->task_wq,
46918c2ecf20Sopenharmony_ci					   &rbd_dev->watch_dwork,
46928c2ecf20Sopenharmony_ci					   RBD_RETRY_DELAY);
46938c2ecf20Sopenharmony_ci			mutex_unlock(&rbd_dev->watch_mutex);
46948c2ecf20Sopenharmony_ci			return;
46958c2ecf20Sopenharmony_ci		}
46968c2ecf20Sopenharmony_ci
46978c2ecf20Sopenharmony_ci		mutex_unlock(&rbd_dev->watch_mutex);
46988c2ecf20Sopenharmony_ci		down_write(&rbd_dev->lock_rwsem);
46998c2ecf20Sopenharmony_ci		wake_lock_waiters(rbd_dev, ret);
47008c2ecf20Sopenharmony_ci		up_write(&rbd_dev->lock_rwsem);
47018c2ecf20Sopenharmony_ci		return;
47028c2ecf20Sopenharmony_ci	}
47038c2ecf20Sopenharmony_ci
47048c2ecf20Sopenharmony_ci	rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
47058c2ecf20Sopenharmony_ci	rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
47068c2ecf20Sopenharmony_ci	mutex_unlock(&rbd_dev->watch_mutex);
47078c2ecf20Sopenharmony_ci
47088c2ecf20Sopenharmony_ci	down_write(&rbd_dev->lock_rwsem);
47098c2ecf20Sopenharmony_ci	if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED)
47108c2ecf20Sopenharmony_ci		rbd_reacquire_lock(rbd_dev);
47118c2ecf20Sopenharmony_ci	up_write(&rbd_dev->lock_rwsem);
47128c2ecf20Sopenharmony_ci
47138c2ecf20Sopenharmony_ci	ret = rbd_dev_refresh(rbd_dev);
47148c2ecf20Sopenharmony_ci	if (ret)
47158c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "reregistration refresh failed: %d", ret);
47168c2ecf20Sopenharmony_ci}
47178c2ecf20Sopenharmony_ci
47188c2ecf20Sopenharmony_ci/*
47198c2ecf20Sopenharmony_ci * Synchronous osd object method call.  Returns the number of bytes
47208c2ecf20Sopenharmony_ci * returned in the outbound buffer, or a negative error code.
47218c2ecf20Sopenharmony_ci */
47228c2ecf20Sopenharmony_cistatic int rbd_obj_method_sync(struct rbd_device *rbd_dev,
47238c2ecf20Sopenharmony_ci			     struct ceph_object_id *oid,
47248c2ecf20Sopenharmony_ci			     struct ceph_object_locator *oloc,
47258c2ecf20Sopenharmony_ci			     const char *method_name,
47268c2ecf20Sopenharmony_ci			     const void *outbound,
47278c2ecf20Sopenharmony_ci			     size_t outbound_size,
47288c2ecf20Sopenharmony_ci			     void *inbound,
47298c2ecf20Sopenharmony_ci			     size_t inbound_size)
47308c2ecf20Sopenharmony_ci{
47318c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
47328c2ecf20Sopenharmony_ci	struct page *req_page = NULL;
47338c2ecf20Sopenharmony_ci	struct page *reply_page;
47348c2ecf20Sopenharmony_ci	int ret;
47358c2ecf20Sopenharmony_ci
47368c2ecf20Sopenharmony_ci	/*
47378c2ecf20Sopenharmony_ci	 * Method calls are ultimately read operations.  The result
47388c2ecf20Sopenharmony_ci	 * should placed into the inbound buffer provided.  They
47398c2ecf20Sopenharmony_ci	 * also supply outbound data--parameters for the object
47408c2ecf20Sopenharmony_ci	 * method.  Currently if this is present it will be a
47418c2ecf20Sopenharmony_ci	 * snapshot id.
47428c2ecf20Sopenharmony_ci	 */
47438c2ecf20Sopenharmony_ci	if (outbound) {
47448c2ecf20Sopenharmony_ci		if (outbound_size > PAGE_SIZE)
47458c2ecf20Sopenharmony_ci			return -E2BIG;
47468c2ecf20Sopenharmony_ci
47478c2ecf20Sopenharmony_ci		req_page = alloc_page(GFP_KERNEL);
47488c2ecf20Sopenharmony_ci		if (!req_page)
47498c2ecf20Sopenharmony_ci			return -ENOMEM;
47508c2ecf20Sopenharmony_ci
47518c2ecf20Sopenharmony_ci		memcpy(page_address(req_page), outbound, outbound_size);
47528c2ecf20Sopenharmony_ci	}
47538c2ecf20Sopenharmony_ci
47548c2ecf20Sopenharmony_ci	reply_page = alloc_page(GFP_KERNEL);
47558c2ecf20Sopenharmony_ci	if (!reply_page) {
47568c2ecf20Sopenharmony_ci		if (req_page)
47578c2ecf20Sopenharmony_ci			__free_page(req_page);
47588c2ecf20Sopenharmony_ci		return -ENOMEM;
47598c2ecf20Sopenharmony_ci	}
47608c2ecf20Sopenharmony_ci
47618c2ecf20Sopenharmony_ci	ret = ceph_osdc_call(osdc, oid, oloc, RBD_DRV_NAME, method_name,
47628c2ecf20Sopenharmony_ci			     CEPH_OSD_FLAG_READ, req_page, outbound_size,
47638c2ecf20Sopenharmony_ci			     &reply_page, &inbound_size);
47648c2ecf20Sopenharmony_ci	if (!ret) {
47658c2ecf20Sopenharmony_ci		memcpy(inbound, page_address(reply_page), inbound_size);
47668c2ecf20Sopenharmony_ci		ret = inbound_size;
47678c2ecf20Sopenharmony_ci	}
47688c2ecf20Sopenharmony_ci
47698c2ecf20Sopenharmony_ci	if (req_page)
47708c2ecf20Sopenharmony_ci		__free_page(req_page);
47718c2ecf20Sopenharmony_ci	__free_page(reply_page);
47728c2ecf20Sopenharmony_ci	return ret;
47738c2ecf20Sopenharmony_ci}
47748c2ecf20Sopenharmony_ci
47758c2ecf20Sopenharmony_cistatic void rbd_queue_workfn(struct work_struct *work)
47768c2ecf20Sopenharmony_ci{
47778c2ecf20Sopenharmony_ci	struct rbd_img_request *img_request =
47788c2ecf20Sopenharmony_ci	    container_of(work, struct rbd_img_request, work);
47798c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = img_request->rbd_dev;
47808c2ecf20Sopenharmony_ci	enum obj_operation_type op_type = img_request->op_type;
47818c2ecf20Sopenharmony_ci	struct request *rq = blk_mq_rq_from_pdu(img_request);
47828c2ecf20Sopenharmony_ci	u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
47838c2ecf20Sopenharmony_ci	u64 length = blk_rq_bytes(rq);
47848c2ecf20Sopenharmony_ci	u64 mapping_size;
47858c2ecf20Sopenharmony_ci	int result;
47868c2ecf20Sopenharmony_ci
47878c2ecf20Sopenharmony_ci	/* Ignore/skip any zero-length requests */
47888c2ecf20Sopenharmony_ci	if (!length) {
47898c2ecf20Sopenharmony_ci		dout("%s: zero-length request\n", __func__);
47908c2ecf20Sopenharmony_ci		result = 0;
47918c2ecf20Sopenharmony_ci		goto err_img_request;
47928c2ecf20Sopenharmony_ci	}
47938c2ecf20Sopenharmony_ci
47948c2ecf20Sopenharmony_ci	blk_mq_start_request(rq);
47958c2ecf20Sopenharmony_ci
47968c2ecf20Sopenharmony_ci	down_read(&rbd_dev->header_rwsem);
47978c2ecf20Sopenharmony_ci	mapping_size = rbd_dev->mapping.size;
47988c2ecf20Sopenharmony_ci	rbd_img_capture_header(img_request);
47998c2ecf20Sopenharmony_ci	up_read(&rbd_dev->header_rwsem);
48008c2ecf20Sopenharmony_ci
48018c2ecf20Sopenharmony_ci	if (offset + length > mapping_size) {
48028c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset,
48038c2ecf20Sopenharmony_ci			 length, mapping_size);
48048c2ecf20Sopenharmony_ci		result = -EIO;
48058c2ecf20Sopenharmony_ci		goto err_img_request;
48068c2ecf20Sopenharmony_ci	}
48078c2ecf20Sopenharmony_ci
48088c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev,
48098c2ecf20Sopenharmony_ci	     img_request, obj_op_name(op_type), offset, length);
48108c2ecf20Sopenharmony_ci
48118c2ecf20Sopenharmony_ci	if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT)
48128c2ecf20Sopenharmony_ci		result = rbd_img_fill_nodata(img_request, offset, length);
48138c2ecf20Sopenharmony_ci	else
48148c2ecf20Sopenharmony_ci		result = rbd_img_fill_from_bio(img_request, offset, length,
48158c2ecf20Sopenharmony_ci					       rq->bio);
48168c2ecf20Sopenharmony_ci	if (result)
48178c2ecf20Sopenharmony_ci		goto err_img_request;
48188c2ecf20Sopenharmony_ci
48198c2ecf20Sopenharmony_ci	rbd_img_handle_request(img_request, 0);
48208c2ecf20Sopenharmony_ci	return;
48218c2ecf20Sopenharmony_ci
48228c2ecf20Sopenharmony_cierr_img_request:
48238c2ecf20Sopenharmony_ci	rbd_img_request_destroy(img_request);
48248c2ecf20Sopenharmony_ci	if (result)
48258c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "%s %llx at %llx result %d",
48268c2ecf20Sopenharmony_ci			 obj_op_name(op_type), length, offset, result);
48278c2ecf20Sopenharmony_ci	blk_mq_end_request(rq, errno_to_blk_status(result));
48288c2ecf20Sopenharmony_ci}
48298c2ecf20Sopenharmony_ci
48308c2ecf20Sopenharmony_cistatic blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx,
48318c2ecf20Sopenharmony_ci		const struct blk_mq_queue_data *bd)
48328c2ecf20Sopenharmony_ci{
48338c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = hctx->queue->queuedata;
48348c2ecf20Sopenharmony_ci	struct rbd_img_request *img_req = blk_mq_rq_to_pdu(bd->rq);
48358c2ecf20Sopenharmony_ci	enum obj_operation_type op_type;
48368c2ecf20Sopenharmony_ci
48378c2ecf20Sopenharmony_ci	switch (req_op(bd->rq)) {
48388c2ecf20Sopenharmony_ci	case REQ_OP_DISCARD:
48398c2ecf20Sopenharmony_ci		op_type = OBJ_OP_DISCARD;
48408c2ecf20Sopenharmony_ci		break;
48418c2ecf20Sopenharmony_ci	case REQ_OP_WRITE_ZEROES:
48428c2ecf20Sopenharmony_ci		op_type = OBJ_OP_ZEROOUT;
48438c2ecf20Sopenharmony_ci		break;
48448c2ecf20Sopenharmony_ci	case REQ_OP_WRITE:
48458c2ecf20Sopenharmony_ci		op_type = OBJ_OP_WRITE;
48468c2ecf20Sopenharmony_ci		break;
48478c2ecf20Sopenharmony_ci	case REQ_OP_READ:
48488c2ecf20Sopenharmony_ci		op_type = OBJ_OP_READ;
48498c2ecf20Sopenharmony_ci		break;
48508c2ecf20Sopenharmony_ci	default:
48518c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "unknown req_op %d", req_op(bd->rq));
48528c2ecf20Sopenharmony_ci		return BLK_STS_IOERR;
48538c2ecf20Sopenharmony_ci	}
48548c2ecf20Sopenharmony_ci
48558c2ecf20Sopenharmony_ci	rbd_img_request_init(img_req, rbd_dev, op_type);
48568c2ecf20Sopenharmony_ci
48578c2ecf20Sopenharmony_ci	if (rbd_img_is_write(img_req)) {
48588c2ecf20Sopenharmony_ci		if (rbd_is_ro(rbd_dev)) {
48598c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "%s on read-only mapping",
48608c2ecf20Sopenharmony_ci				 obj_op_name(img_req->op_type));
48618c2ecf20Sopenharmony_ci			return BLK_STS_IOERR;
48628c2ecf20Sopenharmony_ci		}
48638c2ecf20Sopenharmony_ci		rbd_assert(!rbd_is_snap(rbd_dev));
48648c2ecf20Sopenharmony_ci	}
48658c2ecf20Sopenharmony_ci
48668c2ecf20Sopenharmony_ci	INIT_WORK(&img_req->work, rbd_queue_workfn);
48678c2ecf20Sopenharmony_ci	queue_work(rbd_wq, &img_req->work);
48688c2ecf20Sopenharmony_ci	return BLK_STS_OK;
48698c2ecf20Sopenharmony_ci}
48708c2ecf20Sopenharmony_ci
48718c2ecf20Sopenharmony_cistatic void rbd_free_disk(struct rbd_device *rbd_dev)
48728c2ecf20Sopenharmony_ci{
48738c2ecf20Sopenharmony_ci	blk_cleanup_queue(rbd_dev->disk->queue);
48748c2ecf20Sopenharmony_ci	blk_mq_free_tag_set(&rbd_dev->tag_set);
48758c2ecf20Sopenharmony_ci	put_disk(rbd_dev->disk);
48768c2ecf20Sopenharmony_ci	rbd_dev->disk = NULL;
48778c2ecf20Sopenharmony_ci}
48788c2ecf20Sopenharmony_ci
48798c2ecf20Sopenharmony_cistatic int rbd_obj_read_sync(struct rbd_device *rbd_dev,
48808c2ecf20Sopenharmony_ci			     struct ceph_object_id *oid,
48818c2ecf20Sopenharmony_ci			     struct ceph_object_locator *oloc,
48828c2ecf20Sopenharmony_ci			     void *buf, int buf_len)
48838c2ecf20Sopenharmony_ci
48848c2ecf20Sopenharmony_ci{
48858c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
48868c2ecf20Sopenharmony_ci	struct ceph_osd_request *req;
48878c2ecf20Sopenharmony_ci	struct page **pages;
48888c2ecf20Sopenharmony_ci	int num_pages = calc_pages_for(0, buf_len);
48898c2ecf20Sopenharmony_ci	int ret;
48908c2ecf20Sopenharmony_ci
48918c2ecf20Sopenharmony_ci	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
48928c2ecf20Sopenharmony_ci	if (!req)
48938c2ecf20Sopenharmony_ci		return -ENOMEM;
48948c2ecf20Sopenharmony_ci
48958c2ecf20Sopenharmony_ci	ceph_oid_copy(&req->r_base_oid, oid);
48968c2ecf20Sopenharmony_ci	ceph_oloc_copy(&req->r_base_oloc, oloc);
48978c2ecf20Sopenharmony_ci	req->r_flags = CEPH_OSD_FLAG_READ;
48988c2ecf20Sopenharmony_ci
48998c2ecf20Sopenharmony_ci	pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
49008c2ecf20Sopenharmony_ci	if (IS_ERR(pages)) {
49018c2ecf20Sopenharmony_ci		ret = PTR_ERR(pages);
49028c2ecf20Sopenharmony_ci		goto out_req;
49038c2ecf20Sopenharmony_ci	}
49048c2ecf20Sopenharmony_ci
49058c2ecf20Sopenharmony_ci	osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, buf_len, 0, 0);
49068c2ecf20Sopenharmony_ci	osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false,
49078c2ecf20Sopenharmony_ci					 true);
49088c2ecf20Sopenharmony_ci
49098c2ecf20Sopenharmony_ci	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
49108c2ecf20Sopenharmony_ci	if (ret)
49118c2ecf20Sopenharmony_ci		goto out_req;
49128c2ecf20Sopenharmony_ci
49138c2ecf20Sopenharmony_ci	ceph_osdc_start_request(osdc, req, false);
49148c2ecf20Sopenharmony_ci	ret = ceph_osdc_wait_request(osdc, req);
49158c2ecf20Sopenharmony_ci	if (ret >= 0)
49168c2ecf20Sopenharmony_ci		ceph_copy_from_page_vector(pages, buf, 0, ret);
49178c2ecf20Sopenharmony_ci
49188c2ecf20Sopenharmony_ciout_req:
49198c2ecf20Sopenharmony_ci	ceph_osdc_put_request(req);
49208c2ecf20Sopenharmony_ci	return ret;
49218c2ecf20Sopenharmony_ci}
49228c2ecf20Sopenharmony_ci
49238c2ecf20Sopenharmony_ci/*
49248c2ecf20Sopenharmony_ci * Read the complete header for the given rbd device.  On successful
49258c2ecf20Sopenharmony_ci * return, the rbd_dev->header field will contain up-to-date
49268c2ecf20Sopenharmony_ci * information about the image.
49278c2ecf20Sopenharmony_ci */
49288c2ecf20Sopenharmony_cistatic int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
49298c2ecf20Sopenharmony_ci				  struct rbd_image_header *header,
49308c2ecf20Sopenharmony_ci				  bool first_time)
49318c2ecf20Sopenharmony_ci{
49328c2ecf20Sopenharmony_ci	struct rbd_image_header_ondisk *ondisk = NULL;
49338c2ecf20Sopenharmony_ci	u32 snap_count = 0;
49348c2ecf20Sopenharmony_ci	u64 names_size = 0;
49358c2ecf20Sopenharmony_ci	u32 want_count;
49368c2ecf20Sopenharmony_ci	int ret;
49378c2ecf20Sopenharmony_ci
49388c2ecf20Sopenharmony_ci	/*
49398c2ecf20Sopenharmony_ci	 * The complete header will include an array of its 64-bit
49408c2ecf20Sopenharmony_ci	 * snapshot ids, followed by the names of those snapshots as
49418c2ecf20Sopenharmony_ci	 * a contiguous block of NUL-terminated strings.  Note that
49428c2ecf20Sopenharmony_ci	 * the number of snapshots could change by the time we read
49438c2ecf20Sopenharmony_ci	 * it in, in which case we re-read it.
49448c2ecf20Sopenharmony_ci	 */
49458c2ecf20Sopenharmony_ci	do {
49468c2ecf20Sopenharmony_ci		size_t size;
49478c2ecf20Sopenharmony_ci
49488c2ecf20Sopenharmony_ci		kfree(ondisk);
49498c2ecf20Sopenharmony_ci
49508c2ecf20Sopenharmony_ci		size = sizeof (*ondisk);
49518c2ecf20Sopenharmony_ci		size += snap_count * sizeof (struct rbd_image_snap_ondisk);
49528c2ecf20Sopenharmony_ci		size += names_size;
49538c2ecf20Sopenharmony_ci		ondisk = kmalloc(size, GFP_KERNEL);
49548c2ecf20Sopenharmony_ci		if (!ondisk)
49558c2ecf20Sopenharmony_ci			return -ENOMEM;
49568c2ecf20Sopenharmony_ci
49578c2ecf20Sopenharmony_ci		ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid,
49588c2ecf20Sopenharmony_ci					&rbd_dev->header_oloc, ondisk, size);
49598c2ecf20Sopenharmony_ci		if (ret < 0)
49608c2ecf20Sopenharmony_ci			goto out;
49618c2ecf20Sopenharmony_ci		if ((size_t)ret < size) {
49628c2ecf20Sopenharmony_ci			ret = -ENXIO;
49638c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "short header read (want %zd got %d)",
49648c2ecf20Sopenharmony_ci				size, ret);
49658c2ecf20Sopenharmony_ci			goto out;
49668c2ecf20Sopenharmony_ci		}
49678c2ecf20Sopenharmony_ci		if (!rbd_dev_ondisk_valid(ondisk)) {
49688c2ecf20Sopenharmony_ci			ret = -ENXIO;
49698c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev, "invalid header");
49708c2ecf20Sopenharmony_ci			goto out;
49718c2ecf20Sopenharmony_ci		}
49728c2ecf20Sopenharmony_ci
49738c2ecf20Sopenharmony_ci		names_size = le64_to_cpu(ondisk->snap_names_len);
49748c2ecf20Sopenharmony_ci		want_count = snap_count;
49758c2ecf20Sopenharmony_ci		snap_count = le32_to_cpu(ondisk->snap_count);
49768c2ecf20Sopenharmony_ci	} while (snap_count != want_count);
49778c2ecf20Sopenharmony_ci
49788c2ecf20Sopenharmony_ci	ret = rbd_header_from_disk(header, ondisk, first_time);
49798c2ecf20Sopenharmony_ciout:
49808c2ecf20Sopenharmony_ci	kfree(ondisk);
49818c2ecf20Sopenharmony_ci
49828c2ecf20Sopenharmony_ci	return ret;
49838c2ecf20Sopenharmony_ci}
49848c2ecf20Sopenharmony_ci
49858c2ecf20Sopenharmony_cistatic void rbd_dev_update_size(struct rbd_device *rbd_dev)
49868c2ecf20Sopenharmony_ci{
49878c2ecf20Sopenharmony_ci	sector_t size;
49888c2ecf20Sopenharmony_ci
49898c2ecf20Sopenharmony_ci	/*
49908c2ecf20Sopenharmony_ci	 * If EXISTS is not set, rbd_dev->disk may be NULL, so don't
49918c2ecf20Sopenharmony_ci	 * try to update its size.  If REMOVING is set, updating size
49928c2ecf20Sopenharmony_ci	 * is just useless work since the device can't be opened.
49938c2ecf20Sopenharmony_ci	 */
49948c2ecf20Sopenharmony_ci	if (test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags) &&
49958c2ecf20Sopenharmony_ci	    !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) {
49968c2ecf20Sopenharmony_ci		size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
49978c2ecf20Sopenharmony_ci		dout("setting size to %llu sectors", (unsigned long long)size);
49988c2ecf20Sopenharmony_ci		set_capacity(rbd_dev->disk, size);
49998c2ecf20Sopenharmony_ci		revalidate_disk_size(rbd_dev->disk, true);
50008c2ecf20Sopenharmony_ci	}
50018c2ecf20Sopenharmony_ci}
50028c2ecf20Sopenharmony_ci
/*
 * blk-mq operations for rbd; only .queue_rq is provided.  Installed
 * into the tag set by rbd_init_disk().
 */
static const struct blk_mq_ops rbd_mq_ops = {
	.queue_rq	= rbd_queue_rq,
};
50068c2ecf20Sopenharmony_ci
50078c2ecf20Sopenharmony_cistatic int rbd_init_disk(struct rbd_device *rbd_dev)
50088c2ecf20Sopenharmony_ci{
50098c2ecf20Sopenharmony_ci	struct gendisk *disk;
50108c2ecf20Sopenharmony_ci	struct request_queue *q;
50118c2ecf20Sopenharmony_ci	unsigned int objset_bytes =
50128c2ecf20Sopenharmony_ci	    rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
50138c2ecf20Sopenharmony_ci	int err;
50148c2ecf20Sopenharmony_ci
50158c2ecf20Sopenharmony_ci	/* create gendisk info */
50168c2ecf20Sopenharmony_ci	disk = alloc_disk(single_major ?
50178c2ecf20Sopenharmony_ci			  (1 << RBD_SINGLE_MAJOR_PART_SHIFT) :
50188c2ecf20Sopenharmony_ci			  RBD_MINORS_PER_MAJOR);
50198c2ecf20Sopenharmony_ci	if (!disk)
50208c2ecf20Sopenharmony_ci		return -ENOMEM;
50218c2ecf20Sopenharmony_ci
50228c2ecf20Sopenharmony_ci	snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
50238c2ecf20Sopenharmony_ci		 rbd_dev->dev_id);
50248c2ecf20Sopenharmony_ci	disk->major = rbd_dev->major;
50258c2ecf20Sopenharmony_ci	disk->first_minor = rbd_dev->minor;
50268c2ecf20Sopenharmony_ci	if (single_major)
50278c2ecf20Sopenharmony_ci		disk->flags |= GENHD_FL_EXT_DEVT;
50288c2ecf20Sopenharmony_ci	disk->fops = &rbd_bd_ops;
50298c2ecf20Sopenharmony_ci	disk->private_data = rbd_dev;
50308c2ecf20Sopenharmony_ci
50318c2ecf20Sopenharmony_ci	memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
50328c2ecf20Sopenharmony_ci	rbd_dev->tag_set.ops = &rbd_mq_ops;
50338c2ecf20Sopenharmony_ci	rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
50348c2ecf20Sopenharmony_ci	rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
50358c2ecf20Sopenharmony_ci	rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
50368c2ecf20Sopenharmony_ci	rbd_dev->tag_set.nr_hw_queues = num_present_cpus();
50378c2ecf20Sopenharmony_ci	rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request);
50388c2ecf20Sopenharmony_ci
50398c2ecf20Sopenharmony_ci	err = blk_mq_alloc_tag_set(&rbd_dev->tag_set);
50408c2ecf20Sopenharmony_ci	if (err)
50418c2ecf20Sopenharmony_ci		goto out_disk;
50428c2ecf20Sopenharmony_ci
50438c2ecf20Sopenharmony_ci	q = blk_mq_init_queue(&rbd_dev->tag_set);
50448c2ecf20Sopenharmony_ci	if (IS_ERR(q)) {
50458c2ecf20Sopenharmony_ci		err = PTR_ERR(q);
50468c2ecf20Sopenharmony_ci		goto out_tag_set;
50478c2ecf20Sopenharmony_ci	}
50488c2ecf20Sopenharmony_ci
50498c2ecf20Sopenharmony_ci	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
50508c2ecf20Sopenharmony_ci	/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
50518c2ecf20Sopenharmony_ci
50528c2ecf20Sopenharmony_ci	blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT);
50538c2ecf20Sopenharmony_ci	q->limits.max_sectors = queue_max_hw_sectors(q);
50548c2ecf20Sopenharmony_ci	blk_queue_max_segments(q, USHRT_MAX);
50558c2ecf20Sopenharmony_ci	blk_queue_max_segment_size(q, UINT_MAX);
50568c2ecf20Sopenharmony_ci	blk_queue_io_min(q, rbd_dev->opts->alloc_size);
50578c2ecf20Sopenharmony_ci	blk_queue_io_opt(q, rbd_dev->opts->alloc_size);
50588c2ecf20Sopenharmony_ci
50598c2ecf20Sopenharmony_ci	if (rbd_dev->opts->trim) {
50608c2ecf20Sopenharmony_ci		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
50618c2ecf20Sopenharmony_ci		q->limits.discard_granularity = rbd_dev->opts->alloc_size;
50628c2ecf20Sopenharmony_ci		blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
50638c2ecf20Sopenharmony_ci		blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
50648c2ecf20Sopenharmony_ci	}
50658c2ecf20Sopenharmony_ci
50668c2ecf20Sopenharmony_ci	if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
50678c2ecf20Sopenharmony_ci		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
50688c2ecf20Sopenharmony_ci
50698c2ecf20Sopenharmony_ci	/*
50708c2ecf20Sopenharmony_ci	 * disk_release() expects a queue ref from add_disk() and will
50718c2ecf20Sopenharmony_ci	 * put it.  Hold an extra ref until add_disk() is called.
50728c2ecf20Sopenharmony_ci	 */
50738c2ecf20Sopenharmony_ci	WARN_ON(!blk_get_queue(q));
50748c2ecf20Sopenharmony_ci	disk->queue = q;
50758c2ecf20Sopenharmony_ci	q->queuedata = rbd_dev;
50768c2ecf20Sopenharmony_ci
50778c2ecf20Sopenharmony_ci	rbd_dev->disk = disk;
50788c2ecf20Sopenharmony_ci
50798c2ecf20Sopenharmony_ci	return 0;
50808c2ecf20Sopenharmony_ciout_tag_set:
50818c2ecf20Sopenharmony_ci	blk_mq_free_tag_set(&rbd_dev->tag_set);
50828c2ecf20Sopenharmony_ciout_disk:
50838c2ecf20Sopenharmony_ci	put_disk(disk);
50848c2ecf20Sopenharmony_ci	return err;
50858c2ecf20Sopenharmony_ci}
50868c2ecf20Sopenharmony_ci
50878c2ecf20Sopenharmony_ci/*
50888c2ecf20Sopenharmony_ci  sysfs
50898c2ecf20Sopenharmony_ci*/
50908c2ecf20Sopenharmony_ci
50918c2ecf20Sopenharmony_cistatic struct rbd_device *dev_to_rbd_dev(struct device *dev)
50928c2ecf20Sopenharmony_ci{
50938c2ecf20Sopenharmony_ci	return container_of(dev, struct rbd_device, dev);
50948c2ecf20Sopenharmony_ci}
50958c2ecf20Sopenharmony_ci
50968c2ecf20Sopenharmony_cistatic ssize_t rbd_size_show(struct device *dev,
50978c2ecf20Sopenharmony_ci			     struct device_attribute *attr, char *buf)
50988c2ecf20Sopenharmony_ci{
50998c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51008c2ecf20Sopenharmony_ci
51018c2ecf20Sopenharmony_ci	return sprintf(buf, "%llu\n",
51028c2ecf20Sopenharmony_ci		(unsigned long long)rbd_dev->mapping.size);
51038c2ecf20Sopenharmony_ci}
51048c2ecf20Sopenharmony_ci
51058c2ecf20Sopenharmony_cistatic ssize_t rbd_features_show(struct device *dev,
51068c2ecf20Sopenharmony_ci			     struct device_attribute *attr, char *buf)
51078c2ecf20Sopenharmony_ci{
51088c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51098c2ecf20Sopenharmony_ci
51108c2ecf20Sopenharmony_ci	return sprintf(buf, "0x%016llx\n", rbd_dev->header.features);
51118c2ecf20Sopenharmony_ci}
51128c2ecf20Sopenharmony_ci
51138c2ecf20Sopenharmony_cistatic ssize_t rbd_major_show(struct device *dev,
51148c2ecf20Sopenharmony_ci			      struct device_attribute *attr, char *buf)
51158c2ecf20Sopenharmony_ci{
51168c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51178c2ecf20Sopenharmony_ci
51188c2ecf20Sopenharmony_ci	if (rbd_dev->major)
51198c2ecf20Sopenharmony_ci		return sprintf(buf, "%d\n", rbd_dev->major);
51208c2ecf20Sopenharmony_ci
51218c2ecf20Sopenharmony_ci	return sprintf(buf, "(none)\n");
51228c2ecf20Sopenharmony_ci}
51238c2ecf20Sopenharmony_ci
51248c2ecf20Sopenharmony_cistatic ssize_t rbd_minor_show(struct device *dev,
51258c2ecf20Sopenharmony_ci			      struct device_attribute *attr, char *buf)
51268c2ecf20Sopenharmony_ci{
51278c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51288c2ecf20Sopenharmony_ci
51298c2ecf20Sopenharmony_ci	return sprintf(buf, "%d\n", rbd_dev->minor);
51308c2ecf20Sopenharmony_ci}
51318c2ecf20Sopenharmony_ci
51328c2ecf20Sopenharmony_cistatic ssize_t rbd_client_addr_show(struct device *dev,
51338c2ecf20Sopenharmony_ci				    struct device_attribute *attr, char *buf)
51348c2ecf20Sopenharmony_ci{
51358c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51368c2ecf20Sopenharmony_ci	struct ceph_entity_addr *client_addr =
51378c2ecf20Sopenharmony_ci	    ceph_client_addr(rbd_dev->rbd_client->client);
51388c2ecf20Sopenharmony_ci
51398c2ecf20Sopenharmony_ci	return sprintf(buf, "%pISpc/%u\n", &client_addr->in_addr,
51408c2ecf20Sopenharmony_ci		       le32_to_cpu(client_addr->nonce));
51418c2ecf20Sopenharmony_ci}
51428c2ecf20Sopenharmony_ci
51438c2ecf20Sopenharmony_cistatic ssize_t rbd_client_id_show(struct device *dev,
51448c2ecf20Sopenharmony_ci				  struct device_attribute *attr, char *buf)
51458c2ecf20Sopenharmony_ci{
51468c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51478c2ecf20Sopenharmony_ci
51488c2ecf20Sopenharmony_ci	return sprintf(buf, "client%lld\n",
51498c2ecf20Sopenharmony_ci		       ceph_client_gid(rbd_dev->rbd_client->client));
51508c2ecf20Sopenharmony_ci}
51518c2ecf20Sopenharmony_ci
51528c2ecf20Sopenharmony_cistatic ssize_t rbd_cluster_fsid_show(struct device *dev,
51538c2ecf20Sopenharmony_ci				     struct device_attribute *attr, char *buf)
51548c2ecf20Sopenharmony_ci{
51558c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51568c2ecf20Sopenharmony_ci
51578c2ecf20Sopenharmony_ci	return sprintf(buf, "%pU\n", &rbd_dev->rbd_client->client->fsid);
51588c2ecf20Sopenharmony_ci}
51598c2ecf20Sopenharmony_ci
51608c2ecf20Sopenharmony_cistatic ssize_t rbd_config_info_show(struct device *dev,
51618c2ecf20Sopenharmony_ci				    struct device_attribute *attr, char *buf)
51628c2ecf20Sopenharmony_ci{
51638c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51648c2ecf20Sopenharmony_ci
51658c2ecf20Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
51668c2ecf20Sopenharmony_ci		return -EPERM;
51678c2ecf20Sopenharmony_ci
51688c2ecf20Sopenharmony_ci	return sprintf(buf, "%s\n", rbd_dev->config_info);
51698c2ecf20Sopenharmony_ci}
51708c2ecf20Sopenharmony_ci
51718c2ecf20Sopenharmony_cistatic ssize_t rbd_pool_show(struct device *dev,
51728c2ecf20Sopenharmony_ci			     struct device_attribute *attr, char *buf)
51738c2ecf20Sopenharmony_ci{
51748c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51758c2ecf20Sopenharmony_ci
51768c2ecf20Sopenharmony_ci	return sprintf(buf, "%s\n", rbd_dev->spec->pool_name);
51778c2ecf20Sopenharmony_ci}
51788c2ecf20Sopenharmony_ci
51798c2ecf20Sopenharmony_cistatic ssize_t rbd_pool_id_show(struct device *dev,
51808c2ecf20Sopenharmony_ci			     struct device_attribute *attr, char *buf)
51818c2ecf20Sopenharmony_ci{
51828c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51838c2ecf20Sopenharmony_ci
51848c2ecf20Sopenharmony_ci	return sprintf(buf, "%llu\n",
51858c2ecf20Sopenharmony_ci			(unsigned long long) rbd_dev->spec->pool_id);
51868c2ecf20Sopenharmony_ci}
51878c2ecf20Sopenharmony_ci
51888c2ecf20Sopenharmony_cistatic ssize_t rbd_pool_ns_show(struct device *dev,
51898c2ecf20Sopenharmony_ci				struct device_attribute *attr, char *buf)
51908c2ecf20Sopenharmony_ci{
51918c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
51928c2ecf20Sopenharmony_ci
51938c2ecf20Sopenharmony_ci	return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: "");
51948c2ecf20Sopenharmony_ci}
51958c2ecf20Sopenharmony_ci
51968c2ecf20Sopenharmony_cistatic ssize_t rbd_name_show(struct device *dev,
51978c2ecf20Sopenharmony_ci			     struct device_attribute *attr, char *buf)
51988c2ecf20Sopenharmony_ci{
51998c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
52008c2ecf20Sopenharmony_ci
52018c2ecf20Sopenharmony_ci	if (rbd_dev->spec->image_name)
52028c2ecf20Sopenharmony_ci		return sprintf(buf, "%s\n", rbd_dev->spec->image_name);
52038c2ecf20Sopenharmony_ci
52048c2ecf20Sopenharmony_ci	return sprintf(buf, "(unknown)\n");
52058c2ecf20Sopenharmony_ci}
52068c2ecf20Sopenharmony_ci
52078c2ecf20Sopenharmony_cistatic ssize_t rbd_image_id_show(struct device *dev,
52088c2ecf20Sopenharmony_ci			     struct device_attribute *attr, char *buf)
52098c2ecf20Sopenharmony_ci{
52108c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
52118c2ecf20Sopenharmony_ci
52128c2ecf20Sopenharmony_ci	return sprintf(buf, "%s\n", rbd_dev->spec->image_id);
52138c2ecf20Sopenharmony_ci}
52148c2ecf20Sopenharmony_ci
52158c2ecf20Sopenharmony_ci/*
52168c2ecf20Sopenharmony_ci * Shows the name of the currently-mapped snapshot (or
52178c2ecf20Sopenharmony_ci * RBD_SNAP_HEAD_NAME for the base image).
52188c2ecf20Sopenharmony_ci */
52198c2ecf20Sopenharmony_cistatic ssize_t rbd_snap_show(struct device *dev,
52208c2ecf20Sopenharmony_ci			     struct device_attribute *attr,
52218c2ecf20Sopenharmony_ci			     char *buf)
52228c2ecf20Sopenharmony_ci{
52238c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
52248c2ecf20Sopenharmony_ci
52258c2ecf20Sopenharmony_ci	return sprintf(buf, "%s\n", rbd_dev->spec->snap_name);
52268c2ecf20Sopenharmony_ci}
52278c2ecf20Sopenharmony_ci
52288c2ecf20Sopenharmony_cistatic ssize_t rbd_snap_id_show(struct device *dev,
52298c2ecf20Sopenharmony_ci				struct device_attribute *attr, char *buf)
52308c2ecf20Sopenharmony_ci{
52318c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
52328c2ecf20Sopenharmony_ci
52338c2ecf20Sopenharmony_ci	return sprintf(buf, "%llu\n", rbd_dev->spec->snap_id);
52348c2ecf20Sopenharmony_ci}
52358c2ecf20Sopenharmony_ci
52368c2ecf20Sopenharmony_ci/*
52378c2ecf20Sopenharmony_ci * For a v2 image, shows the chain of parent images, separated by empty
52388c2ecf20Sopenharmony_ci * lines.  For v1 images or if there is no parent, shows "(no parent
52398c2ecf20Sopenharmony_ci * image)".
52408c2ecf20Sopenharmony_ci */
52418c2ecf20Sopenharmony_cistatic ssize_t rbd_parent_show(struct device *dev,
52428c2ecf20Sopenharmony_ci			       struct device_attribute *attr,
52438c2ecf20Sopenharmony_ci			       char *buf)
52448c2ecf20Sopenharmony_ci{
52458c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
52468c2ecf20Sopenharmony_ci	ssize_t count = 0;
52478c2ecf20Sopenharmony_ci
52488c2ecf20Sopenharmony_ci	if (!rbd_dev->parent)
52498c2ecf20Sopenharmony_ci		return sprintf(buf, "(no parent image)\n");
52508c2ecf20Sopenharmony_ci
52518c2ecf20Sopenharmony_ci	for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) {
52528c2ecf20Sopenharmony_ci		struct rbd_spec *spec = rbd_dev->parent_spec;
52538c2ecf20Sopenharmony_ci
52548c2ecf20Sopenharmony_ci		count += sprintf(&buf[count], "%s"
52558c2ecf20Sopenharmony_ci			    "pool_id %llu\npool_name %s\n"
52568c2ecf20Sopenharmony_ci			    "pool_ns %s\n"
52578c2ecf20Sopenharmony_ci			    "image_id %s\nimage_name %s\n"
52588c2ecf20Sopenharmony_ci			    "snap_id %llu\nsnap_name %s\n"
52598c2ecf20Sopenharmony_ci			    "overlap %llu\n",
52608c2ecf20Sopenharmony_ci			    !count ? "" : "\n", /* first? */
52618c2ecf20Sopenharmony_ci			    spec->pool_id, spec->pool_name,
52628c2ecf20Sopenharmony_ci			    spec->pool_ns ?: "",
52638c2ecf20Sopenharmony_ci			    spec->image_id, spec->image_name ?: "(unknown)",
52648c2ecf20Sopenharmony_ci			    spec->snap_id, spec->snap_name,
52658c2ecf20Sopenharmony_ci			    rbd_dev->parent_overlap);
52668c2ecf20Sopenharmony_ci	}
52678c2ecf20Sopenharmony_ci
52688c2ecf20Sopenharmony_ci	return count;
52698c2ecf20Sopenharmony_ci}
52708c2ecf20Sopenharmony_ci
52718c2ecf20Sopenharmony_cistatic ssize_t rbd_image_refresh(struct device *dev,
52728c2ecf20Sopenharmony_ci				 struct device_attribute *attr,
52738c2ecf20Sopenharmony_ci				 const char *buf,
52748c2ecf20Sopenharmony_ci				 size_t size)
52758c2ecf20Sopenharmony_ci{
52768c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
52778c2ecf20Sopenharmony_ci	int ret;
52788c2ecf20Sopenharmony_ci
52798c2ecf20Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
52808c2ecf20Sopenharmony_ci		return -EPERM;
52818c2ecf20Sopenharmony_ci
52828c2ecf20Sopenharmony_ci	ret = rbd_dev_refresh(rbd_dev);
52838c2ecf20Sopenharmony_ci	if (ret)
52848c2ecf20Sopenharmony_ci		return ret;
52858c2ecf20Sopenharmony_ci
52868c2ecf20Sopenharmony_ci	return size;
52878c2ecf20Sopenharmony_ci}
52888c2ecf20Sopenharmony_ci
/*
 * Per-device sysfs attributes.  All are read-only for everyone (0444)
 * except config_info, which is owner-read-only (0400) and whose show
 * handler also checks CAP_SYS_ADMIN, and refresh, which is
 * owner-write-only (0200) and has no show handler.
 */
static DEVICE_ATTR(size, 0444, rbd_size_show, NULL);
static DEVICE_ATTR(features, 0444, rbd_features_show, NULL);
static DEVICE_ATTR(major, 0444, rbd_major_show, NULL);
static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL);
static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL);
static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL);
static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
static DEVICE_ATTR(pool_ns, 0444, rbd_pool_ns_show, NULL);
static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL);
static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL);
static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL);
53068c2ecf20Sopenharmony_ci
/* NULL-terminated list of the attributes collected in rbd_attr_group. */
static struct attribute *rbd_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_features.attr,
	&dev_attr_major.attr,
	&dev_attr_minor.attr,
	&dev_attr_client_addr.attr,
	&dev_attr_client_id.attr,
	&dev_attr_cluster_fsid.attr,
	&dev_attr_config_info.attr,
	&dev_attr_pool.attr,
	&dev_attr_pool_id.attr,
	&dev_attr_pool_ns.attr,
	&dev_attr_name.attr,
	&dev_attr_image_id.attr,
	&dev_attr_current_snap.attr,
	&dev_attr_snap_id.attr,
	&dev_attr_parent.attr,
	&dev_attr_refresh.attr,
	NULL
};
53278c2ecf20Sopenharmony_ci
/* The single (unnamed) attribute group holding every rbd attribute. */
static struct attribute_group rbd_attr_group = {
	.attrs = rbd_attrs,
};

/* NULL-terminated group list referenced by rbd_device_type.groups. */
static const struct attribute_group *rbd_attr_groups[] = {
	&rbd_attr_group,
	NULL
};
53368c2ecf20Sopenharmony_ci
/* Defined below; declared here so rbd_device_type can refer to it. */
static void rbd_dev_release(struct device *dev);

/*
 * Device type assigned to rbd_dev->dev in __rbd_dev_create(): supplies
 * the attribute groups above and rbd_dev_release() as the release
 * callback.
 */
static const struct device_type rbd_device_type = {
	.name		= "rbd",
	.groups		= rbd_attr_groups,
	.release	= rbd_dev_release,
};
53448c2ecf20Sopenharmony_ci
53458c2ecf20Sopenharmony_cistatic struct rbd_spec *rbd_spec_get(struct rbd_spec *spec)
53468c2ecf20Sopenharmony_ci{
53478c2ecf20Sopenharmony_ci	kref_get(&spec->kref);
53488c2ecf20Sopenharmony_ci
53498c2ecf20Sopenharmony_ci	return spec;
53508c2ecf20Sopenharmony_ci}
53518c2ecf20Sopenharmony_ci
53528c2ecf20Sopenharmony_cistatic void rbd_spec_free(struct kref *kref);
53538c2ecf20Sopenharmony_cistatic void rbd_spec_put(struct rbd_spec *spec)
53548c2ecf20Sopenharmony_ci{
53558c2ecf20Sopenharmony_ci	if (spec)
53568c2ecf20Sopenharmony_ci		kref_put(&spec->kref, rbd_spec_free);
53578c2ecf20Sopenharmony_ci}
53588c2ecf20Sopenharmony_ci
53598c2ecf20Sopenharmony_cistatic struct rbd_spec *rbd_spec_alloc(void)
53608c2ecf20Sopenharmony_ci{
53618c2ecf20Sopenharmony_ci	struct rbd_spec *spec;
53628c2ecf20Sopenharmony_ci
53638c2ecf20Sopenharmony_ci	spec = kzalloc(sizeof (*spec), GFP_KERNEL);
53648c2ecf20Sopenharmony_ci	if (!spec)
53658c2ecf20Sopenharmony_ci		return NULL;
53668c2ecf20Sopenharmony_ci
53678c2ecf20Sopenharmony_ci	spec->pool_id = CEPH_NOPOOL;
53688c2ecf20Sopenharmony_ci	spec->snap_id = CEPH_NOSNAP;
53698c2ecf20Sopenharmony_ci	kref_init(&spec->kref);
53708c2ecf20Sopenharmony_ci
53718c2ecf20Sopenharmony_ci	return spec;
53728c2ecf20Sopenharmony_ci}
53738c2ecf20Sopenharmony_ci
53748c2ecf20Sopenharmony_cistatic void rbd_spec_free(struct kref *kref)
53758c2ecf20Sopenharmony_ci{
53768c2ecf20Sopenharmony_ci	struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref);
53778c2ecf20Sopenharmony_ci
53788c2ecf20Sopenharmony_ci	kfree(spec->pool_name);
53798c2ecf20Sopenharmony_ci	kfree(spec->pool_ns);
53808c2ecf20Sopenharmony_ci	kfree(spec->image_id);
53818c2ecf20Sopenharmony_ci	kfree(spec->image_name);
53828c2ecf20Sopenharmony_ci	kfree(spec->snap_name);
53838c2ecf20Sopenharmony_ci	kfree(spec);
53848c2ecf20Sopenharmony_ci}
53858c2ecf20Sopenharmony_ci
/*
 * Free an rbd_device and what it owns: the header oid/oloc, the
 * config_info string, the client and spec references, the options
 * and finally the structure itself.
 */
static void rbd_dev_free(struct rbd_device *rbd_dev)
{
	/* Warn if the watch or the lock has not been torn down yet. */
	WARN_ON(rbd_dev->watch_state != RBD_WATCH_STATE_UNREGISTERED);
	WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_UNLOCKED);

	ceph_oid_destroy(&rbd_dev->header_oid);
	ceph_oloc_destroy(&rbd_dev->header_oloc);
	kfree(rbd_dev->config_info);

	rbd_put_client(rbd_dev->rbd_client);
	rbd_spec_put(rbd_dev->spec);
	kfree(rbd_dev->opts);
	kfree(rbd_dev);
}
54008c2ecf20Sopenharmony_ci
54018c2ecf20Sopenharmony_cistatic void rbd_dev_release(struct device *dev)
54028c2ecf20Sopenharmony_ci{
54038c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
54048c2ecf20Sopenharmony_ci	bool need_put = !!rbd_dev->opts;
54058c2ecf20Sopenharmony_ci
54068c2ecf20Sopenharmony_ci	if (need_put) {
54078c2ecf20Sopenharmony_ci		destroy_workqueue(rbd_dev->task_wq);
54088c2ecf20Sopenharmony_ci		ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
54098c2ecf20Sopenharmony_ci	}
54108c2ecf20Sopenharmony_ci
54118c2ecf20Sopenharmony_ci	rbd_dev_free(rbd_dev);
54128c2ecf20Sopenharmony_ci
54138c2ecf20Sopenharmony_ci	/*
54148c2ecf20Sopenharmony_ci	 * This is racy, but way better than putting module outside of
54158c2ecf20Sopenharmony_ci	 * the release callback.  The race window is pretty small, so
54168c2ecf20Sopenharmony_ci	 * doing something similar to dm (dm-builtin.c) is overkill.
54178c2ecf20Sopenharmony_ci	 */
54188c2ecf20Sopenharmony_ci	if (need_put)
54198c2ecf20Sopenharmony_ci		module_put(THIS_MODULE);
54208c2ecf20Sopenharmony_ci}
54218c2ecf20Sopenharmony_ci
54228c2ecf20Sopenharmony_cistatic struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
54238c2ecf20Sopenharmony_ci{
54248c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev;
54258c2ecf20Sopenharmony_ci
54268c2ecf20Sopenharmony_ci	rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL);
54278c2ecf20Sopenharmony_ci	if (!rbd_dev)
54288c2ecf20Sopenharmony_ci		return NULL;
54298c2ecf20Sopenharmony_ci
54308c2ecf20Sopenharmony_ci	spin_lock_init(&rbd_dev->lock);
54318c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&rbd_dev->node);
54328c2ecf20Sopenharmony_ci	init_rwsem(&rbd_dev->header_rwsem);
54338c2ecf20Sopenharmony_ci
54348c2ecf20Sopenharmony_ci	rbd_dev->header.data_pool_id = CEPH_NOPOOL;
54358c2ecf20Sopenharmony_ci	ceph_oid_init(&rbd_dev->header_oid);
54368c2ecf20Sopenharmony_ci	rbd_dev->header_oloc.pool = spec->pool_id;
54378c2ecf20Sopenharmony_ci	if (spec->pool_ns) {
54388c2ecf20Sopenharmony_ci		WARN_ON(!*spec->pool_ns);
54398c2ecf20Sopenharmony_ci		rbd_dev->header_oloc.pool_ns =
54408c2ecf20Sopenharmony_ci		    ceph_find_or_create_string(spec->pool_ns,
54418c2ecf20Sopenharmony_ci					       strlen(spec->pool_ns));
54428c2ecf20Sopenharmony_ci	}
54438c2ecf20Sopenharmony_ci
54448c2ecf20Sopenharmony_ci	mutex_init(&rbd_dev->watch_mutex);
54458c2ecf20Sopenharmony_ci	rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
54468c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&rbd_dev->watch_dwork, rbd_reregister_watch);
54478c2ecf20Sopenharmony_ci
54488c2ecf20Sopenharmony_ci	init_rwsem(&rbd_dev->lock_rwsem);
54498c2ecf20Sopenharmony_ci	rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED;
54508c2ecf20Sopenharmony_ci	INIT_WORK(&rbd_dev->acquired_lock_work, rbd_notify_acquired_lock);
54518c2ecf20Sopenharmony_ci	INIT_WORK(&rbd_dev->released_lock_work, rbd_notify_released_lock);
54528c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&rbd_dev->lock_dwork, rbd_acquire_lock);
54538c2ecf20Sopenharmony_ci	INIT_WORK(&rbd_dev->unlock_work, rbd_release_lock_work);
54548c2ecf20Sopenharmony_ci	spin_lock_init(&rbd_dev->lock_lists_lock);
54558c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&rbd_dev->acquiring_list);
54568c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&rbd_dev->running_list);
54578c2ecf20Sopenharmony_ci	init_completion(&rbd_dev->acquire_wait);
54588c2ecf20Sopenharmony_ci	init_completion(&rbd_dev->releasing_wait);
54598c2ecf20Sopenharmony_ci
54608c2ecf20Sopenharmony_ci	spin_lock_init(&rbd_dev->object_map_lock);
54618c2ecf20Sopenharmony_ci
54628c2ecf20Sopenharmony_ci	rbd_dev->dev.bus = &rbd_bus_type;
54638c2ecf20Sopenharmony_ci	rbd_dev->dev.type = &rbd_device_type;
54648c2ecf20Sopenharmony_ci	rbd_dev->dev.parent = &rbd_root_dev;
54658c2ecf20Sopenharmony_ci	device_initialize(&rbd_dev->dev);
54668c2ecf20Sopenharmony_ci
54678c2ecf20Sopenharmony_ci	return rbd_dev;
54688c2ecf20Sopenharmony_ci}
54698c2ecf20Sopenharmony_ci
54708c2ecf20Sopenharmony_ci/*
54718c2ecf20Sopenharmony_ci * Create a mapping rbd_dev.
54728c2ecf20Sopenharmony_ci */
54738c2ecf20Sopenharmony_cistatic struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
54748c2ecf20Sopenharmony_ci					 struct rbd_spec *spec,
54758c2ecf20Sopenharmony_ci					 struct rbd_options *opts)
54768c2ecf20Sopenharmony_ci{
54778c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev;
54788c2ecf20Sopenharmony_ci
54798c2ecf20Sopenharmony_ci	rbd_dev = __rbd_dev_create(spec);
54808c2ecf20Sopenharmony_ci	if (!rbd_dev)
54818c2ecf20Sopenharmony_ci		return NULL;
54828c2ecf20Sopenharmony_ci
54838c2ecf20Sopenharmony_ci	/* get an id and fill in device name */
54848c2ecf20Sopenharmony_ci	rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0,
54858c2ecf20Sopenharmony_ci					 minor_to_rbd_dev_id(1 << MINORBITS),
54868c2ecf20Sopenharmony_ci					 GFP_KERNEL);
54878c2ecf20Sopenharmony_ci	if (rbd_dev->dev_id < 0)
54888c2ecf20Sopenharmony_ci		goto fail_rbd_dev;
54898c2ecf20Sopenharmony_ci
54908c2ecf20Sopenharmony_ci	sprintf(rbd_dev->name, RBD_DRV_NAME "%d", rbd_dev->dev_id);
54918c2ecf20Sopenharmony_ci	rbd_dev->task_wq = alloc_ordered_workqueue("%s-tasks", WQ_MEM_RECLAIM,
54928c2ecf20Sopenharmony_ci						   rbd_dev->name);
54938c2ecf20Sopenharmony_ci	if (!rbd_dev->task_wq)
54948c2ecf20Sopenharmony_ci		goto fail_dev_id;
54958c2ecf20Sopenharmony_ci
54968c2ecf20Sopenharmony_ci	/* we have a ref from do_rbd_add() */
54978c2ecf20Sopenharmony_ci	__module_get(THIS_MODULE);
54988c2ecf20Sopenharmony_ci
54998c2ecf20Sopenharmony_ci	rbd_dev->rbd_client = rbdc;
55008c2ecf20Sopenharmony_ci	rbd_dev->spec = spec;
55018c2ecf20Sopenharmony_ci	rbd_dev->opts = opts;
55028c2ecf20Sopenharmony_ci
55038c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id);
55048c2ecf20Sopenharmony_ci	return rbd_dev;
55058c2ecf20Sopenharmony_ci
55068c2ecf20Sopenharmony_cifail_dev_id:
55078c2ecf20Sopenharmony_ci	ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id);
55088c2ecf20Sopenharmony_cifail_rbd_dev:
55098c2ecf20Sopenharmony_ci	rbd_dev_free(rbd_dev);
55108c2ecf20Sopenharmony_ci	return NULL;
55118c2ecf20Sopenharmony_ci}
55128c2ecf20Sopenharmony_ci
55138c2ecf20Sopenharmony_cistatic void rbd_dev_destroy(struct rbd_device *rbd_dev)
55148c2ecf20Sopenharmony_ci{
55158c2ecf20Sopenharmony_ci	if (rbd_dev)
55168c2ecf20Sopenharmony_ci		put_device(&rbd_dev->dev);
55178c2ecf20Sopenharmony_ci}
55188c2ecf20Sopenharmony_ci
55198c2ecf20Sopenharmony_ci/*
55208c2ecf20Sopenharmony_ci * Get the size and object order for an image snapshot, or if
55218c2ecf20Sopenharmony_ci * snap_id is CEPH_NOSNAP, gets this information for the base
55228c2ecf20Sopenharmony_ci * image.
55238c2ecf20Sopenharmony_ci */
55248c2ecf20Sopenharmony_cistatic int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
55258c2ecf20Sopenharmony_ci				u8 *order, u64 *snap_size)
55268c2ecf20Sopenharmony_ci{
55278c2ecf20Sopenharmony_ci	__le64 snapid = cpu_to_le64(snap_id);
55288c2ecf20Sopenharmony_ci	int ret;
55298c2ecf20Sopenharmony_ci	struct {
55308c2ecf20Sopenharmony_ci		u8 order;
55318c2ecf20Sopenharmony_ci		__le64 size;
55328c2ecf20Sopenharmony_ci	} __attribute__ ((packed)) size_buf = { 0 };
55338c2ecf20Sopenharmony_ci
55348c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
55358c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, "get_size",
55368c2ecf20Sopenharmony_ci				  &snapid, sizeof(snapid),
55378c2ecf20Sopenharmony_ci				  &size_buf, sizeof(size_buf));
55388c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
55398c2ecf20Sopenharmony_ci	if (ret < 0)
55408c2ecf20Sopenharmony_ci		return ret;
55418c2ecf20Sopenharmony_ci	if (ret < sizeof (size_buf))
55428c2ecf20Sopenharmony_ci		return -ERANGE;
55438c2ecf20Sopenharmony_ci
55448c2ecf20Sopenharmony_ci	if (order) {
55458c2ecf20Sopenharmony_ci		*order = size_buf.order;
55468c2ecf20Sopenharmony_ci		dout("  order %u", (unsigned int)*order);
55478c2ecf20Sopenharmony_ci	}
55488c2ecf20Sopenharmony_ci	*snap_size = le64_to_cpu(size_buf.size);
55498c2ecf20Sopenharmony_ci
55508c2ecf20Sopenharmony_ci	dout("  snap_id 0x%016llx snap_size = %llu\n",
55518c2ecf20Sopenharmony_ci		(unsigned long long)snap_id,
55528c2ecf20Sopenharmony_ci		(unsigned long long)*snap_size);
55538c2ecf20Sopenharmony_ci
55548c2ecf20Sopenharmony_ci	return 0;
55558c2ecf20Sopenharmony_ci}
55568c2ecf20Sopenharmony_ci
55578c2ecf20Sopenharmony_cistatic int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
55588c2ecf20Sopenharmony_ci				    char **pobject_prefix)
55598c2ecf20Sopenharmony_ci{
55608c2ecf20Sopenharmony_ci	size_t size;
55618c2ecf20Sopenharmony_ci	void *reply_buf;
55628c2ecf20Sopenharmony_ci	char *object_prefix;
55638c2ecf20Sopenharmony_ci	int ret;
55648c2ecf20Sopenharmony_ci	void *p;
55658c2ecf20Sopenharmony_ci
55668c2ecf20Sopenharmony_ci	/* Response will be an encoded string, which includes a length */
55678c2ecf20Sopenharmony_ci	size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX;
55688c2ecf20Sopenharmony_ci	reply_buf = kzalloc(size, GFP_KERNEL);
55698c2ecf20Sopenharmony_ci	if (!reply_buf)
55708c2ecf20Sopenharmony_ci		return -ENOMEM;
55718c2ecf20Sopenharmony_ci
55728c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
55738c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, "get_object_prefix",
55748c2ecf20Sopenharmony_ci				  NULL, 0, reply_buf, size);
55758c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
55768c2ecf20Sopenharmony_ci	if (ret < 0)
55778c2ecf20Sopenharmony_ci		goto out;
55788c2ecf20Sopenharmony_ci
55798c2ecf20Sopenharmony_ci	p = reply_buf;
55808c2ecf20Sopenharmony_ci	object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
55818c2ecf20Sopenharmony_ci						    GFP_NOIO);
55828c2ecf20Sopenharmony_ci	if (IS_ERR(object_prefix)) {
55838c2ecf20Sopenharmony_ci		ret = PTR_ERR(object_prefix);
55848c2ecf20Sopenharmony_ci		goto out;
55858c2ecf20Sopenharmony_ci	}
55868c2ecf20Sopenharmony_ci	ret = 0;
55878c2ecf20Sopenharmony_ci
55888c2ecf20Sopenharmony_ci	*pobject_prefix = object_prefix;
55898c2ecf20Sopenharmony_ci	dout("  object_prefix = %s\n", object_prefix);
55908c2ecf20Sopenharmony_ciout:
55918c2ecf20Sopenharmony_ci	kfree(reply_buf);
55928c2ecf20Sopenharmony_ci
55938c2ecf20Sopenharmony_ci	return ret;
55948c2ecf20Sopenharmony_ci}
55958c2ecf20Sopenharmony_ci
55968c2ecf20Sopenharmony_cistatic int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
55978c2ecf20Sopenharmony_ci				     bool read_only, u64 *snap_features)
55988c2ecf20Sopenharmony_ci{
55998c2ecf20Sopenharmony_ci	struct {
56008c2ecf20Sopenharmony_ci		__le64 snap_id;
56018c2ecf20Sopenharmony_ci		u8 read_only;
56028c2ecf20Sopenharmony_ci	} features_in;
56038c2ecf20Sopenharmony_ci	struct {
56048c2ecf20Sopenharmony_ci		__le64 features;
56058c2ecf20Sopenharmony_ci		__le64 incompat;
56068c2ecf20Sopenharmony_ci	} __attribute__ ((packed)) features_buf = { 0 };
56078c2ecf20Sopenharmony_ci	u64 unsup;
56088c2ecf20Sopenharmony_ci	int ret;
56098c2ecf20Sopenharmony_ci
56108c2ecf20Sopenharmony_ci	features_in.snap_id = cpu_to_le64(snap_id);
56118c2ecf20Sopenharmony_ci	features_in.read_only = read_only;
56128c2ecf20Sopenharmony_ci
56138c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
56148c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, "get_features",
56158c2ecf20Sopenharmony_ci				  &features_in, sizeof(features_in),
56168c2ecf20Sopenharmony_ci				  &features_buf, sizeof(features_buf));
56178c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
56188c2ecf20Sopenharmony_ci	if (ret < 0)
56198c2ecf20Sopenharmony_ci		return ret;
56208c2ecf20Sopenharmony_ci	if (ret < sizeof (features_buf))
56218c2ecf20Sopenharmony_ci		return -ERANGE;
56228c2ecf20Sopenharmony_ci
56238c2ecf20Sopenharmony_ci	unsup = le64_to_cpu(features_buf.incompat) & ~RBD_FEATURES_SUPPORTED;
56248c2ecf20Sopenharmony_ci	if (unsup) {
56258c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "image uses unsupported features: 0x%llx",
56268c2ecf20Sopenharmony_ci			 unsup);
56278c2ecf20Sopenharmony_ci		return -ENXIO;
56288c2ecf20Sopenharmony_ci	}
56298c2ecf20Sopenharmony_ci
56308c2ecf20Sopenharmony_ci	*snap_features = le64_to_cpu(features_buf.features);
56318c2ecf20Sopenharmony_ci
56328c2ecf20Sopenharmony_ci	dout("  snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n",
56338c2ecf20Sopenharmony_ci		(unsigned long long)snap_id,
56348c2ecf20Sopenharmony_ci		(unsigned long long)*snap_features,
56358c2ecf20Sopenharmony_ci		(unsigned long long)le64_to_cpu(features_buf.incompat));
56368c2ecf20Sopenharmony_ci
56378c2ecf20Sopenharmony_ci	return 0;
56388c2ecf20Sopenharmony_ci}
56398c2ecf20Sopenharmony_ci
56408c2ecf20Sopenharmony_ci/*
56418c2ecf20Sopenharmony_ci * These are generic image flags, but since they are used only for
56428c2ecf20Sopenharmony_ci * object map, store them in rbd_dev->object_map_flags.
56438c2ecf20Sopenharmony_ci *
56448c2ecf20Sopenharmony_ci * For the same reason, this function is called only on object map
56458c2ecf20Sopenharmony_ci * (re)load and not on header refresh.
56468c2ecf20Sopenharmony_ci */
56478c2ecf20Sopenharmony_cistatic int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev)
56488c2ecf20Sopenharmony_ci{
56498c2ecf20Sopenharmony_ci	__le64 snapid = cpu_to_le64(rbd_dev->spec->snap_id);
56508c2ecf20Sopenharmony_ci	__le64 flags;
56518c2ecf20Sopenharmony_ci	int ret;
56528c2ecf20Sopenharmony_ci
56538c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
56548c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, "get_flags",
56558c2ecf20Sopenharmony_ci				  &snapid, sizeof(snapid),
56568c2ecf20Sopenharmony_ci				  &flags, sizeof(flags));
56578c2ecf20Sopenharmony_ci	if (ret < 0)
56588c2ecf20Sopenharmony_ci		return ret;
56598c2ecf20Sopenharmony_ci	if (ret < sizeof(flags))
56608c2ecf20Sopenharmony_ci		return -EBADMSG;
56618c2ecf20Sopenharmony_ci
56628c2ecf20Sopenharmony_ci	rbd_dev->object_map_flags = le64_to_cpu(flags);
56638c2ecf20Sopenharmony_ci	return 0;
56648c2ecf20Sopenharmony_ci}
56658c2ecf20Sopenharmony_ci
56668c2ecf20Sopenharmony_cistruct parent_image_info {
56678c2ecf20Sopenharmony_ci	u64		pool_id;
56688c2ecf20Sopenharmony_ci	const char	*pool_ns;
56698c2ecf20Sopenharmony_ci	const char	*image_id;
56708c2ecf20Sopenharmony_ci	u64		snap_id;
56718c2ecf20Sopenharmony_ci
56728c2ecf20Sopenharmony_ci	bool		has_overlap;
56738c2ecf20Sopenharmony_ci	u64		overlap;
56748c2ecf20Sopenharmony_ci};
56758c2ecf20Sopenharmony_ci
56768c2ecf20Sopenharmony_cistatic void rbd_parent_info_cleanup(struct parent_image_info *pii)
56778c2ecf20Sopenharmony_ci{
56788c2ecf20Sopenharmony_ci	kfree(pii->pool_ns);
56798c2ecf20Sopenharmony_ci	kfree(pii->image_id);
56808c2ecf20Sopenharmony_ci
56818c2ecf20Sopenharmony_ci	memset(pii, 0, sizeof(*pii));
56828c2ecf20Sopenharmony_ci}
56838c2ecf20Sopenharmony_ci
56848c2ecf20Sopenharmony_ci/*
56858c2ecf20Sopenharmony_ci * The caller is responsible for @pii.
56868c2ecf20Sopenharmony_ci */
56878c2ecf20Sopenharmony_cistatic int decode_parent_image_spec(void **p, void *end,
56888c2ecf20Sopenharmony_ci				    struct parent_image_info *pii)
56898c2ecf20Sopenharmony_ci{
56908c2ecf20Sopenharmony_ci	u8 struct_v;
56918c2ecf20Sopenharmony_ci	u32 struct_len;
56928c2ecf20Sopenharmony_ci	int ret;
56938c2ecf20Sopenharmony_ci
56948c2ecf20Sopenharmony_ci	ret = ceph_start_decoding(p, end, 1, "ParentImageSpec",
56958c2ecf20Sopenharmony_ci				  &struct_v, &struct_len);
56968c2ecf20Sopenharmony_ci	if (ret)
56978c2ecf20Sopenharmony_ci		return ret;
56988c2ecf20Sopenharmony_ci
56998c2ecf20Sopenharmony_ci	ceph_decode_64_safe(p, end, pii->pool_id, e_inval);
57008c2ecf20Sopenharmony_ci	pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
57018c2ecf20Sopenharmony_ci	if (IS_ERR(pii->pool_ns)) {
57028c2ecf20Sopenharmony_ci		ret = PTR_ERR(pii->pool_ns);
57038c2ecf20Sopenharmony_ci		pii->pool_ns = NULL;
57048c2ecf20Sopenharmony_ci		return ret;
57058c2ecf20Sopenharmony_ci	}
57068c2ecf20Sopenharmony_ci	pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
57078c2ecf20Sopenharmony_ci	if (IS_ERR(pii->image_id)) {
57088c2ecf20Sopenharmony_ci		ret = PTR_ERR(pii->image_id);
57098c2ecf20Sopenharmony_ci		pii->image_id = NULL;
57108c2ecf20Sopenharmony_ci		return ret;
57118c2ecf20Sopenharmony_ci	}
57128c2ecf20Sopenharmony_ci	ceph_decode_64_safe(p, end, pii->snap_id, e_inval);
57138c2ecf20Sopenharmony_ci	return 0;
57148c2ecf20Sopenharmony_ci
57158c2ecf20Sopenharmony_cie_inval:
57168c2ecf20Sopenharmony_ci	return -EINVAL;
57178c2ecf20Sopenharmony_ci}
57188c2ecf20Sopenharmony_ci
57198c2ecf20Sopenharmony_cistatic int __get_parent_info(struct rbd_device *rbd_dev,
57208c2ecf20Sopenharmony_ci			     struct page *req_page,
57218c2ecf20Sopenharmony_ci			     struct page *reply_page,
57228c2ecf20Sopenharmony_ci			     struct parent_image_info *pii)
57238c2ecf20Sopenharmony_ci{
57248c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
57258c2ecf20Sopenharmony_ci	size_t reply_len = PAGE_SIZE;
57268c2ecf20Sopenharmony_ci	void *p, *end;
57278c2ecf20Sopenharmony_ci	int ret;
57288c2ecf20Sopenharmony_ci
57298c2ecf20Sopenharmony_ci	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
57308c2ecf20Sopenharmony_ci			     "rbd", "parent_get", CEPH_OSD_FLAG_READ,
57318c2ecf20Sopenharmony_ci			     req_page, sizeof(u64), &reply_page, &reply_len);
57328c2ecf20Sopenharmony_ci	if (ret)
57338c2ecf20Sopenharmony_ci		return ret == -EOPNOTSUPP ? 1 : ret;
57348c2ecf20Sopenharmony_ci
57358c2ecf20Sopenharmony_ci	p = page_address(reply_page);
57368c2ecf20Sopenharmony_ci	end = p + reply_len;
57378c2ecf20Sopenharmony_ci	ret = decode_parent_image_spec(&p, end, pii);
57388c2ecf20Sopenharmony_ci	if (ret)
57398c2ecf20Sopenharmony_ci		return ret;
57408c2ecf20Sopenharmony_ci
57418c2ecf20Sopenharmony_ci	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
57428c2ecf20Sopenharmony_ci			     "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ,
57438c2ecf20Sopenharmony_ci			     req_page, sizeof(u64), &reply_page, &reply_len);
57448c2ecf20Sopenharmony_ci	if (ret)
57458c2ecf20Sopenharmony_ci		return ret;
57468c2ecf20Sopenharmony_ci
57478c2ecf20Sopenharmony_ci	p = page_address(reply_page);
57488c2ecf20Sopenharmony_ci	end = p + reply_len;
57498c2ecf20Sopenharmony_ci	ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval);
57508c2ecf20Sopenharmony_ci	if (pii->has_overlap)
57518c2ecf20Sopenharmony_ci		ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
57528c2ecf20Sopenharmony_ci
57538c2ecf20Sopenharmony_ci	dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
57548c2ecf20Sopenharmony_ci	     __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
57558c2ecf20Sopenharmony_ci	     pii->has_overlap, pii->overlap);
57568c2ecf20Sopenharmony_ci	return 0;
57578c2ecf20Sopenharmony_ci
57588c2ecf20Sopenharmony_cie_inval:
57598c2ecf20Sopenharmony_ci	return -EINVAL;
57608c2ecf20Sopenharmony_ci}
57618c2ecf20Sopenharmony_ci
57628c2ecf20Sopenharmony_ci/*
57638c2ecf20Sopenharmony_ci * The caller is responsible for @pii.
57648c2ecf20Sopenharmony_ci */
57658c2ecf20Sopenharmony_cistatic int __get_parent_info_legacy(struct rbd_device *rbd_dev,
57668c2ecf20Sopenharmony_ci				    struct page *req_page,
57678c2ecf20Sopenharmony_ci				    struct page *reply_page,
57688c2ecf20Sopenharmony_ci				    struct parent_image_info *pii)
57698c2ecf20Sopenharmony_ci{
57708c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
57718c2ecf20Sopenharmony_ci	size_t reply_len = PAGE_SIZE;
57728c2ecf20Sopenharmony_ci	void *p, *end;
57738c2ecf20Sopenharmony_ci	int ret;
57748c2ecf20Sopenharmony_ci
57758c2ecf20Sopenharmony_ci	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
57768c2ecf20Sopenharmony_ci			     "rbd", "get_parent", CEPH_OSD_FLAG_READ,
57778c2ecf20Sopenharmony_ci			     req_page, sizeof(u64), &reply_page, &reply_len);
57788c2ecf20Sopenharmony_ci	if (ret)
57798c2ecf20Sopenharmony_ci		return ret;
57808c2ecf20Sopenharmony_ci
57818c2ecf20Sopenharmony_ci	p = page_address(reply_page);
57828c2ecf20Sopenharmony_ci	end = p + reply_len;
57838c2ecf20Sopenharmony_ci	ceph_decode_64_safe(&p, end, pii->pool_id, e_inval);
57848c2ecf20Sopenharmony_ci	pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
57858c2ecf20Sopenharmony_ci	if (IS_ERR(pii->image_id)) {
57868c2ecf20Sopenharmony_ci		ret = PTR_ERR(pii->image_id);
57878c2ecf20Sopenharmony_ci		pii->image_id = NULL;
57888c2ecf20Sopenharmony_ci		return ret;
57898c2ecf20Sopenharmony_ci	}
57908c2ecf20Sopenharmony_ci	ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
57918c2ecf20Sopenharmony_ci	pii->has_overlap = true;
57928c2ecf20Sopenharmony_ci	ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
57938c2ecf20Sopenharmony_ci
57948c2ecf20Sopenharmony_ci	dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
57958c2ecf20Sopenharmony_ci	     __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
57968c2ecf20Sopenharmony_ci	     pii->has_overlap, pii->overlap);
57978c2ecf20Sopenharmony_ci	return 0;
57988c2ecf20Sopenharmony_ci
57998c2ecf20Sopenharmony_cie_inval:
58008c2ecf20Sopenharmony_ci	return -EINVAL;
58018c2ecf20Sopenharmony_ci}
58028c2ecf20Sopenharmony_ci
58038c2ecf20Sopenharmony_cistatic int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
58048c2ecf20Sopenharmony_ci				  struct parent_image_info *pii)
58058c2ecf20Sopenharmony_ci{
58068c2ecf20Sopenharmony_ci	struct page *req_page, *reply_page;
58078c2ecf20Sopenharmony_ci	void *p;
58088c2ecf20Sopenharmony_ci	int ret;
58098c2ecf20Sopenharmony_ci
58108c2ecf20Sopenharmony_ci	req_page = alloc_page(GFP_KERNEL);
58118c2ecf20Sopenharmony_ci	if (!req_page)
58128c2ecf20Sopenharmony_ci		return -ENOMEM;
58138c2ecf20Sopenharmony_ci
58148c2ecf20Sopenharmony_ci	reply_page = alloc_page(GFP_KERNEL);
58158c2ecf20Sopenharmony_ci	if (!reply_page) {
58168c2ecf20Sopenharmony_ci		__free_page(req_page);
58178c2ecf20Sopenharmony_ci		return -ENOMEM;
58188c2ecf20Sopenharmony_ci	}
58198c2ecf20Sopenharmony_ci
58208c2ecf20Sopenharmony_ci	p = page_address(req_page);
58218c2ecf20Sopenharmony_ci	ceph_encode_64(&p, rbd_dev->spec->snap_id);
58228c2ecf20Sopenharmony_ci	ret = __get_parent_info(rbd_dev, req_page, reply_page, pii);
58238c2ecf20Sopenharmony_ci	if (ret > 0)
58248c2ecf20Sopenharmony_ci		ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page,
58258c2ecf20Sopenharmony_ci					       pii);
58268c2ecf20Sopenharmony_ci
58278c2ecf20Sopenharmony_ci	__free_page(req_page);
58288c2ecf20Sopenharmony_ci	__free_page(reply_page);
58298c2ecf20Sopenharmony_ci	return ret;
58308c2ecf20Sopenharmony_ci}
58318c2ecf20Sopenharmony_ci
58328c2ecf20Sopenharmony_cistatic int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
58338c2ecf20Sopenharmony_ci{
58348c2ecf20Sopenharmony_ci	struct rbd_spec *parent_spec;
58358c2ecf20Sopenharmony_ci	struct parent_image_info pii = { 0 };
58368c2ecf20Sopenharmony_ci	int ret;
58378c2ecf20Sopenharmony_ci
58388c2ecf20Sopenharmony_ci	parent_spec = rbd_spec_alloc();
58398c2ecf20Sopenharmony_ci	if (!parent_spec)
58408c2ecf20Sopenharmony_ci		return -ENOMEM;
58418c2ecf20Sopenharmony_ci
58428c2ecf20Sopenharmony_ci	ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
58438c2ecf20Sopenharmony_ci	if (ret)
58448c2ecf20Sopenharmony_ci		goto out_err;
58458c2ecf20Sopenharmony_ci
58468c2ecf20Sopenharmony_ci	if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
58478c2ecf20Sopenharmony_ci		goto out;	/* No parent?  No problem. */
58488c2ecf20Sopenharmony_ci
58498c2ecf20Sopenharmony_ci	/* The ceph file layout needs to fit pool id in 32 bits */
58508c2ecf20Sopenharmony_ci
58518c2ecf20Sopenharmony_ci	ret = -EIO;
58528c2ecf20Sopenharmony_ci	if (pii.pool_id > (u64)U32_MAX) {
58538c2ecf20Sopenharmony_ci		rbd_warn(NULL, "parent pool id too large (%llu > %u)",
58548c2ecf20Sopenharmony_ci			(unsigned long long)pii.pool_id, U32_MAX);
58558c2ecf20Sopenharmony_ci		goto out_err;
58568c2ecf20Sopenharmony_ci	}
58578c2ecf20Sopenharmony_ci
58588c2ecf20Sopenharmony_ci	/*
58598c2ecf20Sopenharmony_ci	 * The parent won't change except when the clone is flattened,
58608c2ecf20Sopenharmony_ci	 * so we only need to record the parent image spec once.
58618c2ecf20Sopenharmony_ci	 */
58628c2ecf20Sopenharmony_ci	parent_spec->pool_id = pii.pool_id;
58638c2ecf20Sopenharmony_ci	if (pii.pool_ns && *pii.pool_ns) {
58648c2ecf20Sopenharmony_ci		parent_spec->pool_ns = pii.pool_ns;
58658c2ecf20Sopenharmony_ci		pii.pool_ns = NULL;
58668c2ecf20Sopenharmony_ci	}
58678c2ecf20Sopenharmony_ci	parent_spec->image_id = pii.image_id;
58688c2ecf20Sopenharmony_ci	pii.image_id = NULL;
58698c2ecf20Sopenharmony_ci	parent_spec->snap_id = pii.snap_id;
58708c2ecf20Sopenharmony_ci
58718c2ecf20Sopenharmony_ci	rbd_assert(!rbd_dev->parent_spec);
58728c2ecf20Sopenharmony_ci	rbd_dev->parent_spec = parent_spec;
58738c2ecf20Sopenharmony_ci	parent_spec = NULL;	/* rbd_dev now owns this */
58748c2ecf20Sopenharmony_ci
58758c2ecf20Sopenharmony_ci	/*
58768c2ecf20Sopenharmony_ci	 * Record the parent overlap.  If it's zero, issue a warning as
58778c2ecf20Sopenharmony_ci	 * we will proceed as if there is no parent.
58788c2ecf20Sopenharmony_ci	 */
58798c2ecf20Sopenharmony_ci	if (!pii.overlap)
58808c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
58818c2ecf20Sopenharmony_ci	rbd_dev->parent_overlap = pii.overlap;
58828c2ecf20Sopenharmony_ci
58838c2ecf20Sopenharmony_ciout:
58848c2ecf20Sopenharmony_ci	ret = 0;
58858c2ecf20Sopenharmony_ciout_err:
58868c2ecf20Sopenharmony_ci	rbd_parent_info_cleanup(&pii);
58878c2ecf20Sopenharmony_ci	rbd_spec_put(parent_spec);
58888c2ecf20Sopenharmony_ci	return ret;
58898c2ecf20Sopenharmony_ci}
58908c2ecf20Sopenharmony_ci
58918c2ecf20Sopenharmony_cistatic int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
58928c2ecf20Sopenharmony_ci				    u64 *stripe_unit, u64 *stripe_count)
58938c2ecf20Sopenharmony_ci{
58948c2ecf20Sopenharmony_ci	struct {
58958c2ecf20Sopenharmony_ci		__le64 stripe_unit;
58968c2ecf20Sopenharmony_ci		__le64 stripe_count;
58978c2ecf20Sopenharmony_ci	} __attribute__ ((packed)) striping_info_buf = { 0 };
58988c2ecf20Sopenharmony_ci	size_t size = sizeof (striping_info_buf);
58998c2ecf20Sopenharmony_ci	int ret;
59008c2ecf20Sopenharmony_ci
59018c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
59028c2ecf20Sopenharmony_ci				&rbd_dev->header_oloc, "get_stripe_unit_count",
59038c2ecf20Sopenharmony_ci				NULL, 0, &striping_info_buf, size);
59048c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
59058c2ecf20Sopenharmony_ci	if (ret < 0)
59068c2ecf20Sopenharmony_ci		return ret;
59078c2ecf20Sopenharmony_ci	if (ret < size)
59088c2ecf20Sopenharmony_ci		return -ERANGE;
59098c2ecf20Sopenharmony_ci
59108c2ecf20Sopenharmony_ci	*stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
59118c2ecf20Sopenharmony_ci	*stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
59128c2ecf20Sopenharmony_ci	dout("  stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
59138c2ecf20Sopenharmony_ci	     *stripe_count);
59148c2ecf20Sopenharmony_ci
59158c2ecf20Sopenharmony_ci	return 0;
59168c2ecf20Sopenharmony_ci}
59178c2ecf20Sopenharmony_ci
59188c2ecf20Sopenharmony_cistatic int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
59198c2ecf20Sopenharmony_ci{
59208c2ecf20Sopenharmony_ci	__le64 data_pool_buf;
59218c2ecf20Sopenharmony_ci	int ret;
59228c2ecf20Sopenharmony_ci
59238c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
59248c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, "get_data_pool",
59258c2ecf20Sopenharmony_ci				  NULL, 0, &data_pool_buf,
59268c2ecf20Sopenharmony_ci				  sizeof(data_pool_buf));
59278c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
59288c2ecf20Sopenharmony_ci	if (ret < 0)
59298c2ecf20Sopenharmony_ci		return ret;
59308c2ecf20Sopenharmony_ci	if (ret < sizeof(data_pool_buf))
59318c2ecf20Sopenharmony_ci		return -EBADMSG;
59328c2ecf20Sopenharmony_ci
59338c2ecf20Sopenharmony_ci	*data_pool_id = le64_to_cpu(data_pool_buf);
59348c2ecf20Sopenharmony_ci	dout("  data_pool_id = %lld\n", *data_pool_id);
59358c2ecf20Sopenharmony_ci	WARN_ON(*data_pool_id == CEPH_NOPOOL);
59368c2ecf20Sopenharmony_ci
59378c2ecf20Sopenharmony_ci	return 0;
59388c2ecf20Sopenharmony_ci}
59398c2ecf20Sopenharmony_ci
59408c2ecf20Sopenharmony_cistatic char *rbd_dev_image_name(struct rbd_device *rbd_dev)
59418c2ecf20Sopenharmony_ci{
59428c2ecf20Sopenharmony_ci	CEPH_DEFINE_OID_ONSTACK(oid);
59438c2ecf20Sopenharmony_ci	size_t image_id_size;
59448c2ecf20Sopenharmony_ci	char *image_id;
59458c2ecf20Sopenharmony_ci	void *p;
59468c2ecf20Sopenharmony_ci	void *end;
59478c2ecf20Sopenharmony_ci	size_t size;
59488c2ecf20Sopenharmony_ci	void *reply_buf = NULL;
59498c2ecf20Sopenharmony_ci	size_t len = 0;
59508c2ecf20Sopenharmony_ci	char *image_name = NULL;
59518c2ecf20Sopenharmony_ci	int ret;
59528c2ecf20Sopenharmony_ci
59538c2ecf20Sopenharmony_ci	rbd_assert(!rbd_dev->spec->image_name);
59548c2ecf20Sopenharmony_ci
59558c2ecf20Sopenharmony_ci	len = strlen(rbd_dev->spec->image_id);
59568c2ecf20Sopenharmony_ci	image_id_size = sizeof (__le32) + len;
59578c2ecf20Sopenharmony_ci	image_id = kmalloc(image_id_size, GFP_KERNEL);
59588c2ecf20Sopenharmony_ci	if (!image_id)
59598c2ecf20Sopenharmony_ci		return NULL;
59608c2ecf20Sopenharmony_ci
59618c2ecf20Sopenharmony_ci	p = image_id;
59628c2ecf20Sopenharmony_ci	end = image_id + image_id_size;
59638c2ecf20Sopenharmony_ci	ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len);
59648c2ecf20Sopenharmony_ci
59658c2ecf20Sopenharmony_ci	size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX;
59668c2ecf20Sopenharmony_ci	reply_buf = kmalloc(size, GFP_KERNEL);
59678c2ecf20Sopenharmony_ci	if (!reply_buf)
59688c2ecf20Sopenharmony_ci		goto out;
59698c2ecf20Sopenharmony_ci
59708c2ecf20Sopenharmony_ci	ceph_oid_printf(&oid, "%s", RBD_DIRECTORY);
59718c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
59728c2ecf20Sopenharmony_ci				  "dir_get_name", image_id, image_id_size,
59738c2ecf20Sopenharmony_ci				  reply_buf, size);
59748c2ecf20Sopenharmony_ci	if (ret < 0)
59758c2ecf20Sopenharmony_ci		goto out;
59768c2ecf20Sopenharmony_ci	p = reply_buf;
59778c2ecf20Sopenharmony_ci	end = reply_buf + ret;
59788c2ecf20Sopenharmony_ci
59798c2ecf20Sopenharmony_ci	image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL);
59808c2ecf20Sopenharmony_ci	if (IS_ERR(image_name))
59818c2ecf20Sopenharmony_ci		image_name = NULL;
59828c2ecf20Sopenharmony_ci	else
59838c2ecf20Sopenharmony_ci		dout("%s: name is %s len is %zd\n", __func__, image_name, len);
59848c2ecf20Sopenharmony_ciout:
59858c2ecf20Sopenharmony_ci	kfree(reply_buf);
59868c2ecf20Sopenharmony_ci	kfree(image_id);
59878c2ecf20Sopenharmony_ci
59888c2ecf20Sopenharmony_ci	return image_name;
59898c2ecf20Sopenharmony_ci}
59908c2ecf20Sopenharmony_ci
59918c2ecf20Sopenharmony_cistatic u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
59928c2ecf20Sopenharmony_ci{
59938c2ecf20Sopenharmony_ci	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
59948c2ecf20Sopenharmony_ci	const char *snap_name;
59958c2ecf20Sopenharmony_ci	u32 which = 0;
59968c2ecf20Sopenharmony_ci
59978c2ecf20Sopenharmony_ci	/* Skip over names until we find the one we are looking for */
59988c2ecf20Sopenharmony_ci
59998c2ecf20Sopenharmony_ci	snap_name = rbd_dev->header.snap_names;
60008c2ecf20Sopenharmony_ci	while (which < snapc->num_snaps) {
60018c2ecf20Sopenharmony_ci		if (!strcmp(name, snap_name))
60028c2ecf20Sopenharmony_ci			return snapc->snaps[which];
60038c2ecf20Sopenharmony_ci		snap_name += strlen(snap_name) + 1;
60048c2ecf20Sopenharmony_ci		which++;
60058c2ecf20Sopenharmony_ci	}
60068c2ecf20Sopenharmony_ci	return CEPH_NOSNAP;
60078c2ecf20Sopenharmony_ci}
60088c2ecf20Sopenharmony_ci
60098c2ecf20Sopenharmony_cistatic u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
60108c2ecf20Sopenharmony_ci{
60118c2ecf20Sopenharmony_ci	struct ceph_snap_context *snapc = rbd_dev->header.snapc;
60128c2ecf20Sopenharmony_ci	u32 which;
60138c2ecf20Sopenharmony_ci	bool found = false;
60148c2ecf20Sopenharmony_ci	u64 snap_id;
60158c2ecf20Sopenharmony_ci
60168c2ecf20Sopenharmony_ci	for (which = 0; !found && which < snapc->num_snaps; which++) {
60178c2ecf20Sopenharmony_ci		const char *snap_name;
60188c2ecf20Sopenharmony_ci
60198c2ecf20Sopenharmony_ci		snap_id = snapc->snaps[which];
60208c2ecf20Sopenharmony_ci		snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id);
60218c2ecf20Sopenharmony_ci		if (IS_ERR(snap_name)) {
60228c2ecf20Sopenharmony_ci			/* ignore no-longer existing snapshots */
60238c2ecf20Sopenharmony_ci			if (PTR_ERR(snap_name) == -ENOENT)
60248c2ecf20Sopenharmony_ci				continue;
60258c2ecf20Sopenharmony_ci			else
60268c2ecf20Sopenharmony_ci				break;
60278c2ecf20Sopenharmony_ci		}
60288c2ecf20Sopenharmony_ci		found = !strcmp(name, snap_name);
60298c2ecf20Sopenharmony_ci		kfree(snap_name);
60308c2ecf20Sopenharmony_ci	}
60318c2ecf20Sopenharmony_ci	return found ? snap_id : CEPH_NOSNAP;
60328c2ecf20Sopenharmony_ci}
60338c2ecf20Sopenharmony_ci
60348c2ecf20Sopenharmony_ci/*
60358c2ecf20Sopenharmony_ci * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if
60368c2ecf20Sopenharmony_ci * no snapshot by that name is found, or if an error occurs.
60378c2ecf20Sopenharmony_ci */
60388c2ecf20Sopenharmony_cistatic u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
60398c2ecf20Sopenharmony_ci{
60408c2ecf20Sopenharmony_ci	if (rbd_dev->image_format == 1)
60418c2ecf20Sopenharmony_ci		return rbd_v1_snap_id_by_name(rbd_dev, name);
60428c2ecf20Sopenharmony_ci
60438c2ecf20Sopenharmony_ci	return rbd_v2_snap_id_by_name(rbd_dev, name);
60448c2ecf20Sopenharmony_ci}
60458c2ecf20Sopenharmony_ci
60468c2ecf20Sopenharmony_ci/*
60478c2ecf20Sopenharmony_ci * An image being mapped will have everything but the snap id.
60488c2ecf20Sopenharmony_ci */
60498c2ecf20Sopenharmony_cistatic int rbd_spec_fill_snap_id(struct rbd_device *rbd_dev)
60508c2ecf20Sopenharmony_ci{
60518c2ecf20Sopenharmony_ci	struct rbd_spec *spec = rbd_dev->spec;
60528c2ecf20Sopenharmony_ci
60538c2ecf20Sopenharmony_ci	rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name);
60548c2ecf20Sopenharmony_ci	rbd_assert(spec->image_id && spec->image_name);
60558c2ecf20Sopenharmony_ci	rbd_assert(spec->snap_name);
60568c2ecf20Sopenharmony_ci
60578c2ecf20Sopenharmony_ci	if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
60588c2ecf20Sopenharmony_ci		u64 snap_id;
60598c2ecf20Sopenharmony_ci
60608c2ecf20Sopenharmony_ci		snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
60618c2ecf20Sopenharmony_ci		if (snap_id == CEPH_NOSNAP)
60628c2ecf20Sopenharmony_ci			return -ENOENT;
60638c2ecf20Sopenharmony_ci
60648c2ecf20Sopenharmony_ci		spec->snap_id = snap_id;
60658c2ecf20Sopenharmony_ci	} else {
60668c2ecf20Sopenharmony_ci		spec->snap_id = CEPH_NOSNAP;
60678c2ecf20Sopenharmony_ci	}
60688c2ecf20Sopenharmony_ci
60698c2ecf20Sopenharmony_ci	return 0;
60708c2ecf20Sopenharmony_ci}
60718c2ecf20Sopenharmony_ci
60728c2ecf20Sopenharmony_ci/*
60738c2ecf20Sopenharmony_ci * A parent image will have all ids but none of the names.
60748c2ecf20Sopenharmony_ci *
60758c2ecf20Sopenharmony_ci * All names in an rbd spec are dynamically allocated.  It's OK if we
60768c2ecf20Sopenharmony_ci * can't figure out the name for an image id.
60778c2ecf20Sopenharmony_ci */
60788c2ecf20Sopenharmony_cistatic int rbd_spec_fill_names(struct rbd_device *rbd_dev)
60798c2ecf20Sopenharmony_ci{
60808c2ecf20Sopenharmony_ci	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
60818c2ecf20Sopenharmony_ci	struct rbd_spec *spec = rbd_dev->spec;
60828c2ecf20Sopenharmony_ci	const char *pool_name;
60838c2ecf20Sopenharmony_ci	const char *image_name;
60848c2ecf20Sopenharmony_ci	const char *snap_name;
60858c2ecf20Sopenharmony_ci	int ret;
60868c2ecf20Sopenharmony_ci
60878c2ecf20Sopenharmony_ci	rbd_assert(spec->pool_id != CEPH_NOPOOL);
60888c2ecf20Sopenharmony_ci	rbd_assert(spec->image_id);
60898c2ecf20Sopenharmony_ci	rbd_assert(spec->snap_id != CEPH_NOSNAP);
60908c2ecf20Sopenharmony_ci
60918c2ecf20Sopenharmony_ci	/* Get the pool name; we have to make our own copy of this */
60928c2ecf20Sopenharmony_ci
60938c2ecf20Sopenharmony_ci	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id);
60948c2ecf20Sopenharmony_ci	if (!pool_name) {
60958c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id);
60968c2ecf20Sopenharmony_ci		return -EIO;
60978c2ecf20Sopenharmony_ci	}
60988c2ecf20Sopenharmony_ci	pool_name = kstrdup(pool_name, GFP_KERNEL);
60998c2ecf20Sopenharmony_ci	if (!pool_name)
61008c2ecf20Sopenharmony_ci		return -ENOMEM;
61018c2ecf20Sopenharmony_ci
61028c2ecf20Sopenharmony_ci	/* Fetch the image name; tolerate failure here */
61038c2ecf20Sopenharmony_ci
61048c2ecf20Sopenharmony_ci	image_name = rbd_dev_image_name(rbd_dev);
61058c2ecf20Sopenharmony_ci	if (!image_name)
61068c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "unable to get image name");
61078c2ecf20Sopenharmony_ci
61088c2ecf20Sopenharmony_ci	/* Fetch the snapshot name */
61098c2ecf20Sopenharmony_ci
61108c2ecf20Sopenharmony_ci	snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
61118c2ecf20Sopenharmony_ci	if (IS_ERR(snap_name)) {
61128c2ecf20Sopenharmony_ci		ret = PTR_ERR(snap_name);
61138c2ecf20Sopenharmony_ci		goto out_err;
61148c2ecf20Sopenharmony_ci	}
61158c2ecf20Sopenharmony_ci
61168c2ecf20Sopenharmony_ci	spec->pool_name = pool_name;
61178c2ecf20Sopenharmony_ci	spec->image_name = image_name;
61188c2ecf20Sopenharmony_ci	spec->snap_name = snap_name;
61198c2ecf20Sopenharmony_ci
61208c2ecf20Sopenharmony_ci	return 0;
61218c2ecf20Sopenharmony_ci
61228c2ecf20Sopenharmony_ciout_err:
61238c2ecf20Sopenharmony_ci	kfree(image_name);
61248c2ecf20Sopenharmony_ci	kfree(pool_name);
61258c2ecf20Sopenharmony_ci	return ret;
61268c2ecf20Sopenharmony_ci}
61278c2ecf20Sopenharmony_ci
61288c2ecf20Sopenharmony_cistatic int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
61298c2ecf20Sopenharmony_ci				   struct ceph_snap_context **psnapc)
61308c2ecf20Sopenharmony_ci{
61318c2ecf20Sopenharmony_ci	size_t size;
61328c2ecf20Sopenharmony_ci	int ret;
61338c2ecf20Sopenharmony_ci	void *reply_buf;
61348c2ecf20Sopenharmony_ci	void *p;
61358c2ecf20Sopenharmony_ci	void *end;
61368c2ecf20Sopenharmony_ci	u64 seq;
61378c2ecf20Sopenharmony_ci	u32 snap_count;
61388c2ecf20Sopenharmony_ci	struct ceph_snap_context *snapc;
61398c2ecf20Sopenharmony_ci	u32 i;
61408c2ecf20Sopenharmony_ci
61418c2ecf20Sopenharmony_ci	/*
61428c2ecf20Sopenharmony_ci	 * We'll need room for the seq value (maximum snapshot id),
61438c2ecf20Sopenharmony_ci	 * snapshot count, and array of that many snapshot ids.
61448c2ecf20Sopenharmony_ci	 * For now we have a fixed upper limit on the number we're
61458c2ecf20Sopenharmony_ci	 * prepared to receive.
61468c2ecf20Sopenharmony_ci	 */
61478c2ecf20Sopenharmony_ci	size = sizeof (__le64) + sizeof (__le32) +
61488c2ecf20Sopenharmony_ci			RBD_MAX_SNAP_COUNT * sizeof (__le64);
61498c2ecf20Sopenharmony_ci	reply_buf = kzalloc(size, GFP_KERNEL);
61508c2ecf20Sopenharmony_ci	if (!reply_buf)
61518c2ecf20Sopenharmony_ci		return -ENOMEM;
61528c2ecf20Sopenharmony_ci
61538c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
61548c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, "get_snapcontext",
61558c2ecf20Sopenharmony_ci				  NULL, 0, reply_buf, size);
61568c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
61578c2ecf20Sopenharmony_ci	if (ret < 0)
61588c2ecf20Sopenharmony_ci		goto out;
61598c2ecf20Sopenharmony_ci
61608c2ecf20Sopenharmony_ci	p = reply_buf;
61618c2ecf20Sopenharmony_ci	end = reply_buf + ret;
61628c2ecf20Sopenharmony_ci	ret = -ERANGE;
61638c2ecf20Sopenharmony_ci	ceph_decode_64_safe(&p, end, seq, out);
61648c2ecf20Sopenharmony_ci	ceph_decode_32_safe(&p, end, snap_count, out);
61658c2ecf20Sopenharmony_ci
61668c2ecf20Sopenharmony_ci	/*
61678c2ecf20Sopenharmony_ci	 * Make sure the reported number of snapshot ids wouldn't go
61688c2ecf20Sopenharmony_ci	 * beyond the end of our buffer.  But before checking that,
61698c2ecf20Sopenharmony_ci	 * make sure the computed size of the snapshot context we
61708c2ecf20Sopenharmony_ci	 * allocate is representable in a size_t.
61718c2ecf20Sopenharmony_ci	 */
61728c2ecf20Sopenharmony_ci	if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context))
61738c2ecf20Sopenharmony_ci				 / sizeof (u64)) {
61748c2ecf20Sopenharmony_ci		ret = -EINVAL;
61758c2ecf20Sopenharmony_ci		goto out;
61768c2ecf20Sopenharmony_ci	}
61778c2ecf20Sopenharmony_ci	if (!ceph_has_room(&p, end, snap_count * sizeof (__le64)))
61788c2ecf20Sopenharmony_ci		goto out;
61798c2ecf20Sopenharmony_ci	ret = 0;
61808c2ecf20Sopenharmony_ci
61818c2ecf20Sopenharmony_ci	snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
61828c2ecf20Sopenharmony_ci	if (!snapc) {
61838c2ecf20Sopenharmony_ci		ret = -ENOMEM;
61848c2ecf20Sopenharmony_ci		goto out;
61858c2ecf20Sopenharmony_ci	}
61868c2ecf20Sopenharmony_ci	snapc->seq = seq;
61878c2ecf20Sopenharmony_ci	for (i = 0; i < snap_count; i++)
61888c2ecf20Sopenharmony_ci		snapc->snaps[i] = ceph_decode_64(&p);
61898c2ecf20Sopenharmony_ci
61908c2ecf20Sopenharmony_ci	*psnapc = snapc;
61918c2ecf20Sopenharmony_ci	dout("  snap context seq = %llu, snap_count = %u\n",
61928c2ecf20Sopenharmony_ci		(unsigned long long)seq, (unsigned int)snap_count);
61938c2ecf20Sopenharmony_ciout:
61948c2ecf20Sopenharmony_ci	kfree(reply_buf);
61958c2ecf20Sopenharmony_ci
61968c2ecf20Sopenharmony_ci	return ret;
61978c2ecf20Sopenharmony_ci}
61988c2ecf20Sopenharmony_ci
61998c2ecf20Sopenharmony_cistatic const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
62008c2ecf20Sopenharmony_ci					u64 snap_id)
62018c2ecf20Sopenharmony_ci{
62028c2ecf20Sopenharmony_ci	size_t size;
62038c2ecf20Sopenharmony_ci	void *reply_buf;
62048c2ecf20Sopenharmony_ci	__le64 snapid;
62058c2ecf20Sopenharmony_ci	int ret;
62068c2ecf20Sopenharmony_ci	void *p;
62078c2ecf20Sopenharmony_ci	void *end;
62088c2ecf20Sopenharmony_ci	char *snap_name;
62098c2ecf20Sopenharmony_ci
62108c2ecf20Sopenharmony_ci	size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN;
62118c2ecf20Sopenharmony_ci	reply_buf = kmalloc(size, GFP_KERNEL);
62128c2ecf20Sopenharmony_ci	if (!reply_buf)
62138c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
62148c2ecf20Sopenharmony_ci
62158c2ecf20Sopenharmony_ci	snapid = cpu_to_le64(snap_id);
62168c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
62178c2ecf20Sopenharmony_ci				  &rbd_dev->header_oloc, "get_snapshot_name",
62188c2ecf20Sopenharmony_ci				  &snapid, sizeof(snapid), reply_buf, size);
62198c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
62208c2ecf20Sopenharmony_ci	if (ret < 0) {
62218c2ecf20Sopenharmony_ci		snap_name = ERR_PTR(ret);
62228c2ecf20Sopenharmony_ci		goto out;
62238c2ecf20Sopenharmony_ci	}
62248c2ecf20Sopenharmony_ci
62258c2ecf20Sopenharmony_ci	p = reply_buf;
62268c2ecf20Sopenharmony_ci	end = reply_buf + ret;
62278c2ecf20Sopenharmony_ci	snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
62288c2ecf20Sopenharmony_ci	if (IS_ERR(snap_name))
62298c2ecf20Sopenharmony_ci		goto out;
62308c2ecf20Sopenharmony_ci
62318c2ecf20Sopenharmony_ci	dout("  snap_id 0x%016llx snap_name = %s\n",
62328c2ecf20Sopenharmony_ci		(unsigned long long)snap_id, snap_name);
62338c2ecf20Sopenharmony_ciout:
62348c2ecf20Sopenharmony_ci	kfree(reply_buf);
62358c2ecf20Sopenharmony_ci
62368c2ecf20Sopenharmony_ci	return snap_name;
62378c2ecf20Sopenharmony_ci}
62388c2ecf20Sopenharmony_ci
62398c2ecf20Sopenharmony_cistatic int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
62408c2ecf20Sopenharmony_ci				  struct rbd_image_header *header,
62418c2ecf20Sopenharmony_ci				  bool first_time)
62428c2ecf20Sopenharmony_ci{
62438c2ecf20Sopenharmony_ci	int ret;
62448c2ecf20Sopenharmony_ci
62458c2ecf20Sopenharmony_ci	ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
62468c2ecf20Sopenharmony_ci				    first_time ? &header->obj_order : NULL,
62478c2ecf20Sopenharmony_ci				    &header->image_size);
62488c2ecf20Sopenharmony_ci	if (ret)
62498c2ecf20Sopenharmony_ci		return ret;
62508c2ecf20Sopenharmony_ci
62518c2ecf20Sopenharmony_ci	if (first_time) {
62528c2ecf20Sopenharmony_ci		ret = rbd_dev_v2_header_onetime(rbd_dev, header);
62538c2ecf20Sopenharmony_ci		if (ret)
62548c2ecf20Sopenharmony_ci			return ret;
62558c2ecf20Sopenharmony_ci	}
62568c2ecf20Sopenharmony_ci
62578c2ecf20Sopenharmony_ci	ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
62588c2ecf20Sopenharmony_ci	if (ret)
62598c2ecf20Sopenharmony_ci		return ret;
62608c2ecf20Sopenharmony_ci
62618c2ecf20Sopenharmony_ci	return 0;
62628c2ecf20Sopenharmony_ci}
62638c2ecf20Sopenharmony_ci
62648c2ecf20Sopenharmony_cistatic int rbd_dev_header_info(struct rbd_device *rbd_dev,
62658c2ecf20Sopenharmony_ci			       struct rbd_image_header *header,
62668c2ecf20Sopenharmony_ci			       bool first_time)
62678c2ecf20Sopenharmony_ci{
62688c2ecf20Sopenharmony_ci	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
62698c2ecf20Sopenharmony_ci	rbd_assert(!header->object_prefix && !header->snapc);
62708c2ecf20Sopenharmony_ci
62718c2ecf20Sopenharmony_ci	if (rbd_dev->image_format == 1)
62728c2ecf20Sopenharmony_ci		return rbd_dev_v1_header_info(rbd_dev, header, first_time);
62738c2ecf20Sopenharmony_ci
62748c2ecf20Sopenharmony_ci	return rbd_dev_v2_header_info(rbd_dev, header, first_time);
62758c2ecf20Sopenharmony_ci}
62768c2ecf20Sopenharmony_ci
/*
 * Advance *buf past any leading white space so that it points at the
 * first non-space character (or at the terminating '\0' if there is
 * none), and return the length of the token found there, i.e. the run
 * of non-white-space characters starting at the updated *buf.
 *
 * *buf must point to a '\0'-terminated string.  A return value of 0
 * means no token is left.
 */
static inline size_t next_token(const char **buf)
{
	/*
	 * The characters for which isspace() returns nonzero in the
	 * "C" and "POSIX" locales.
	 */
	static const char spaces[] = " \f\n\r\t\v";
	const char *token = *buf + strspn(*buf, spaces);

	*buf = token;			/* Start of token */

	return strcspn(token, spaces);	/* Token length */
}
62958c2ecf20Sopenharmony_ci
62968c2ecf20Sopenharmony_ci/*
62978c2ecf20Sopenharmony_ci * Finds the next token in *buf, dynamically allocates a buffer big
62988c2ecf20Sopenharmony_ci * enough to hold a copy of it, and copies the token into the new
62998c2ecf20Sopenharmony_ci * buffer.  The copy is guaranteed to be terminated with '\0'.  Note
63008c2ecf20Sopenharmony_ci * that a duplicate buffer is created even for a zero-length token.
63018c2ecf20Sopenharmony_ci *
63028c2ecf20Sopenharmony_ci * Returns a pointer to the newly-allocated duplicate, or a null
63038c2ecf20Sopenharmony_ci * pointer if memory for the duplicate was not available.  If
63048c2ecf20Sopenharmony_ci * the lenp argument is a non-null pointer, the length of the token
63058c2ecf20Sopenharmony_ci * (not including the '\0') is returned in *lenp.
63068c2ecf20Sopenharmony_ci *
63078c2ecf20Sopenharmony_ci * If successful, the *buf pointer will be updated to point beyond
63088c2ecf20Sopenharmony_ci * the end of the found token.
63098c2ecf20Sopenharmony_ci *
63108c2ecf20Sopenharmony_ci * Note: uses GFP_KERNEL for allocation.
63118c2ecf20Sopenharmony_ci */
63128c2ecf20Sopenharmony_cistatic inline char *dup_token(const char **buf, size_t *lenp)
63138c2ecf20Sopenharmony_ci{
63148c2ecf20Sopenharmony_ci	char *dup;
63158c2ecf20Sopenharmony_ci	size_t len;
63168c2ecf20Sopenharmony_ci
63178c2ecf20Sopenharmony_ci	len = next_token(buf);
63188c2ecf20Sopenharmony_ci	dup = kmemdup(*buf, len + 1, GFP_KERNEL);
63198c2ecf20Sopenharmony_ci	if (!dup)
63208c2ecf20Sopenharmony_ci		return NULL;
63218c2ecf20Sopenharmony_ci	*(dup + len) = '\0';
63228c2ecf20Sopenharmony_ci	*buf += len;
63238c2ecf20Sopenharmony_ci
63248c2ecf20Sopenharmony_ci	if (lenp)
63258c2ecf20Sopenharmony_ci		*lenp = len;
63268c2ecf20Sopenharmony_ci
63278c2ecf20Sopenharmony_ci	return dup;
63288c2ecf20Sopenharmony_ci}
63298c2ecf20Sopenharmony_ci
63308c2ecf20Sopenharmony_cistatic int rbd_parse_param(struct fs_parameter *param,
63318c2ecf20Sopenharmony_ci			    struct rbd_parse_opts_ctx *pctx)
63328c2ecf20Sopenharmony_ci{
63338c2ecf20Sopenharmony_ci	struct rbd_options *opt = pctx->opts;
63348c2ecf20Sopenharmony_ci	struct fs_parse_result result;
63358c2ecf20Sopenharmony_ci	struct p_log log = {.prefix = "rbd"};
63368c2ecf20Sopenharmony_ci	int token, ret;
63378c2ecf20Sopenharmony_ci
63388c2ecf20Sopenharmony_ci	ret = ceph_parse_param(param, pctx->copts, NULL);
63398c2ecf20Sopenharmony_ci	if (ret != -ENOPARAM)
63408c2ecf20Sopenharmony_ci		return ret;
63418c2ecf20Sopenharmony_ci
63428c2ecf20Sopenharmony_ci	token = __fs_parse(&log, rbd_parameters, param, &result);
63438c2ecf20Sopenharmony_ci	dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
63448c2ecf20Sopenharmony_ci	if (token < 0) {
63458c2ecf20Sopenharmony_ci		if (token == -ENOPARAM)
63468c2ecf20Sopenharmony_ci			return inval_plog(&log, "Unknown parameter '%s'",
63478c2ecf20Sopenharmony_ci					  param->key);
63488c2ecf20Sopenharmony_ci		return token;
63498c2ecf20Sopenharmony_ci	}
63508c2ecf20Sopenharmony_ci
63518c2ecf20Sopenharmony_ci	switch (token) {
63528c2ecf20Sopenharmony_ci	case Opt_queue_depth:
63538c2ecf20Sopenharmony_ci		if (result.uint_32 < 1)
63548c2ecf20Sopenharmony_ci			goto out_of_range;
63558c2ecf20Sopenharmony_ci		opt->queue_depth = result.uint_32;
63568c2ecf20Sopenharmony_ci		break;
63578c2ecf20Sopenharmony_ci	case Opt_alloc_size:
63588c2ecf20Sopenharmony_ci		if (result.uint_32 < SECTOR_SIZE)
63598c2ecf20Sopenharmony_ci			goto out_of_range;
63608c2ecf20Sopenharmony_ci		if (!is_power_of_2(result.uint_32))
63618c2ecf20Sopenharmony_ci			return inval_plog(&log, "alloc_size must be a power of 2");
63628c2ecf20Sopenharmony_ci		opt->alloc_size = result.uint_32;
63638c2ecf20Sopenharmony_ci		break;
63648c2ecf20Sopenharmony_ci	case Opt_lock_timeout:
63658c2ecf20Sopenharmony_ci		/* 0 is "wait forever" (i.e. infinite timeout) */
63668c2ecf20Sopenharmony_ci		if (result.uint_32 > INT_MAX / 1000)
63678c2ecf20Sopenharmony_ci			goto out_of_range;
63688c2ecf20Sopenharmony_ci		opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000);
63698c2ecf20Sopenharmony_ci		break;
63708c2ecf20Sopenharmony_ci	case Opt_pool_ns:
63718c2ecf20Sopenharmony_ci		kfree(pctx->spec->pool_ns);
63728c2ecf20Sopenharmony_ci		pctx->spec->pool_ns = param->string;
63738c2ecf20Sopenharmony_ci		param->string = NULL;
63748c2ecf20Sopenharmony_ci		break;
63758c2ecf20Sopenharmony_ci	case Opt_compression_hint:
63768c2ecf20Sopenharmony_ci		switch (result.uint_32) {
63778c2ecf20Sopenharmony_ci		case Opt_compression_hint_none:
63788c2ecf20Sopenharmony_ci			opt->alloc_hint_flags &=
63798c2ecf20Sopenharmony_ci			    ~(CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE |
63808c2ecf20Sopenharmony_ci			      CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE);
63818c2ecf20Sopenharmony_ci			break;
63828c2ecf20Sopenharmony_ci		case Opt_compression_hint_compressible:
63838c2ecf20Sopenharmony_ci			opt->alloc_hint_flags |=
63848c2ecf20Sopenharmony_ci			    CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE;
63858c2ecf20Sopenharmony_ci			opt->alloc_hint_flags &=
63868c2ecf20Sopenharmony_ci			    ~CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE;
63878c2ecf20Sopenharmony_ci			break;
63888c2ecf20Sopenharmony_ci		case Opt_compression_hint_incompressible:
63898c2ecf20Sopenharmony_ci			opt->alloc_hint_flags |=
63908c2ecf20Sopenharmony_ci			    CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE;
63918c2ecf20Sopenharmony_ci			opt->alloc_hint_flags &=
63928c2ecf20Sopenharmony_ci			    ~CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE;
63938c2ecf20Sopenharmony_ci			break;
63948c2ecf20Sopenharmony_ci		default:
63958c2ecf20Sopenharmony_ci			BUG();
63968c2ecf20Sopenharmony_ci		}
63978c2ecf20Sopenharmony_ci		break;
63988c2ecf20Sopenharmony_ci	case Opt_read_only:
63998c2ecf20Sopenharmony_ci		opt->read_only = true;
64008c2ecf20Sopenharmony_ci		break;
64018c2ecf20Sopenharmony_ci	case Opt_read_write:
64028c2ecf20Sopenharmony_ci		opt->read_only = false;
64038c2ecf20Sopenharmony_ci		break;
64048c2ecf20Sopenharmony_ci	case Opt_lock_on_read:
64058c2ecf20Sopenharmony_ci		opt->lock_on_read = true;
64068c2ecf20Sopenharmony_ci		break;
64078c2ecf20Sopenharmony_ci	case Opt_exclusive:
64088c2ecf20Sopenharmony_ci		opt->exclusive = true;
64098c2ecf20Sopenharmony_ci		break;
64108c2ecf20Sopenharmony_ci	case Opt_notrim:
64118c2ecf20Sopenharmony_ci		opt->trim = false;
64128c2ecf20Sopenharmony_ci		break;
64138c2ecf20Sopenharmony_ci	default:
64148c2ecf20Sopenharmony_ci		BUG();
64158c2ecf20Sopenharmony_ci	}
64168c2ecf20Sopenharmony_ci
64178c2ecf20Sopenharmony_ci	return 0;
64188c2ecf20Sopenharmony_ci
64198c2ecf20Sopenharmony_ciout_of_range:
64208c2ecf20Sopenharmony_ci	return inval_plog(&log, "%s out of range", param->key);
64218c2ecf20Sopenharmony_ci}
64228c2ecf20Sopenharmony_ci
64238c2ecf20Sopenharmony_ci/*
64248c2ecf20Sopenharmony_ci * This duplicates most of generic_parse_monolithic(), untying it from
64258c2ecf20Sopenharmony_ci * fs_context and skipping standard superblock and security options.
64268c2ecf20Sopenharmony_ci */
64278c2ecf20Sopenharmony_cistatic int rbd_parse_options(char *options, struct rbd_parse_opts_ctx *pctx)
64288c2ecf20Sopenharmony_ci{
64298c2ecf20Sopenharmony_ci	char *key;
64308c2ecf20Sopenharmony_ci	int ret = 0;
64318c2ecf20Sopenharmony_ci
64328c2ecf20Sopenharmony_ci	dout("%s '%s'\n", __func__, options);
64338c2ecf20Sopenharmony_ci	while ((key = strsep(&options, ",")) != NULL) {
64348c2ecf20Sopenharmony_ci		if (*key) {
64358c2ecf20Sopenharmony_ci			struct fs_parameter param = {
64368c2ecf20Sopenharmony_ci				.key	= key,
64378c2ecf20Sopenharmony_ci				.type	= fs_value_is_flag,
64388c2ecf20Sopenharmony_ci			};
64398c2ecf20Sopenharmony_ci			char *value = strchr(key, '=');
64408c2ecf20Sopenharmony_ci			size_t v_len = 0;
64418c2ecf20Sopenharmony_ci
64428c2ecf20Sopenharmony_ci			if (value) {
64438c2ecf20Sopenharmony_ci				if (value == key)
64448c2ecf20Sopenharmony_ci					continue;
64458c2ecf20Sopenharmony_ci				*value++ = 0;
64468c2ecf20Sopenharmony_ci				v_len = strlen(value);
64478c2ecf20Sopenharmony_ci				param.string = kmemdup_nul(value, v_len,
64488c2ecf20Sopenharmony_ci							   GFP_KERNEL);
64498c2ecf20Sopenharmony_ci				if (!param.string)
64508c2ecf20Sopenharmony_ci					return -ENOMEM;
64518c2ecf20Sopenharmony_ci				param.type = fs_value_is_string;
64528c2ecf20Sopenharmony_ci			}
64538c2ecf20Sopenharmony_ci			param.size = v_len;
64548c2ecf20Sopenharmony_ci
64558c2ecf20Sopenharmony_ci			ret = rbd_parse_param(&param, pctx);
64568c2ecf20Sopenharmony_ci			kfree(param.string);
64578c2ecf20Sopenharmony_ci			if (ret)
64588c2ecf20Sopenharmony_ci				break;
64598c2ecf20Sopenharmony_ci		}
64608c2ecf20Sopenharmony_ci	}
64618c2ecf20Sopenharmony_ci
64628c2ecf20Sopenharmony_ci	return ret;
64638c2ecf20Sopenharmony_ci}
64648c2ecf20Sopenharmony_ci
64658c2ecf20Sopenharmony_ci/*
64668c2ecf20Sopenharmony_ci * Parse the options provided for an "rbd add" (i.e., rbd image
64678c2ecf20Sopenharmony_ci * mapping) request.  These arrive via a write to /sys/bus/rbd/add,
64688c2ecf20Sopenharmony_ci * and the data written is passed here via a NUL-terminated buffer.
64698c2ecf20Sopenharmony_ci * Returns 0 if successful or an error code otherwise.
64708c2ecf20Sopenharmony_ci *
64718c2ecf20Sopenharmony_ci * The information extracted from these options is recorded in
64728c2ecf20Sopenharmony_ci * the other parameters which return dynamically-allocated
64738c2ecf20Sopenharmony_ci * structures:
64748c2ecf20Sopenharmony_ci *  ceph_opts
64758c2ecf20Sopenharmony_ci *      The address of a pointer that will refer to a ceph options
64768c2ecf20Sopenharmony_ci *      structure.  Caller must release the returned pointer using
64778c2ecf20Sopenharmony_ci *      ceph_destroy_options() when it is no longer needed.
64788c2ecf20Sopenharmony_ci *  rbd_opts
64798c2ecf20Sopenharmony_ci *	Address of an rbd options pointer.  Fully initialized by
64808c2ecf20Sopenharmony_ci *	this function; caller must release with kfree().
64818c2ecf20Sopenharmony_ci *  spec
64828c2ecf20Sopenharmony_ci *	Address of an rbd image specification pointer.  Fully
64838c2ecf20Sopenharmony_ci *	initialized by this function based on parsed options.
64848c2ecf20Sopenharmony_ci *	Caller must release with rbd_spec_put().
64858c2ecf20Sopenharmony_ci *
64868c2ecf20Sopenharmony_ci * The options passed take this form:
64878c2ecf20Sopenharmony_ci *  <mon_addrs> <options> <pool_name> <image_name> [<snap_id>]
64888c2ecf20Sopenharmony_ci * where:
64898c2ecf20Sopenharmony_ci *  <mon_addrs>
64908c2ecf20Sopenharmony_ci *      A comma-separated list of one or more monitor addresses.
64918c2ecf20Sopenharmony_ci *      A monitor address is an ip address, optionally followed
64928c2ecf20Sopenharmony_ci *      by a port number (separated by a colon).
64938c2ecf20Sopenharmony_ci *        I.e.:  ip1[:port1][,ip2[:port2]...]
64948c2ecf20Sopenharmony_ci *  <options>
64958c2ecf20Sopenharmony_ci *      A comma-separated list of ceph and/or rbd options.
64968c2ecf20Sopenharmony_ci *  <pool_name>
64978c2ecf20Sopenharmony_ci *      The name of the rados pool containing the rbd image.
64988c2ecf20Sopenharmony_ci *  <image_name>
64998c2ecf20Sopenharmony_ci *      The name of the image in that pool to map.
65008c2ecf20Sopenharmony_ci *  <snap_id>
65018c2ecf20Sopenharmony_ci *      An optional snapshot id.  If provided, the mapping will
65028c2ecf20Sopenharmony_ci *      present data from the image at the time that snapshot was
65038c2ecf20Sopenharmony_ci *      created.  The image head is used if no snapshot id is
65048c2ecf20Sopenharmony_ci *      provided.  Snapshot mappings are always read-only.
65058c2ecf20Sopenharmony_ci */
65068c2ecf20Sopenharmony_cistatic int rbd_add_parse_args(const char *buf,
65078c2ecf20Sopenharmony_ci				struct ceph_options **ceph_opts,
65088c2ecf20Sopenharmony_ci				struct rbd_options **opts,
65098c2ecf20Sopenharmony_ci				struct rbd_spec **rbd_spec)
65108c2ecf20Sopenharmony_ci{
65118c2ecf20Sopenharmony_ci	size_t len;
65128c2ecf20Sopenharmony_ci	char *options;
65138c2ecf20Sopenharmony_ci	const char *mon_addrs;
65148c2ecf20Sopenharmony_ci	char *snap_name;
65158c2ecf20Sopenharmony_ci	size_t mon_addrs_size;
65168c2ecf20Sopenharmony_ci	struct rbd_parse_opts_ctx pctx = { 0 };
65178c2ecf20Sopenharmony_ci	int ret;
65188c2ecf20Sopenharmony_ci
65198c2ecf20Sopenharmony_ci	/* The first four tokens are required */
65208c2ecf20Sopenharmony_ci
65218c2ecf20Sopenharmony_ci	len = next_token(&buf);
65228c2ecf20Sopenharmony_ci	if (!len) {
65238c2ecf20Sopenharmony_ci		rbd_warn(NULL, "no monitor address(es) provided");
65248c2ecf20Sopenharmony_ci		return -EINVAL;
65258c2ecf20Sopenharmony_ci	}
65268c2ecf20Sopenharmony_ci	mon_addrs = buf;
65278c2ecf20Sopenharmony_ci	mon_addrs_size = len;
65288c2ecf20Sopenharmony_ci	buf += len;
65298c2ecf20Sopenharmony_ci
65308c2ecf20Sopenharmony_ci	ret = -EINVAL;
65318c2ecf20Sopenharmony_ci	options = dup_token(&buf, NULL);
65328c2ecf20Sopenharmony_ci	if (!options)
65338c2ecf20Sopenharmony_ci		return -ENOMEM;
65348c2ecf20Sopenharmony_ci	if (!*options) {
65358c2ecf20Sopenharmony_ci		rbd_warn(NULL, "no options provided");
65368c2ecf20Sopenharmony_ci		goto out_err;
65378c2ecf20Sopenharmony_ci	}
65388c2ecf20Sopenharmony_ci
65398c2ecf20Sopenharmony_ci	pctx.spec = rbd_spec_alloc();
65408c2ecf20Sopenharmony_ci	if (!pctx.spec)
65418c2ecf20Sopenharmony_ci		goto out_mem;
65428c2ecf20Sopenharmony_ci
65438c2ecf20Sopenharmony_ci	pctx.spec->pool_name = dup_token(&buf, NULL);
65448c2ecf20Sopenharmony_ci	if (!pctx.spec->pool_name)
65458c2ecf20Sopenharmony_ci		goto out_mem;
65468c2ecf20Sopenharmony_ci	if (!*pctx.spec->pool_name) {
65478c2ecf20Sopenharmony_ci		rbd_warn(NULL, "no pool name provided");
65488c2ecf20Sopenharmony_ci		goto out_err;
65498c2ecf20Sopenharmony_ci	}
65508c2ecf20Sopenharmony_ci
65518c2ecf20Sopenharmony_ci	pctx.spec->image_name = dup_token(&buf, NULL);
65528c2ecf20Sopenharmony_ci	if (!pctx.spec->image_name)
65538c2ecf20Sopenharmony_ci		goto out_mem;
65548c2ecf20Sopenharmony_ci	if (!*pctx.spec->image_name) {
65558c2ecf20Sopenharmony_ci		rbd_warn(NULL, "no image name provided");
65568c2ecf20Sopenharmony_ci		goto out_err;
65578c2ecf20Sopenharmony_ci	}
65588c2ecf20Sopenharmony_ci
65598c2ecf20Sopenharmony_ci	/*
65608c2ecf20Sopenharmony_ci	 * Snapshot name is optional; default is to use "-"
65618c2ecf20Sopenharmony_ci	 * (indicating the head/no snapshot).
65628c2ecf20Sopenharmony_ci	 */
65638c2ecf20Sopenharmony_ci	len = next_token(&buf);
65648c2ecf20Sopenharmony_ci	if (!len) {
65658c2ecf20Sopenharmony_ci		buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */
65668c2ecf20Sopenharmony_ci		len = sizeof (RBD_SNAP_HEAD_NAME) - 1;
65678c2ecf20Sopenharmony_ci	} else if (len > RBD_MAX_SNAP_NAME_LEN) {
65688c2ecf20Sopenharmony_ci		ret = -ENAMETOOLONG;
65698c2ecf20Sopenharmony_ci		goto out_err;
65708c2ecf20Sopenharmony_ci	}
65718c2ecf20Sopenharmony_ci	snap_name = kmemdup(buf, len + 1, GFP_KERNEL);
65728c2ecf20Sopenharmony_ci	if (!snap_name)
65738c2ecf20Sopenharmony_ci		goto out_mem;
65748c2ecf20Sopenharmony_ci	*(snap_name + len) = '\0';
65758c2ecf20Sopenharmony_ci	pctx.spec->snap_name = snap_name;
65768c2ecf20Sopenharmony_ci
65778c2ecf20Sopenharmony_ci	pctx.copts = ceph_alloc_options();
65788c2ecf20Sopenharmony_ci	if (!pctx.copts)
65798c2ecf20Sopenharmony_ci		goto out_mem;
65808c2ecf20Sopenharmony_ci
65818c2ecf20Sopenharmony_ci	/* Initialize all rbd options to the defaults */
65828c2ecf20Sopenharmony_ci
65838c2ecf20Sopenharmony_ci	pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL);
65848c2ecf20Sopenharmony_ci	if (!pctx.opts)
65858c2ecf20Sopenharmony_ci		goto out_mem;
65868c2ecf20Sopenharmony_ci
65878c2ecf20Sopenharmony_ci	pctx.opts->read_only = RBD_READ_ONLY_DEFAULT;
65888c2ecf20Sopenharmony_ci	pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
65898c2ecf20Sopenharmony_ci	pctx.opts->alloc_size = RBD_ALLOC_SIZE_DEFAULT;
65908c2ecf20Sopenharmony_ci	pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT;
65918c2ecf20Sopenharmony_ci	pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT;
65928c2ecf20Sopenharmony_ci	pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT;
65938c2ecf20Sopenharmony_ci	pctx.opts->trim = RBD_TRIM_DEFAULT;
65948c2ecf20Sopenharmony_ci
65958c2ecf20Sopenharmony_ci	ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL);
65968c2ecf20Sopenharmony_ci	if (ret)
65978c2ecf20Sopenharmony_ci		goto out_err;
65988c2ecf20Sopenharmony_ci
65998c2ecf20Sopenharmony_ci	ret = rbd_parse_options(options, &pctx);
66008c2ecf20Sopenharmony_ci	if (ret)
66018c2ecf20Sopenharmony_ci		goto out_err;
66028c2ecf20Sopenharmony_ci
66038c2ecf20Sopenharmony_ci	*ceph_opts = pctx.copts;
66048c2ecf20Sopenharmony_ci	*opts = pctx.opts;
66058c2ecf20Sopenharmony_ci	*rbd_spec = pctx.spec;
66068c2ecf20Sopenharmony_ci	kfree(options);
66078c2ecf20Sopenharmony_ci	return 0;
66088c2ecf20Sopenharmony_ci
66098c2ecf20Sopenharmony_ciout_mem:
66108c2ecf20Sopenharmony_ci	ret = -ENOMEM;
66118c2ecf20Sopenharmony_ciout_err:
66128c2ecf20Sopenharmony_ci	kfree(pctx.opts);
66138c2ecf20Sopenharmony_ci	ceph_destroy_options(pctx.copts);
66148c2ecf20Sopenharmony_ci	rbd_spec_put(pctx.spec);
66158c2ecf20Sopenharmony_ci	kfree(options);
66168c2ecf20Sopenharmony_ci	return ret;
66178c2ecf20Sopenharmony_ci}
66188c2ecf20Sopenharmony_ci
66198c2ecf20Sopenharmony_cistatic void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
66208c2ecf20Sopenharmony_ci{
66218c2ecf20Sopenharmony_ci	down_write(&rbd_dev->lock_rwsem);
66228c2ecf20Sopenharmony_ci	if (__rbd_is_lock_owner(rbd_dev))
66238c2ecf20Sopenharmony_ci		__rbd_release_lock(rbd_dev);
66248c2ecf20Sopenharmony_ci	up_write(&rbd_dev->lock_rwsem);
66258c2ecf20Sopenharmony_ci}
66268c2ecf20Sopenharmony_ci
66278c2ecf20Sopenharmony_ci/*
66288c2ecf20Sopenharmony_ci * If the wait is interrupted, an error is returned even if the lock
66298c2ecf20Sopenharmony_ci * was successfully acquired.  rbd_dev_image_unlock() will release it
66308c2ecf20Sopenharmony_ci * if needed.
66318c2ecf20Sopenharmony_ci */
66328c2ecf20Sopenharmony_cistatic int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
66338c2ecf20Sopenharmony_ci{
66348c2ecf20Sopenharmony_ci	long ret;
66358c2ecf20Sopenharmony_ci
66368c2ecf20Sopenharmony_ci	if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) {
66378c2ecf20Sopenharmony_ci		if (!rbd_dev->opts->exclusive && !rbd_dev->opts->lock_on_read)
66388c2ecf20Sopenharmony_ci			return 0;
66398c2ecf20Sopenharmony_ci
66408c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "exclusive-lock feature is not enabled");
66418c2ecf20Sopenharmony_ci		return -EINVAL;
66428c2ecf20Sopenharmony_ci	}
66438c2ecf20Sopenharmony_ci
66448c2ecf20Sopenharmony_ci	if (rbd_is_ro(rbd_dev))
66458c2ecf20Sopenharmony_ci		return 0;
66468c2ecf20Sopenharmony_ci
66478c2ecf20Sopenharmony_ci	rbd_assert(!rbd_is_lock_owner(rbd_dev));
66488c2ecf20Sopenharmony_ci	queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
66498c2ecf20Sopenharmony_ci	ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
66508c2ecf20Sopenharmony_ci			    ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
66518c2ecf20Sopenharmony_ci	if (ret > 0) {
66528c2ecf20Sopenharmony_ci		ret = rbd_dev->acquire_err;
66538c2ecf20Sopenharmony_ci	} else {
66548c2ecf20Sopenharmony_ci		cancel_delayed_work_sync(&rbd_dev->lock_dwork);
66558c2ecf20Sopenharmony_ci		if (!ret)
66568c2ecf20Sopenharmony_ci			ret = -ETIMEDOUT;
66578c2ecf20Sopenharmony_ci
66588c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
66598c2ecf20Sopenharmony_ci	}
66608c2ecf20Sopenharmony_ci	if (ret)
66618c2ecf20Sopenharmony_ci		return ret;
66628c2ecf20Sopenharmony_ci
66638c2ecf20Sopenharmony_ci	/*
66648c2ecf20Sopenharmony_ci	 * The lock may have been released by now, unless automatic lock
66658c2ecf20Sopenharmony_ci	 * transitions are disabled.
66668c2ecf20Sopenharmony_ci	 */
66678c2ecf20Sopenharmony_ci	rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev));
66688c2ecf20Sopenharmony_ci	return 0;
66698c2ecf20Sopenharmony_ci}
66708c2ecf20Sopenharmony_ci
66718c2ecf20Sopenharmony_ci/*
66728c2ecf20Sopenharmony_ci * An rbd format 2 image has a unique identifier, distinct from the
66738c2ecf20Sopenharmony_ci * name given to it by the user.  Internally, that identifier is
66748c2ecf20Sopenharmony_ci * what's used to specify the names of objects related to the image.
66758c2ecf20Sopenharmony_ci *
66768c2ecf20Sopenharmony_ci * A special "rbd id" object is used to map an rbd image name to its
66778c2ecf20Sopenharmony_ci * id.  If that object doesn't exist, then there is no v2 rbd image
66788c2ecf20Sopenharmony_ci * with the supplied name.
66798c2ecf20Sopenharmony_ci *
66808c2ecf20Sopenharmony_ci * This function will record the given rbd_dev's image_id field if
66818c2ecf20Sopenharmony_ci * it can be determined, and in that case will return 0.  If any
66828c2ecf20Sopenharmony_ci * errors occur a negative errno will be returned and the rbd_dev's
66838c2ecf20Sopenharmony_ci * image_id field will be unchanged (and should be NULL).
66848c2ecf20Sopenharmony_ci */
66858c2ecf20Sopenharmony_cistatic int rbd_dev_image_id(struct rbd_device *rbd_dev)
66868c2ecf20Sopenharmony_ci{
66878c2ecf20Sopenharmony_ci	int ret;
66888c2ecf20Sopenharmony_ci	size_t size;
66898c2ecf20Sopenharmony_ci	CEPH_DEFINE_OID_ONSTACK(oid);
66908c2ecf20Sopenharmony_ci	void *response;
66918c2ecf20Sopenharmony_ci	char *image_id;
66928c2ecf20Sopenharmony_ci
66938c2ecf20Sopenharmony_ci	/*
66948c2ecf20Sopenharmony_ci	 * When probing a parent image, the image id is already
66958c2ecf20Sopenharmony_ci	 * known (and the image name likely is not).  There's no
66968c2ecf20Sopenharmony_ci	 * need to fetch the image id again in this case.  We
66978c2ecf20Sopenharmony_ci	 * do still need to set the image format though.
66988c2ecf20Sopenharmony_ci	 */
66998c2ecf20Sopenharmony_ci	if (rbd_dev->spec->image_id) {
67008c2ecf20Sopenharmony_ci		rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1;
67018c2ecf20Sopenharmony_ci
67028c2ecf20Sopenharmony_ci		return 0;
67038c2ecf20Sopenharmony_ci	}
67048c2ecf20Sopenharmony_ci
67058c2ecf20Sopenharmony_ci	/*
67068c2ecf20Sopenharmony_ci	 * First, see if the format 2 image id file exists, and if
67078c2ecf20Sopenharmony_ci	 * so, get the image's persistent id from it.
67088c2ecf20Sopenharmony_ci	 */
67098c2ecf20Sopenharmony_ci	ret = ceph_oid_aprintf(&oid, GFP_KERNEL, "%s%s", RBD_ID_PREFIX,
67108c2ecf20Sopenharmony_ci			       rbd_dev->spec->image_name);
67118c2ecf20Sopenharmony_ci	if (ret)
67128c2ecf20Sopenharmony_ci		return ret;
67138c2ecf20Sopenharmony_ci
67148c2ecf20Sopenharmony_ci	dout("rbd id object name is %s\n", oid.name);
67158c2ecf20Sopenharmony_ci
67168c2ecf20Sopenharmony_ci	/* Response will be an encoded string, which includes a length */
67178c2ecf20Sopenharmony_ci	size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX;
67188c2ecf20Sopenharmony_ci	response = kzalloc(size, GFP_NOIO);
67198c2ecf20Sopenharmony_ci	if (!response) {
67208c2ecf20Sopenharmony_ci		ret = -ENOMEM;
67218c2ecf20Sopenharmony_ci		goto out;
67228c2ecf20Sopenharmony_ci	}
67238c2ecf20Sopenharmony_ci
67248c2ecf20Sopenharmony_ci	/* If it doesn't exist we'll assume it's a format 1 image */
67258c2ecf20Sopenharmony_ci
67268c2ecf20Sopenharmony_ci	ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc,
67278c2ecf20Sopenharmony_ci				  "get_id", NULL, 0,
67288c2ecf20Sopenharmony_ci				  response, size);
67298c2ecf20Sopenharmony_ci	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
67308c2ecf20Sopenharmony_ci	if (ret == -ENOENT) {
67318c2ecf20Sopenharmony_ci		image_id = kstrdup("", GFP_KERNEL);
67328c2ecf20Sopenharmony_ci		ret = image_id ? 0 : -ENOMEM;
67338c2ecf20Sopenharmony_ci		if (!ret)
67348c2ecf20Sopenharmony_ci			rbd_dev->image_format = 1;
67358c2ecf20Sopenharmony_ci	} else if (ret >= 0) {
67368c2ecf20Sopenharmony_ci		void *p = response;
67378c2ecf20Sopenharmony_ci
67388c2ecf20Sopenharmony_ci		image_id = ceph_extract_encoded_string(&p, p + ret,
67398c2ecf20Sopenharmony_ci						NULL, GFP_NOIO);
67408c2ecf20Sopenharmony_ci		ret = PTR_ERR_OR_ZERO(image_id);
67418c2ecf20Sopenharmony_ci		if (!ret)
67428c2ecf20Sopenharmony_ci			rbd_dev->image_format = 2;
67438c2ecf20Sopenharmony_ci	}
67448c2ecf20Sopenharmony_ci
67458c2ecf20Sopenharmony_ci	if (!ret) {
67468c2ecf20Sopenharmony_ci		rbd_dev->spec->image_id = image_id;
67478c2ecf20Sopenharmony_ci		dout("image_id is %s\n", image_id);
67488c2ecf20Sopenharmony_ci	}
67498c2ecf20Sopenharmony_ciout:
67508c2ecf20Sopenharmony_ci	kfree(response);
67518c2ecf20Sopenharmony_ci	ceph_oid_destroy(&oid);
67528c2ecf20Sopenharmony_ci	return ret;
67538c2ecf20Sopenharmony_ci}
67548c2ecf20Sopenharmony_ci
67558c2ecf20Sopenharmony_ci/*
67568c2ecf20Sopenharmony_ci * Undo whatever state changes are made by v1 or v2 header info
67578c2ecf20Sopenharmony_ci * call.
67588c2ecf20Sopenharmony_ci */
67598c2ecf20Sopenharmony_cistatic void rbd_dev_unprobe(struct rbd_device *rbd_dev)
67608c2ecf20Sopenharmony_ci{
67618c2ecf20Sopenharmony_ci	rbd_dev_parent_put(rbd_dev);
67628c2ecf20Sopenharmony_ci	rbd_object_map_free(rbd_dev);
67638c2ecf20Sopenharmony_ci	rbd_dev_mapping_clear(rbd_dev);
67648c2ecf20Sopenharmony_ci
67658c2ecf20Sopenharmony_ci	/* Free dynamic fields from the header, then zero it out */
67668c2ecf20Sopenharmony_ci
67678c2ecf20Sopenharmony_ci	rbd_image_header_cleanup(&rbd_dev->header);
67688c2ecf20Sopenharmony_ci}
67698c2ecf20Sopenharmony_ci
67708c2ecf20Sopenharmony_cistatic int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
67718c2ecf20Sopenharmony_ci				     struct rbd_image_header *header)
67728c2ecf20Sopenharmony_ci{
67738c2ecf20Sopenharmony_ci	int ret;
67748c2ecf20Sopenharmony_ci
67758c2ecf20Sopenharmony_ci	ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
67768c2ecf20Sopenharmony_ci	if (ret)
67778c2ecf20Sopenharmony_ci		return ret;
67788c2ecf20Sopenharmony_ci
67798c2ecf20Sopenharmony_ci	/*
67808c2ecf20Sopenharmony_ci	 * Get the and check features for the image.  Currently the
67818c2ecf20Sopenharmony_ci	 * features are assumed to never change.
67828c2ecf20Sopenharmony_ci	 */
67838c2ecf20Sopenharmony_ci	ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
67848c2ecf20Sopenharmony_ci					rbd_is_ro(rbd_dev), &header->features);
67858c2ecf20Sopenharmony_ci	if (ret)
67868c2ecf20Sopenharmony_ci		return ret;
67878c2ecf20Sopenharmony_ci
67888c2ecf20Sopenharmony_ci	/* If the image supports fancy striping, get its parameters */
67898c2ecf20Sopenharmony_ci
67908c2ecf20Sopenharmony_ci	if (header->features & RBD_FEATURE_STRIPINGV2) {
67918c2ecf20Sopenharmony_ci		ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
67928c2ecf20Sopenharmony_ci					       &header->stripe_count);
67938c2ecf20Sopenharmony_ci		if (ret)
67948c2ecf20Sopenharmony_ci			return ret;
67958c2ecf20Sopenharmony_ci	}
67968c2ecf20Sopenharmony_ci
67978c2ecf20Sopenharmony_ci	if (header->features & RBD_FEATURE_DATA_POOL) {
67988c2ecf20Sopenharmony_ci		ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
67998c2ecf20Sopenharmony_ci		if (ret)
68008c2ecf20Sopenharmony_ci			return ret;
68018c2ecf20Sopenharmony_ci	}
68028c2ecf20Sopenharmony_ci
68038c2ecf20Sopenharmony_ci	return 0;
68048c2ecf20Sopenharmony_ci}
68058c2ecf20Sopenharmony_ci
68068c2ecf20Sopenharmony_ci/*
68078c2ecf20Sopenharmony_ci * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
68088c2ecf20Sopenharmony_ci * rbd_dev_image_probe() recursion depth, which means it's also the
68098c2ecf20Sopenharmony_ci * length of the already discovered part of the parent chain.
68108c2ecf20Sopenharmony_ci */
68118c2ecf20Sopenharmony_cistatic int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
68128c2ecf20Sopenharmony_ci{
68138c2ecf20Sopenharmony_ci	struct rbd_device *parent = NULL;
68148c2ecf20Sopenharmony_ci	int ret;
68158c2ecf20Sopenharmony_ci
68168c2ecf20Sopenharmony_ci	if (!rbd_dev->parent_spec)
68178c2ecf20Sopenharmony_ci		return 0;
68188c2ecf20Sopenharmony_ci
68198c2ecf20Sopenharmony_ci	if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
68208c2ecf20Sopenharmony_ci		pr_info("parent chain is too long (%d)\n", depth);
68218c2ecf20Sopenharmony_ci		ret = -EINVAL;
68228c2ecf20Sopenharmony_ci		goto out_err;
68238c2ecf20Sopenharmony_ci	}
68248c2ecf20Sopenharmony_ci
68258c2ecf20Sopenharmony_ci	parent = __rbd_dev_create(rbd_dev->parent_spec);
68268c2ecf20Sopenharmony_ci	if (!parent) {
68278c2ecf20Sopenharmony_ci		ret = -ENOMEM;
68288c2ecf20Sopenharmony_ci		goto out_err;
68298c2ecf20Sopenharmony_ci	}
68308c2ecf20Sopenharmony_ci
68318c2ecf20Sopenharmony_ci	/*
68328c2ecf20Sopenharmony_ci	 * Images related by parent/child relationships always share
68338c2ecf20Sopenharmony_ci	 * rbd_client and spec/parent_spec, so bump their refcounts.
68348c2ecf20Sopenharmony_ci	 */
68358c2ecf20Sopenharmony_ci	parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client);
68368c2ecf20Sopenharmony_ci	parent->spec = rbd_spec_get(rbd_dev->parent_spec);
68378c2ecf20Sopenharmony_ci
68388c2ecf20Sopenharmony_ci	__set_bit(RBD_DEV_FLAG_READONLY, &parent->flags);
68398c2ecf20Sopenharmony_ci
68408c2ecf20Sopenharmony_ci	ret = rbd_dev_image_probe(parent, depth);
68418c2ecf20Sopenharmony_ci	if (ret < 0)
68428c2ecf20Sopenharmony_ci		goto out_err;
68438c2ecf20Sopenharmony_ci
68448c2ecf20Sopenharmony_ci	rbd_dev->parent = parent;
68458c2ecf20Sopenharmony_ci	atomic_set(&rbd_dev->parent_ref, 1);
68468c2ecf20Sopenharmony_ci	return 0;
68478c2ecf20Sopenharmony_ci
68488c2ecf20Sopenharmony_ciout_err:
68498c2ecf20Sopenharmony_ci	rbd_dev_unparent(rbd_dev);
68508c2ecf20Sopenharmony_ci	rbd_dev_destroy(parent);
68518c2ecf20Sopenharmony_ci	return ret;
68528c2ecf20Sopenharmony_ci}
68538c2ecf20Sopenharmony_ci
68548c2ecf20Sopenharmony_cistatic void rbd_dev_device_release(struct rbd_device *rbd_dev)
68558c2ecf20Sopenharmony_ci{
68568c2ecf20Sopenharmony_ci	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
68578c2ecf20Sopenharmony_ci	rbd_free_disk(rbd_dev);
68588c2ecf20Sopenharmony_ci	if (!single_major)
68598c2ecf20Sopenharmony_ci		unregister_blkdev(rbd_dev->major, rbd_dev->name);
68608c2ecf20Sopenharmony_ci}
68618c2ecf20Sopenharmony_ci
68628c2ecf20Sopenharmony_ci/*
68638c2ecf20Sopenharmony_ci * rbd_dev->header_rwsem must be locked for write and will be unlocked
68648c2ecf20Sopenharmony_ci * upon return.
68658c2ecf20Sopenharmony_ci */
68668c2ecf20Sopenharmony_cistatic int rbd_dev_device_setup(struct rbd_device *rbd_dev)
68678c2ecf20Sopenharmony_ci{
68688c2ecf20Sopenharmony_ci	int ret;
68698c2ecf20Sopenharmony_ci
68708c2ecf20Sopenharmony_ci	/* Record our major and minor device numbers. */
68718c2ecf20Sopenharmony_ci
68728c2ecf20Sopenharmony_ci	if (!single_major) {
68738c2ecf20Sopenharmony_ci		ret = register_blkdev(0, rbd_dev->name);
68748c2ecf20Sopenharmony_ci		if (ret < 0)
68758c2ecf20Sopenharmony_ci			goto err_out_unlock;
68768c2ecf20Sopenharmony_ci
68778c2ecf20Sopenharmony_ci		rbd_dev->major = ret;
68788c2ecf20Sopenharmony_ci		rbd_dev->minor = 0;
68798c2ecf20Sopenharmony_ci	} else {
68808c2ecf20Sopenharmony_ci		rbd_dev->major = rbd_major;
68818c2ecf20Sopenharmony_ci		rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id);
68828c2ecf20Sopenharmony_ci	}
68838c2ecf20Sopenharmony_ci
68848c2ecf20Sopenharmony_ci	/* Set up the blkdev mapping. */
68858c2ecf20Sopenharmony_ci
68868c2ecf20Sopenharmony_ci	ret = rbd_init_disk(rbd_dev);
68878c2ecf20Sopenharmony_ci	if (ret)
68888c2ecf20Sopenharmony_ci		goto err_out_blkdev;
68898c2ecf20Sopenharmony_ci
68908c2ecf20Sopenharmony_ci	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
68918c2ecf20Sopenharmony_ci	set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev));
68928c2ecf20Sopenharmony_ci
68938c2ecf20Sopenharmony_ci	ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id);
68948c2ecf20Sopenharmony_ci	if (ret)
68958c2ecf20Sopenharmony_ci		goto err_out_disk;
68968c2ecf20Sopenharmony_ci
68978c2ecf20Sopenharmony_ci	set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
68988c2ecf20Sopenharmony_ci	up_write(&rbd_dev->header_rwsem);
68998c2ecf20Sopenharmony_ci	return 0;
69008c2ecf20Sopenharmony_ci
69018c2ecf20Sopenharmony_cierr_out_disk:
69028c2ecf20Sopenharmony_ci	rbd_free_disk(rbd_dev);
69038c2ecf20Sopenharmony_cierr_out_blkdev:
69048c2ecf20Sopenharmony_ci	if (!single_major)
69058c2ecf20Sopenharmony_ci		unregister_blkdev(rbd_dev->major, rbd_dev->name);
69068c2ecf20Sopenharmony_cierr_out_unlock:
69078c2ecf20Sopenharmony_ci	up_write(&rbd_dev->header_rwsem);
69088c2ecf20Sopenharmony_ci	return ret;
69098c2ecf20Sopenharmony_ci}
69108c2ecf20Sopenharmony_ci
69118c2ecf20Sopenharmony_cistatic int rbd_dev_header_name(struct rbd_device *rbd_dev)
69128c2ecf20Sopenharmony_ci{
69138c2ecf20Sopenharmony_ci	struct rbd_spec *spec = rbd_dev->spec;
69148c2ecf20Sopenharmony_ci	int ret;
69158c2ecf20Sopenharmony_ci
69168c2ecf20Sopenharmony_ci	/* Record the header object name for this rbd image. */
69178c2ecf20Sopenharmony_ci
69188c2ecf20Sopenharmony_ci	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
69198c2ecf20Sopenharmony_ci	if (rbd_dev->image_format == 1)
69208c2ecf20Sopenharmony_ci		ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
69218c2ecf20Sopenharmony_ci				       spec->image_name, RBD_SUFFIX);
69228c2ecf20Sopenharmony_ci	else
69238c2ecf20Sopenharmony_ci		ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s",
69248c2ecf20Sopenharmony_ci				       RBD_HEADER_PREFIX, spec->image_id);
69258c2ecf20Sopenharmony_ci
69268c2ecf20Sopenharmony_ci	return ret;
69278c2ecf20Sopenharmony_ci}
69288c2ecf20Sopenharmony_ci
69298c2ecf20Sopenharmony_cistatic void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap)
69308c2ecf20Sopenharmony_ci{
69318c2ecf20Sopenharmony_ci	if (!is_snap) {
69328c2ecf20Sopenharmony_ci		pr_info("image %s/%s%s%s does not exist\n",
69338c2ecf20Sopenharmony_ci			rbd_dev->spec->pool_name,
69348c2ecf20Sopenharmony_ci			rbd_dev->spec->pool_ns ?: "",
69358c2ecf20Sopenharmony_ci			rbd_dev->spec->pool_ns ? "/" : "",
69368c2ecf20Sopenharmony_ci			rbd_dev->spec->image_name);
69378c2ecf20Sopenharmony_ci	} else {
69388c2ecf20Sopenharmony_ci		pr_info("snap %s/%s%s%s@%s does not exist\n",
69398c2ecf20Sopenharmony_ci			rbd_dev->spec->pool_name,
69408c2ecf20Sopenharmony_ci			rbd_dev->spec->pool_ns ?: "",
69418c2ecf20Sopenharmony_ci			rbd_dev->spec->pool_ns ? "/" : "",
69428c2ecf20Sopenharmony_ci			rbd_dev->spec->image_name,
69438c2ecf20Sopenharmony_ci			rbd_dev->spec->snap_name);
69448c2ecf20Sopenharmony_ci	}
69458c2ecf20Sopenharmony_ci}
69468c2ecf20Sopenharmony_ci
69478c2ecf20Sopenharmony_cistatic void rbd_dev_image_release(struct rbd_device *rbd_dev)
69488c2ecf20Sopenharmony_ci{
69498c2ecf20Sopenharmony_ci	if (!rbd_is_ro(rbd_dev))
69508c2ecf20Sopenharmony_ci		rbd_unregister_watch(rbd_dev);
69518c2ecf20Sopenharmony_ci
69528c2ecf20Sopenharmony_ci	rbd_dev_unprobe(rbd_dev);
69538c2ecf20Sopenharmony_ci	rbd_dev->image_format = 0;
69548c2ecf20Sopenharmony_ci	kfree(rbd_dev->spec->image_id);
69558c2ecf20Sopenharmony_ci	rbd_dev->spec->image_id = NULL;
69568c2ecf20Sopenharmony_ci}
69578c2ecf20Sopenharmony_ci
69588c2ecf20Sopenharmony_ci/*
69598c2ecf20Sopenharmony_ci * Probe for the existence of the header object for the given rbd
69608c2ecf20Sopenharmony_ci * device.  If this image is the one being mapped (i.e., not a
69618c2ecf20Sopenharmony_ci * parent), initiate a watch on its header object before using that
69628c2ecf20Sopenharmony_ci * object to get detailed information about the rbd image.
69638c2ecf20Sopenharmony_ci *
69648c2ecf20Sopenharmony_ci * On success, returns with header_rwsem held for write if called
69658c2ecf20Sopenharmony_ci * with @depth == 0.
69668c2ecf20Sopenharmony_ci */
69678c2ecf20Sopenharmony_cistatic int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
69688c2ecf20Sopenharmony_ci{
69698c2ecf20Sopenharmony_ci	bool need_watch = !rbd_is_ro(rbd_dev);
69708c2ecf20Sopenharmony_ci	int ret;
69718c2ecf20Sopenharmony_ci
69728c2ecf20Sopenharmony_ci	/*
69738c2ecf20Sopenharmony_ci	 * Get the id from the image id object.  Unless there's an
69748c2ecf20Sopenharmony_ci	 * error, rbd_dev->spec->image_id will be filled in with
69758c2ecf20Sopenharmony_ci	 * a dynamically-allocated string, and rbd_dev->image_format
69768c2ecf20Sopenharmony_ci	 * will be set to either 1 or 2.
69778c2ecf20Sopenharmony_ci	 */
69788c2ecf20Sopenharmony_ci	ret = rbd_dev_image_id(rbd_dev);
69798c2ecf20Sopenharmony_ci	if (ret)
69808c2ecf20Sopenharmony_ci		return ret;
69818c2ecf20Sopenharmony_ci
69828c2ecf20Sopenharmony_ci	ret = rbd_dev_header_name(rbd_dev);
69838c2ecf20Sopenharmony_ci	if (ret)
69848c2ecf20Sopenharmony_ci		goto err_out_format;
69858c2ecf20Sopenharmony_ci
69868c2ecf20Sopenharmony_ci	if (need_watch) {
69878c2ecf20Sopenharmony_ci		ret = rbd_register_watch(rbd_dev);
69888c2ecf20Sopenharmony_ci		if (ret) {
69898c2ecf20Sopenharmony_ci			if (ret == -ENOENT)
69908c2ecf20Sopenharmony_ci				rbd_print_dne(rbd_dev, false);
69918c2ecf20Sopenharmony_ci			goto err_out_format;
69928c2ecf20Sopenharmony_ci		}
69938c2ecf20Sopenharmony_ci	}
69948c2ecf20Sopenharmony_ci
69958c2ecf20Sopenharmony_ci	if (!depth)
69968c2ecf20Sopenharmony_ci		down_write(&rbd_dev->header_rwsem);
69978c2ecf20Sopenharmony_ci
69988c2ecf20Sopenharmony_ci	ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
69998c2ecf20Sopenharmony_ci	if (ret) {
70008c2ecf20Sopenharmony_ci		if (ret == -ENOENT && !need_watch)
70018c2ecf20Sopenharmony_ci			rbd_print_dne(rbd_dev, false);
70028c2ecf20Sopenharmony_ci		goto err_out_probe;
70038c2ecf20Sopenharmony_ci	}
70048c2ecf20Sopenharmony_ci
70058c2ecf20Sopenharmony_ci	rbd_init_layout(rbd_dev);
70068c2ecf20Sopenharmony_ci
70078c2ecf20Sopenharmony_ci	/*
70088c2ecf20Sopenharmony_ci	 * If this image is the one being mapped, we have pool name and
70098c2ecf20Sopenharmony_ci	 * id, image name and id, and snap name - need to fill snap id.
70108c2ecf20Sopenharmony_ci	 * Otherwise this is a parent image, identified by pool, image
70118c2ecf20Sopenharmony_ci	 * and snap ids - need to fill in names for those ids.
70128c2ecf20Sopenharmony_ci	 */
70138c2ecf20Sopenharmony_ci	if (!depth)
70148c2ecf20Sopenharmony_ci		ret = rbd_spec_fill_snap_id(rbd_dev);
70158c2ecf20Sopenharmony_ci	else
70168c2ecf20Sopenharmony_ci		ret = rbd_spec_fill_names(rbd_dev);
70178c2ecf20Sopenharmony_ci	if (ret) {
70188c2ecf20Sopenharmony_ci		if (ret == -ENOENT)
70198c2ecf20Sopenharmony_ci			rbd_print_dne(rbd_dev, true);
70208c2ecf20Sopenharmony_ci		goto err_out_probe;
70218c2ecf20Sopenharmony_ci	}
70228c2ecf20Sopenharmony_ci
70238c2ecf20Sopenharmony_ci	ret = rbd_dev_mapping_set(rbd_dev);
70248c2ecf20Sopenharmony_ci	if (ret)
70258c2ecf20Sopenharmony_ci		goto err_out_probe;
70268c2ecf20Sopenharmony_ci
70278c2ecf20Sopenharmony_ci	if (rbd_is_snap(rbd_dev) &&
70288c2ecf20Sopenharmony_ci	    (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) {
70298c2ecf20Sopenharmony_ci		ret = rbd_object_map_load(rbd_dev);
70308c2ecf20Sopenharmony_ci		if (ret)
70318c2ecf20Sopenharmony_ci			goto err_out_probe;
70328c2ecf20Sopenharmony_ci	}
70338c2ecf20Sopenharmony_ci
70348c2ecf20Sopenharmony_ci	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
70358c2ecf20Sopenharmony_ci		ret = rbd_dev_setup_parent(rbd_dev);
70368c2ecf20Sopenharmony_ci		if (ret)
70378c2ecf20Sopenharmony_ci			goto err_out_probe;
70388c2ecf20Sopenharmony_ci	}
70398c2ecf20Sopenharmony_ci
70408c2ecf20Sopenharmony_ci	ret = rbd_dev_probe_parent(rbd_dev, depth);
70418c2ecf20Sopenharmony_ci	if (ret)
70428c2ecf20Sopenharmony_ci		goto err_out_probe;
70438c2ecf20Sopenharmony_ci
70448c2ecf20Sopenharmony_ci	dout("discovered format %u image, header name is %s\n",
70458c2ecf20Sopenharmony_ci		rbd_dev->image_format, rbd_dev->header_oid.name);
70468c2ecf20Sopenharmony_ci	return 0;
70478c2ecf20Sopenharmony_ci
70488c2ecf20Sopenharmony_cierr_out_probe:
70498c2ecf20Sopenharmony_ci	if (!depth)
70508c2ecf20Sopenharmony_ci		up_write(&rbd_dev->header_rwsem);
70518c2ecf20Sopenharmony_ci	if (need_watch)
70528c2ecf20Sopenharmony_ci		rbd_unregister_watch(rbd_dev);
70538c2ecf20Sopenharmony_ci	rbd_dev_unprobe(rbd_dev);
70548c2ecf20Sopenharmony_cierr_out_format:
70558c2ecf20Sopenharmony_ci	rbd_dev->image_format = 0;
70568c2ecf20Sopenharmony_ci	kfree(rbd_dev->spec->image_id);
70578c2ecf20Sopenharmony_ci	rbd_dev->spec->image_id = NULL;
70588c2ecf20Sopenharmony_ci	return ret;
70598c2ecf20Sopenharmony_ci}
70608c2ecf20Sopenharmony_ci
70618c2ecf20Sopenharmony_cistatic void rbd_dev_update_header(struct rbd_device *rbd_dev,
70628c2ecf20Sopenharmony_ci				  struct rbd_image_header *header)
70638c2ecf20Sopenharmony_ci{
70648c2ecf20Sopenharmony_ci	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
70658c2ecf20Sopenharmony_ci	rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
70668c2ecf20Sopenharmony_ci
70678c2ecf20Sopenharmony_ci	if (rbd_dev->header.image_size != header->image_size) {
70688c2ecf20Sopenharmony_ci		rbd_dev->header.image_size = header->image_size;
70698c2ecf20Sopenharmony_ci
70708c2ecf20Sopenharmony_ci		if (!rbd_is_snap(rbd_dev)) {
70718c2ecf20Sopenharmony_ci			rbd_dev->mapping.size = header->image_size;
70728c2ecf20Sopenharmony_ci			rbd_dev_update_size(rbd_dev);
70738c2ecf20Sopenharmony_ci		}
70748c2ecf20Sopenharmony_ci	}
70758c2ecf20Sopenharmony_ci
70768c2ecf20Sopenharmony_ci	ceph_put_snap_context(rbd_dev->header.snapc);
70778c2ecf20Sopenharmony_ci	rbd_dev->header.snapc = header->snapc;
70788c2ecf20Sopenharmony_ci	header->snapc = NULL;
70798c2ecf20Sopenharmony_ci
70808c2ecf20Sopenharmony_ci	if (rbd_dev->image_format == 1) {
70818c2ecf20Sopenharmony_ci		kfree(rbd_dev->header.snap_names);
70828c2ecf20Sopenharmony_ci		rbd_dev->header.snap_names = header->snap_names;
70838c2ecf20Sopenharmony_ci		header->snap_names = NULL;
70848c2ecf20Sopenharmony_ci
70858c2ecf20Sopenharmony_ci		kfree(rbd_dev->header.snap_sizes);
70868c2ecf20Sopenharmony_ci		rbd_dev->header.snap_sizes = header->snap_sizes;
70878c2ecf20Sopenharmony_ci		header->snap_sizes = NULL;
70888c2ecf20Sopenharmony_ci	}
70898c2ecf20Sopenharmony_ci}
70908c2ecf20Sopenharmony_ci
70918c2ecf20Sopenharmony_cistatic void rbd_dev_update_parent(struct rbd_device *rbd_dev,
70928c2ecf20Sopenharmony_ci				  struct parent_image_info *pii)
70938c2ecf20Sopenharmony_ci{
70948c2ecf20Sopenharmony_ci	if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
70958c2ecf20Sopenharmony_ci		/*
70968c2ecf20Sopenharmony_ci		 * Either the parent never existed, or we have
70978c2ecf20Sopenharmony_ci		 * record of it but the image got flattened so it no
70988c2ecf20Sopenharmony_ci		 * longer has a parent.  When the parent of a
70998c2ecf20Sopenharmony_ci		 * layered image disappears we immediately set the
71008c2ecf20Sopenharmony_ci		 * overlap to 0.  The effect of this is that all new
71018c2ecf20Sopenharmony_ci		 * requests will be treated as if the image had no
71028c2ecf20Sopenharmony_ci		 * parent.
71038c2ecf20Sopenharmony_ci		 *
71048c2ecf20Sopenharmony_ci		 * If !pii.has_overlap, the parent image spec is not
71058c2ecf20Sopenharmony_ci		 * applicable.  It's there to avoid duplication in each
71068c2ecf20Sopenharmony_ci		 * snapshot record.
71078c2ecf20Sopenharmony_ci		 */
71088c2ecf20Sopenharmony_ci		if (rbd_dev->parent_overlap) {
71098c2ecf20Sopenharmony_ci			rbd_dev->parent_overlap = 0;
71108c2ecf20Sopenharmony_ci			rbd_dev_parent_put(rbd_dev);
71118c2ecf20Sopenharmony_ci			pr_info("%s: clone has been flattened\n",
71128c2ecf20Sopenharmony_ci				rbd_dev->disk->disk_name);
71138c2ecf20Sopenharmony_ci		}
71148c2ecf20Sopenharmony_ci	} else {
71158c2ecf20Sopenharmony_ci		rbd_assert(rbd_dev->parent_spec);
71168c2ecf20Sopenharmony_ci
71178c2ecf20Sopenharmony_ci		/*
71188c2ecf20Sopenharmony_ci		 * Update the parent overlap.  If it became zero, issue
71198c2ecf20Sopenharmony_ci		 * a warning as we will proceed as if there is no parent.
71208c2ecf20Sopenharmony_ci		 */
71218c2ecf20Sopenharmony_ci		if (!pii->overlap && rbd_dev->parent_overlap)
71228c2ecf20Sopenharmony_ci			rbd_warn(rbd_dev,
71238c2ecf20Sopenharmony_ci				 "clone has become standalone (overlap 0)");
71248c2ecf20Sopenharmony_ci		rbd_dev->parent_overlap = pii->overlap;
71258c2ecf20Sopenharmony_ci	}
71268c2ecf20Sopenharmony_ci}
71278c2ecf20Sopenharmony_ci
71288c2ecf20Sopenharmony_cistatic int rbd_dev_refresh(struct rbd_device *rbd_dev)
71298c2ecf20Sopenharmony_ci{
71308c2ecf20Sopenharmony_ci	struct rbd_image_header	header = { 0 };
71318c2ecf20Sopenharmony_ci	struct parent_image_info pii = { 0 };
71328c2ecf20Sopenharmony_ci	int ret;
71338c2ecf20Sopenharmony_ci
71348c2ecf20Sopenharmony_ci	dout("%s rbd_dev %p\n", __func__, rbd_dev);
71358c2ecf20Sopenharmony_ci
71368c2ecf20Sopenharmony_ci	ret = rbd_dev_header_info(rbd_dev, &header, false);
71378c2ecf20Sopenharmony_ci	if (ret)
71388c2ecf20Sopenharmony_ci		goto out;
71398c2ecf20Sopenharmony_ci
71408c2ecf20Sopenharmony_ci	/*
71418c2ecf20Sopenharmony_ci	 * If there is a parent, see if it has disappeared due to the
71428c2ecf20Sopenharmony_ci	 * mapped image getting flattened.
71438c2ecf20Sopenharmony_ci	 */
71448c2ecf20Sopenharmony_ci	if (rbd_dev->parent) {
71458c2ecf20Sopenharmony_ci		ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
71468c2ecf20Sopenharmony_ci		if (ret)
71478c2ecf20Sopenharmony_ci			goto out;
71488c2ecf20Sopenharmony_ci	}
71498c2ecf20Sopenharmony_ci
71508c2ecf20Sopenharmony_ci	down_write(&rbd_dev->header_rwsem);
71518c2ecf20Sopenharmony_ci	rbd_dev_update_header(rbd_dev, &header);
71528c2ecf20Sopenharmony_ci	if (rbd_dev->parent)
71538c2ecf20Sopenharmony_ci		rbd_dev_update_parent(rbd_dev, &pii);
71548c2ecf20Sopenharmony_ci	up_write(&rbd_dev->header_rwsem);
71558c2ecf20Sopenharmony_ci
71568c2ecf20Sopenharmony_ciout:
71578c2ecf20Sopenharmony_ci	rbd_parent_info_cleanup(&pii);
71588c2ecf20Sopenharmony_ci	rbd_image_header_cleanup(&header);
71598c2ecf20Sopenharmony_ci	return ret;
71608c2ecf20Sopenharmony_ci}
71618c2ecf20Sopenharmony_ci
71628c2ecf20Sopenharmony_cistatic ssize_t do_rbd_add(struct bus_type *bus,
71638c2ecf20Sopenharmony_ci			  const char *buf,
71648c2ecf20Sopenharmony_ci			  size_t count)
71658c2ecf20Sopenharmony_ci{
71668c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = NULL;
71678c2ecf20Sopenharmony_ci	struct ceph_options *ceph_opts = NULL;
71688c2ecf20Sopenharmony_ci	struct rbd_options *rbd_opts = NULL;
71698c2ecf20Sopenharmony_ci	struct rbd_spec *spec = NULL;
71708c2ecf20Sopenharmony_ci	struct rbd_client *rbdc;
71718c2ecf20Sopenharmony_ci	int rc;
71728c2ecf20Sopenharmony_ci
71738c2ecf20Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
71748c2ecf20Sopenharmony_ci		return -EPERM;
71758c2ecf20Sopenharmony_ci
71768c2ecf20Sopenharmony_ci	if (!try_module_get(THIS_MODULE))
71778c2ecf20Sopenharmony_ci		return -ENODEV;
71788c2ecf20Sopenharmony_ci
71798c2ecf20Sopenharmony_ci	/* parse add command */
71808c2ecf20Sopenharmony_ci	rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
71818c2ecf20Sopenharmony_ci	if (rc < 0)
71828c2ecf20Sopenharmony_ci		goto out;
71838c2ecf20Sopenharmony_ci
71848c2ecf20Sopenharmony_ci	rbdc = rbd_get_client(ceph_opts);
71858c2ecf20Sopenharmony_ci	if (IS_ERR(rbdc)) {
71868c2ecf20Sopenharmony_ci		rc = PTR_ERR(rbdc);
71878c2ecf20Sopenharmony_ci		goto err_out_args;
71888c2ecf20Sopenharmony_ci	}
71898c2ecf20Sopenharmony_ci
71908c2ecf20Sopenharmony_ci	/* pick the pool */
71918c2ecf20Sopenharmony_ci	rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name);
71928c2ecf20Sopenharmony_ci	if (rc < 0) {
71938c2ecf20Sopenharmony_ci		if (rc == -ENOENT)
71948c2ecf20Sopenharmony_ci			pr_info("pool %s does not exist\n", spec->pool_name);
71958c2ecf20Sopenharmony_ci		goto err_out_client;
71968c2ecf20Sopenharmony_ci	}
71978c2ecf20Sopenharmony_ci	spec->pool_id = (u64)rc;
71988c2ecf20Sopenharmony_ci
71998c2ecf20Sopenharmony_ci	rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
72008c2ecf20Sopenharmony_ci	if (!rbd_dev) {
72018c2ecf20Sopenharmony_ci		rc = -ENOMEM;
72028c2ecf20Sopenharmony_ci		goto err_out_client;
72038c2ecf20Sopenharmony_ci	}
72048c2ecf20Sopenharmony_ci	rbdc = NULL;		/* rbd_dev now owns this */
72058c2ecf20Sopenharmony_ci	spec = NULL;		/* rbd_dev now owns this */
72068c2ecf20Sopenharmony_ci	rbd_opts = NULL;	/* rbd_dev now owns this */
72078c2ecf20Sopenharmony_ci
72088c2ecf20Sopenharmony_ci	/* if we are mapping a snapshot it will be a read-only mapping */
72098c2ecf20Sopenharmony_ci	if (rbd_dev->opts->read_only ||
72108c2ecf20Sopenharmony_ci	    strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME))
72118c2ecf20Sopenharmony_ci		__set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags);
72128c2ecf20Sopenharmony_ci
72138c2ecf20Sopenharmony_ci	rbd_dev->config_info = kstrdup(buf, GFP_KERNEL);
72148c2ecf20Sopenharmony_ci	if (!rbd_dev->config_info) {
72158c2ecf20Sopenharmony_ci		rc = -ENOMEM;
72168c2ecf20Sopenharmony_ci		goto err_out_rbd_dev;
72178c2ecf20Sopenharmony_ci	}
72188c2ecf20Sopenharmony_ci
72198c2ecf20Sopenharmony_ci	rc = rbd_dev_image_probe(rbd_dev, 0);
72208c2ecf20Sopenharmony_ci	if (rc < 0)
72218c2ecf20Sopenharmony_ci		goto err_out_rbd_dev;
72228c2ecf20Sopenharmony_ci
72238c2ecf20Sopenharmony_ci	if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) {
72248c2ecf20Sopenharmony_ci		rbd_warn(rbd_dev, "alloc_size adjusted to %u",
72258c2ecf20Sopenharmony_ci			 rbd_dev->layout.object_size);
72268c2ecf20Sopenharmony_ci		rbd_dev->opts->alloc_size = rbd_dev->layout.object_size;
72278c2ecf20Sopenharmony_ci	}
72288c2ecf20Sopenharmony_ci
72298c2ecf20Sopenharmony_ci	rc = rbd_dev_device_setup(rbd_dev);
72308c2ecf20Sopenharmony_ci	if (rc)
72318c2ecf20Sopenharmony_ci		goto err_out_image_probe;
72328c2ecf20Sopenharmony_ci
72338c2ecf20Sopenharmony_ci	rc = rbd_add_acquire_lock(rbd_dev);
72348c2ecf20Sopenharmony_ci	if (rc)
72358c2ecf20Sopenharmony_ci		goto err_out_image_lock;
72368c2ecf20Sopenharmony_ci
72378c2ecf20Sopenharmony_ci	/* Everything's ready.  Announce the disk to the world. */
72388c2ecf20Sopenharmony_ci
72398c2ecf20Sopenharmony_ci	rc = device_add(&rbd_dev->dev);
72408c2ecf20Sopenharmony_ci	if (rc)
72418c2ecf20Sopenharmony_ci		goto err_out_image_lock;
72428c2ecf20Sopenharmony_ci
72438c2ecf20Sopenharmony_ci	device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL);
72448c2ecf20Sopenharmony_ci	/* see rbd_init_disk() */
72458c2ecf20Sopenharmony_ci	blk_put_queue(rbd_dev->disk->queue);
72468c2ecf20Sopenharmony_ci
72478c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev_list_lock);
72488c2ecf20Sopenharmony_ci	list_add_tail(&rbd_dev->node, &rbd_dev_list);
72498c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev_list_lock);
72508c2ecf20Sopenharmony_ci
72518c2ecf20Sopenharmony_ci	pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name,
72528c2ecf20Sopenharmony_ci		(unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT,
72538c2ecf20Sopenharmony_ci		rbd_dev->header.features);
72548c2ecf20Sopenharmony_ci	rc = count;
72558c2ecf20Sopenharmony_ciout:
72568c2ecf20Sopenharmony_ci	module_put(THIS_MODULE);
72578c2ecf20Sopenharmony_ci	return rc;
72588c2ecf20Sopenharmony_ci
72598c2ecf20Sopenharmony_cierr_out_image_lock:
72608c2ecf20Sopenharmony_ci	rbd_dev_image_unlock(rbd_dev);
72618c2ecf20Sopenharmony_ci	rbd_dev_device_release(rbd_dev);
72628c2ecf20Sopenharmony_cierr_out_image_probe:
72638c2ecf20Sopenharmony_ci	rbd_dev_image_release(rbd_dev);
72648c2ecf20Sopenharmony_cierr_out_rbd_dev:
72658c2ecf20Sopenharmony_ci	rbd_dev_destroy(rbd_dev);
72668c2ecf20Sopenharmony_cierr_out_client:
72678c2ecf20Sopenharmony_ci	rbd_put_client(rbdc);
72688c2ecf20Sopenharmony_cierr_out_args:
72698c2ecf20Sopenharmony_ci	rbd_spec_put(spec);
72708c2ecf20Sopenharmony_ci	kfree(rbd_opts);
72718c2ecf20Sopenharmony_ci	goto out;
72728c2ecf20Sopenharmony_ci}
72738c2ecf20Sopenharmony_ci
72748c2ecf20Sopenharmony_cistatic ssize_t add_store(struct bus_type *bus, const char *buf, size_t count)
72758c2ecf20Sopenharmony_ci{
72768c2ecf20Sopenharmony_ci	if (single_major)
72778c2ecf20Sopenharmony_ci		return -EINVAL;
72788c2ecf20Sopenharmony_ci
72798c2ecf20Sopenharmony_ci	return do_rbd_add(bus, buf, count);
72808c2ecf20Sopenharmony_ci}
72818c2ecf20Sopenharmony_ci
72828c2ecf20Sopenharmony_cistatic ssize_t add_single_major_store(struct bus_type *bus, const char *buf,
72838c2ecf20Sopenharmony_ci				      size_t count)
72848c2ecf20Sopenharmony_ci{
72858c2ecf20Sopenharmony_ci	return do_rbd_add(bus, buf, count);
72868c2ecf20Sopenharmony_ci}
72878c2ecf20Sopenharmony_ci
72888c2ecf20Sopenharmony_cistatic void rbd_dev_remove_parent(struct rbd_device *rbd_dev)
72898c2ecf20Sopenharmony_ci{
72908c2ecf20Sopenharmony_ci	while (rbd_dev->parent) {
72918c2ecf20Sopenharmony_ci		struct rbd_device *first = rbd_dev;
72928c2ecf20Sopenharmony_ci		struct rbd_device *second = first->parent;
72938c2ecf20Sopenharmony_ci		struct rbd_device *third;
72948c2ecf20Sopenharmony_ci
72958c2ecf20Sopenharmony_ci		/*
72968c2ecf20Sopenharmony_ci		 * Follow to the parent with no grandparent and
72978c2ecf20Sopenharmony_ci		 * remove it.
72988c2ecf20Sopenharmony_ci		 */
72998c2ecf20Sopenharmony_ci		while (second && (third = second->parent)) {
73008c2ecf20Sopenharmony_ci			first = second;
73018c2ecf20Sopenharmony_ci			second = third;
73028c2ecf20Sopenharmony_ci		}
73038c2ecf20Sopenharmony_ci		rbd_assert(second);
73048c2ecf20Sopenharmony_ci		rbd_dev_image_release(second);
73058c2ecf20Sopenharmony_ci		rbd_dev_destroy(second);
73068c2ecf20Sopenharmony_ci		first->parent = NULL;
73078c2ecf20Sopenharmony_ci		first->parent_overlap = 0;
73088c2ecf20Sopenharmony_ci
73098c2ecf20Sopenharmony_ci		rbd_assert(first->parent_spec);
73108c2ecf20Sopenharmony_ci		rbd_spec_put(first->parent_spec);
73118c2ecf20Sopenharmony_ci		first->parent_spec = NULL;
73128c2ecf20Sopenharmony_ci	}
73138c2ecf20Sopenharmony_ci}
73148c2ecf20Sopenharmony_ci
73158c2ecf20Sopenharmony_cistatic ssize_t do_rbd_remove(struct bus_type *bus,
73168c2ecf20Sopenharmony_ci			     const char *buf,
73178c2ecf20Sopenharmony_ci			     size_t count)
73188c2ecf20Sopenharmony_ci{
73198c2ecf20Sopenharmony_ci	struct rbd_device *rbd_dev = NULL;
73208c2ecf20Sopenharmony_ci	struct list_head *tmp;
73218c2ecf20Sopenharmony_ci	int dev_id;
73228c2ecf20Sopenharmony_ci	char opt_buf[6];
73238c2ecf20Sopenharmony_ci	bool force = false;
73248c2ecf20Sopenharmony_ci	int ret;
73258c2ecf20Sopenharmony_ci
73268c2ecf20Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
73278c2ecf20Sopenharmony_ci		return -EPERM;
73288c2ecf20Sopenharmony_ci
73298c2ecf20Sopenharmony_ci	dev_id = -1;
73308c2ecf20Sopenharmony_ci	opt_buf[0] = '\0';
73318c2ecf20Sopenharmony_ci	sscanf(buf, "%d %5s", &dev_id, opt_buf);
73328c2ecf20Sopenharmony_ci	if (dev_id < 0) {
73338c2ecf20Sopenharmony_ci		pr_err("dev_id out of range\n");
73348c2ecf20Sopenharmony_ci		return -EINVAL;
73358c2ecf20Sopenharmony_ci	}
73368c2ecf20Sopenharmony_ci	if (opt_buf[0] != '\0') {
73378c2ecf20Sopenharmony_ci		if (!strcmp(opt_buf, "force")) {
73388c2ecf20Sopenharmony_ci			force = true;
73398c2ecf20Sopenharmony_ci		} else {
73408c2ecf20Sopenharmony_ci			pr_err("bad remove option at '%s'\n", opt_buf);
73418c2ecf20Sopenharmony_ci			return -EINVAL;
73428c2ecf20Sopenharmony_ci		}
73438c2ecf20Sopenharmony_ci	}
73448c2ecf20Sopenharmony_ci
73458c2ecf20Sopenharmony_ci	ret = -ENOENT;
73468c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev_list_lock);
73478c2ecf20Sopenharmony_ci	list_for_each(tmp, &rbd_dev_list) {
73488c2ecf20Sopenharmony_ci		rbd_dev = list_entry(tmp, struct rbd_device, node);
73498c2ecf20Sopenharmony_ci		if (rbd_dev->dev_id == dev_id) {
73508c2ecf20Sopenharmony_ci			ret = 0;
73518c2ecf20Sopenharmony_ci			break;
73528c2ecf20Sopenharmony_ci		}
73538c2ecf20Sopenharmony_ci	}
73548c2ecf20Sopenharmony_ci	if (!ret) {
73558c2ecf20Sopenharmony_ci		spin_lock_irq(&rbd_dev->lock);
73568c2ecf20Sopenharmony_ci		if (rbd_dev->open_count && !force)
73578c2ecf20Sopenharmony_ci			ret = -EBUSY;
73588c2ecf20Sopenharmony_ci		else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING,
73598c2ecf20Sopenharmony_ci					  &rbd_dev->flags))
73608c2ecf20Sopenharmony_ci			ret = -EINPROGRESS;
73618c2ecf20Sopenharmony_ci		spin_unlock_irq(&rbd_dev->lock);
73628c2ecf20Sopenharmony_ci	}
73638c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev_list_lock);
73648c2ecf20Sopenharmony_ci	if (ret)
73658c2ecf20Sopenharmony_ci		return ret;
73668c2ecf20Sopenharmony_ci
73678c2ecf20Sopenharmony_ci	if (force) {
73688c2ecf20Sopenharmony_ci		/*
73698c2ecf20Sopenharmony_ci		 * Prevent new IO from being queued and wait for existing
73708c2ecf20Sopenharmony_ci		 * IO to complete/fail.
73718c2ecf20Sopenharmony_ci		 */
73728c2ecf20Sopenharmony_ci		blk_mq_freeze_queue(rbd_dev->disk->queue);
73738c2ecf20Sopenharmony_ci		blk_set_queue_dying(rbd_dev->disk->queue);
73748c2ecf20Sopenharmony_ci	}
73758c2ecf20Sopenharmony_ci
73768c2ecf20Sopenharmony_ci	del_gendisk(rbd_dev->disk);
73778c2ecf20Sopenharmony_ci	spin_lock(&rbd_dev_list_lock);
73788c2ecf20Sopenharmony_ci	list_del_init(&rbd_dev->node);
73798c2ecf20Sopenharmony_ci	spin_unlock(&rbd_dev_list_lock);
73808c2ecf20Sopenharmony_ci	device_del(&rbd_dev->dev);
73818c2ecf20Sopenharmony_ci
73828c2ecf20Sopenharmony_ci	rbd_dev_image_unlock(rbd_dev);
73838c2ecf20Sopenharmony_ci	rbd_dev_device_release(rbd_dev);
73848c2ecf20Sopenharmony_ci	rbd_dev_image_release(rbd_dev);
73858c2ecf20Sopenharmony_ci	rbd_dev_destroy(rbd_dev);
73868c2ecf20Sopenharmony_ci	return count;
73878c2ecf20Sopenharmony_ci}
73888c2ecf20Sopenharmony_ci
73898c2ecf20Sopenharmony_cistatic ssize_t remove_store(struct bus_type *bus, const char *buf, size_t count)
73908c2ecf20Sopenharmony_ci{
73918c2ecf20Sopenharmony_ci	if (single_major)
73928c2ecf20Sopenharmony_ci		return -EINVAL;
73938c2ecf20Sopenharmony_ci
73948c2ecf20Sopenharmony_ci	return do_rbd_remove(bus, buf, count);
73958c2ecf20Sopenharmony_ci}
73968c2ecf20Sopenharmony_ci
73978c2ecf20Sopenharmony_cistatic ssize_t remove_single_major_store(struct bus_type *bus, const char *buf,
73988c2ecf20Sopenharmony_ci					 size_t count)
73998c2ecf20Sopenharmony_ci{
74008c2ecf20Sopenharmony_ci	return do_rbd_remove(bus, buf, count);
74018c2ecf20Sopenharmony_ci}
74028c2ecf20Sopenharmony_ci
74038c2ecf20Sopenharmony_ci/*
74048c2ecf20Sopenharmony_ci * create control files in sysfs
74058c2ecf20Sopenharmony_ci * /sys/bus/rbd/...
74068c2ecf20Sopenharmony_ci */
74078c2ecf20Sopenharmony_cistatic int __init rbd_sysfs_init(void)
74088c2ecf20Sopenharmony_ci{
74098c2ecf20Sopenharmony_ci	int ret;
74108c2ecf20Sopenharmony_ci
74118c2ecf20Sopenharmony_ci	ret = device_register(&rbd_root_dev);
74128c2ecf20Sopenharmony_ci	if (ret < 0)
74138c2ecf20Sopenharmony_ci		return ret;
74148c2ecf20Sopenharmony_ci
74158c2ecf20Sopenharmony_ci	ret = bus_register(&rbd_bus_type);
74168c2ecf20Sopenharmony_ci	if (ret < 0)
74178c2ecf20Sopenharmony_ci		device_unregister(&rbd_root_dev);
74188c2ecf20Sopenharmony_ci
74198c2ecf20Sopenharmony_ci	return ret;
74208c2ecf20Sopenharmony_ci}
74218c2ecf20Sopenharmony_ci
74228c2ecf20Sopenharmony_cistatic void __exit rbd_sysfs_cleanup(void)
74238c2ecf20Sopenharmony_ci{
74248c2ecf20Sopenharmony_ci	bus_unregister(&rbd_bus_type);
74258c2ecf20Sopenharmony_ci	device_unregister(&rbd_root_dev);
74268c2ecf20Sopenharmony_ci}
74278c2ecf20Sopenharmony_ci
74288c2ecf20Sopenharmony_cistatic int __init rbd_slab_init(void)
74298c2ecf20Sopenharmony_ci{
74308c2ecf20Sopenharmony_ci	rbd_assert(!rbd_img_request_cache);
74318c2ecf20Sopenharmony_ci	rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0);
74328c2ecf20Sopenharmony_ci	if (!rbd_img_request_cache)
74338c2ecf20Sopenharmony_ci		return -ENOMEM;
74348c2ecf20Sopenharmony_ci
74358c2ecf20Sopenharmony_ci	rbd_assert(!rbd_obj_request_cache);
74368c2ecf20Sopenharmony_ci	rbd_obj_request_cache = KMEM_CACHE(rbd_obj_request, 0);
74378c2ecf20Sopenharmony_ci	if (!rbd_obj_request_cache)
74388c2ecf20Sopenharmony_ci		goto out_err;
74398c2ecf20Sopenharmony_ci
74408c2ecf20Sopenharmony_ci	return 0;
74418c2ecf20Sopenharmony_ci
74428c2ecf20Sopenharmony_ciout_err:
74438c2ecf20Sopenharmony_ci	kmem_cache_destroy(rbd_img_request_cache);
74448c2ecf20Sopenharmony_ci	rbd_img_request_cache = NULL;
74458c2ecf20Sopenharmony_ci	return -ENOMEM;
74468c2ecf20Sopenharmony_ci}
74478c2ecf20Sopenharmony_ci
74488c2ecf20Sopenharmony_cistatic void rbd_slab_exit(void)
74498c2ecf20Sopenharmony_ci{
74508c2ecf20Sopenharmony_ci	rbd_assert(rbd_obj_request_cache);
74518c2ecf20Sopenharmony_ci	kmem_cache_destroy(rbd_obj_request_cache);
74528c2ecf20Sopenharmony_ci	rbd_obj_request_cache = NULL;
74538c2ecf20Sopenharmony_ci
74548c2ecf20Sopenharmony_ci	rbd_assert(rbd_img_request_cache);
74558c2ecf20Sopenharmony_ci	kmem_cache_destroy(rbd_img_request_cache);
74568c2ecf20Sopenharmony_ci	rbd_img_request_cache = NULL;
74578c2ecf20Sopenharmony_ci}
74588c2ecf20Sopenharmony_ci
74598c2ecf20Sopenharmony_cistatic int __init rbd_init(void)
74608c2ecf20Sopenharmony_ci{
74618c2ecf20Sopenharmony_ci	int rc;
74628c2ecf20Sopenharmony_ci
74638c2ecf20Sopenharmony_ci	if (!libceph_compatible(NULL)) {
74648c2ecf20Sopenharmony_ci		rbd_warn(NULL, "libceph incompatibility (quitting)");
74658c2ecf20Sopenharmony_ci		return -EINVAL;
74668c2ecf20Sopenharmony_ci	}
74678c2ecf20Sopenharmony_ci
74688c2ecf20Sopenharmony_ci	rc = rbd_slab_init();
74698c2ecf20Sopenharmony_ci	if (rc)
74708c2ecf20Sopenharmony_ci		return rc;
74718c2ecf20Sopenharmony_ci
74728c2ecf20Sopenharmony_ci	/*
74738c2ecf20Sopenharmony_ci	 * The number of active work items is limited by the number of
74748c2ecf20Sopenharmony_ci	 * rbd devices * queue depth, so leave @max_active at default.
74758c2ecf20Sopenharmony_ci	 */
74768c2ecf20Sopenharmony_ci	rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
74778c2ecf20Sopenharmony_ci	if (!rbd_wq) {
74788c2ecf20Sopenharmony_ci		rc = -ENOMEM;
74798c2ecf20Sopenharmony_ci		goto err_out_slab;
74808c2ecf20Sopenharmony_ci	}
74818c2ecf20Sopenharmony_ci
74828c2ecf20Sopenharmony_ci	if (single_major) {
74838c2ecf20Sopenharmony_ci		rbd_major = register_blkdev(0, RBD_DRV_NAME);
74848c2ecf20Sopenharmony_ci		if (rbd_major < 0) {
74858c2ecf20Sopenharmony_ci			rc = rbd_major;
74868c2ecf20Sopenharmony_ci			goto err_out_wq;
74878c2ecf20Sopenharmony_ci		}
74888c2ecf20Sopenharmony_ci	}
74898c2ecf20Sopenharmony_ci
74908c2ecf20Sopenharmony_ci	rc = rbd_sysfs_init();
74918c2ecf20Sopenharmony_ci	if (rc)
74928c2ecf20Sopenharmony_ci		goto err_out_blkdev;
74938c2ecf20Sopenharmony_ci
74948c2ecf20Sopenharmony_ci	if (single_major)
74958c2ecf20Sopenharmony_ci		pr_info("loaded (major %d)\n", rbd_major);
74968c2ecf20Sopenharmony_ci	else
74978c2ecf20Sopenharmony_ci		pr_info("loaded\n");
74988c2ecf20Sopenharmony_ci
74998c2ecf20Sopenharmony_ci	return 0;
75008c2ecf20Sopenharmony_ci
75018c2ecf20Sopenharmony_cierr_out_blkdev:
75028c2ecf20Sopenharmony_ci	if (single_major)
75038c2ecf20Sopenharmony_ci		unregister_blkdev(rbd_major, RBD_DRV_NAME);
75048c2ecf20Sopenharmony_cierr_out_wq:
75058c2ecf20Sopenharmony_ci	destroy_workqueue(rbd_wq);
75068c2ecf20Sopenharmony_cierr_out_slab:
75078c2ecf20Sopenharmony_ci	rbd_slab_exit();
75088c2ecf20Sopenharmony_ci	return rc;
75098c2ecf20Sopenharmony_ci}
75108c2ecf20Sopenharmony_ci
75118c2ecf20Sopenharmony_cistatic void __exit rbd_exit(void)
75128c2ecf20Sopenharmony_ci{
75138c2ecf20Sopenharmony_ci	ida_destroy(&rbd_dev_id_ida);
75148c2ecf20Sopenharmony_ci	rbd_sysfs_cleanup();
75158c2ecf20Sopenharmony_ci	if (single_major)
75168c2ecf20Sopenharmony_ci		unregister_blkdev(rbd_major, RBD_DRV_NAME);
75178c2ecf20Sopenharmony_ci	destroy_workqueue(rbd_wq);
75188c2ecf20Sopenharmony_ci	rbd_slab_exit();
75198c2ecf20Sopenharmony_ci}
75208c2ecf20Sopenharmony_ci
/* Module entry and exit points: rbd_init() and rbd_exit(). */
75218c2ecf20Sopenharmony_cimodule_init(rbd_init);
75228c2ecf20Sopenharmony_cimodule_exit(rbd_exit);
75238c2ecf20Sopenharmony_ci
/* Module metadata: authors, followed by description and license. */
75248c2ecf20Sopenharmony_ciMODULE_AUTHOR("Alex Elder <elder@inktank.com>");
75258c2ecf20Sopenharmony_ciMODULE_AUTHOR("Sage Weil <sage@newdream.net>");
75268c2ecf20Sopenharmony_ciMODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
75278c2ecf20Sopenharmony_ci/* following authorship retained from original osdblk.c */
75288c2ecf20Sopenharmony_ciMODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
75298c2ecf20Sopenharmony_ci
75308c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("RADOS Block Device (RBD) driver");
75318c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
7532