18c2ecf20Sopenharmony_ci 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci rbd.c -- Export ceph rados objects as a Linux block device 48c2ecf20Sopenharmony_ci 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci based on drivers/block/osdblk.c: 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci Copyright 2009 Red Hat, Inc. 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci This program is free software; you can redistribute it and/or modify 118c2ecf20Sopenharmony_ci it under the terms of the GNU General Public License as published by 128c2ecf20Sopenharmony_ci the Free Software Foundation. 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci This program is distributed in the hope that it will be useful, 158c2ecf20Sopenharmony_ci but WITHOUT ANY WARRANTY; without even the implied warranty of 168c2ecf20Sopenharmony_ci MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 178c2ecf20Sopenharmony_ci GNU General Public License for more details. 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci You should have received a copy of the GNU General Public License 208c2ecf20Sopenharmony_ci along with this program; see the file COPYING. If not, write to 218c2ecf20Sopenharmony_ci the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci For usage instructions, please refer to: 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci Documentation/ABI/testing/sysfs-bus-rbd 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci */ 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci#include <linux/ceph/libceph.h> 328c2ecf20Sopenharmony_ci#include <linux/ceph/osd_client.h> 338c2ecf20Sopenharmony_ci#include <linux/ceph/mon_client.h> 348c2ecf20Sopenharmony_ci#include <linux/ceph/cls_lock_client.h> 358c2ecf20Sopenharmony_ci#include <linux/ceph/striper.h> 368c2ecf20Sopenharmony_ci#include <linux/ceph/decode.h> 378c2ecf20Sopenharmony_ci#include <linux/fs_parser.h> 388c2ecf20Sopenharmony_ci#include <linux/bsearch.h> 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci#include <linux/kernel.h> 418c2ecf20Sopenharmony_ci#include <linux/device.h> 428c2ecf20Sopenharmony_ci#include <linux/module.h> 438c2ecf20Sopenharmony_ci#include <linux/blk-mq.h> 448c2ecf20Sopenharmony_ci#include <linux/fs.h> 458c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 468c2ecf20Sopenharmony_ci#include <linux/slab.h> 478c2ecf20Sopenharmony_ci#include <linux/idr.h> 488c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci#include "rbd_types.h" 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#define RBD_DEBUG /* Activate rbd_assert() calls */ 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci/* 558c2ecf20Sopenharmony_ci * Increment the given counter and return its updated value. 568c2ecf20Sopenharmony_ci * If the counter is already 0 it will not be incremented. 578c2ecf20Sopenharmony_ci * If the counter is already at its maximum value returns 588c2ecf20Sopenharmony_ci * -EINVAL without updating it. 
598c2ecf20Sopenharmony_ci */ 608c2ecf20Sopenharmony_cistatic int atomic_inc_return_safe(atomic_t *v) 618c2ecf20Sopenharmony_ci{ 628c2ecf20Sopenharmony_ci unsigned int counter; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0); 658c2ecf20Sopenharmony_ci if (counter <= (unsigned int)INT_MAX) 668c2ecf20Sopenharmony_ci return (int)counter; 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci atomic_dec(v); 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci return -EINVAL; 718c2ecf20Sopenharmony_ci} 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci/* Decrement the counter. Return the resulting value, or -EINVAL */ 748c2ecf20Sopenharmony_cistatic int atomic_dec_return_safe(atomic_t *v) 758c2ecf20Sopenharmony_ci{ 768c2ecf20Sopenharmony_ci int counter; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci counter = atomic_dec_return(v); 798c2ecf20Sopenharmony_ci if (counter >= 0) 808c2ecf20Sopenharmony_ci return counter; 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci atomic_inc(v); 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci return -EINVAL; 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci#define RBD_DRV_NAME "rbd" 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci#define RBD_MINORS_PER_MAJOR 256 908c2ecf20Sopenharmony_ci#define RBD_SINGLE_MAJOR_PART_SHIFT 4 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci#define RBD_MAX_PARENT_CHAIN_LEN 16 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci#define RBD_SNAP_DEV_NAME_PREFIX "snap_" 958c2ecf20Sopenharmony_ci#define RBD_MAX_SNAP_NAME_LEN \ 968c2ecf20Sopenharmony_ci (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci#define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci#define RBD_SNAP_HEAD_NAME "-" 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci#define BAD_SNAP_INDEX U32_MAX /* invalid index into 
snap array */ 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci/* This allows a single page to hold an image name sent by OSD */ 1058c2ecf20Sopenharmony_ci#define RBD_IMAGE_NAME_LEN_MAX (PAGE_SIZE - sizeof (__le32) - 1) 1068c2ecf20Sopenharmony_ci#define RBD_IMAGE_ID_LEN_MAX 64 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci#define RBD_OBJ_PREFIX_LEN_MAX 64 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci#define RBD_NOTIFY_TIMEOUT 5 /* seconds */ 1118c2ecf20Sopenharmony_ci#define RBD_RETRY_DELAY msecs_to_jiffies(1000) 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci/* Feature bits */ 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci#define RBD_FEATURE_LAYERING (1ULL<<0) 1168c2ecf20Sopenharmony_ci#define RBD_FEATURE_STRIPINGV2 (1ULL<<1) 1178c2ecf20Sopenharmony_ci#define RBD_FEATURE_EXCLUSIVE_LOCK (1ULL<<2) 1188c2ecf20Sopenharmony_ci#define RBD_FEATURE_OBJECT_MAP (1ULL<<3) 1198c2ecf20Sopenharmony_ci#define RBD_FEATURE_FAST_DIFF (1ULL<<4) 1208c2ecf20Sopenharmony_ci#define RBD_FEATURE_DEEP_FLATTEN (1ULL<<5) 1218c2ecf20Sopenharmony_ci#define RBD_FEATURE_DATA_POOL (1ULL<<7) 1228c2ecf20Sopenharmony_ci#define RBD_FEATURE_OPERATIONS (1ULL<<8) 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci#define RBD_FEATURES_ALL (RBD_FEATURE_LAYERING | \ 1258c2ecf20Sopenharmony_ci RBD_FEATURE_STRIPINGV2 | \ 1268c2ecf20Sopenharmony_ci RBD_FEATURE_EXCLUSIVE_LOCK | \ 1278c2ecf20Sopenharmony_ci RBD_FEATURE_OBJECT_MAP | \ 1288c2ecf20Sopenharmony_ci RBD_FEATURE_FAST_DIFF | \ 1298c2ecf20Sopenharmony_ci RBD_FEATURE_DEEP_FLATTEN | \ 1308c2ecf20Sopenharmony_ci RBD_FEATURE_DATA_POOL | \ 1318c2ecf20Sopenharmony_ci RBD_FEATURE_OPERATIONS) 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci/* Features supported by this (client software) implementation. 
*/ 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci#define RBD_FEATURES_SUPPORTED (RBD_FEATURES_ALL) 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci/* 1388c2ecf20Sopenharmony_ci * An RBD device name will be "rbd#", where the "rbd" comes from 1398c2ecf20Sopenharmony_ci * RBD_DRV_NAME above, and # is a unique integer identifier. 1408c2ecf20Sopenharmony_ci */ 1418c2ecf20Sopenharmony_ci#define DEV_NAME_LEN 32 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci/* 1448c2ecf20Sopenharmony_ci * block device image metadata (in-memory version) 1458c2ecf20Sopenharmony_ci */ 1468c2ecf20Sopenharmony_cistruct rbd_image_header { 1478c2ecf20Sopenharmony_ci /* These six fields never change for a given rbd image */ 1488c2ecf20Sopenharmony_ci char *object_prefix; 1498c2ecf20Sopenharmony_ci __u8 obj_order; 1508c2ecf20Sopenharmony_ci u64 stripe_unit; 1518c2ecf20Sopenharmony_ci u64 stripe_count; 1528c2ecf20Sopenharmony_ci s64 data_pool_id; 1538c2ecf20Sopenharmony_ci u64 features; /* Might be changeable someday? */ 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci /* The remaining fields need to be updated occasionally */ 1568c2ecf20Sopenharmony_ci u64 image_size; 1578c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc; 1588c2ecf20Sopenharmony_ci char *snap_names; /* format 1 only */ 1598c2ecf20Sopenharmony_ci u64 *snap_sizes; /* format 1 only */ 1608c2ecf20Sopenharmony_ci}; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci/* 1638c2ecf20Sopenharmony_ci * An rbd image specification. 1648c2ecf20Sopenharmony_ci * 1658c2ecf20Sopenharmony_ci * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely 1668c2ecf20Sopenharmony_ci * identify an image. Each rbd_dev structure includes a pointer to 1678c2ecf20Sopenharmony_ci * an rbd_spec structure that encapsulates this identity. 1688c2ecf20Sopenharmony_ci * 1698c2ecf20Sopenharmony_ci * Each of the id's in an rbd_spec has an associated name. 
For a 1708c2ecf20Sopenharmony_ci * user-mapped image, the names are supplied and the id's associated 1718c2ecf20Sopenharmony_ci * with them are looked up. For a layered image, a parent image is 1728c2ecf20Sopenharmony_ci * defined by the tuple, and the names are looked up. 1738c2ecf20Sopenharmony_ci * 1748c2ecf20Sopenharmony_ci * An rbd_dev structure contains a parent_spec pointer which is 1758c2ecf20Sopenharmony_ci * non-null if the image it represents is a child in a layered 1768c2ecf20Sopenharmony_ci * image. This pointer will refer to the rbd_spec structure used 1778c2ecf20Sopenharmony_ci * by the parent rbd_dev for its own identity (i.e., the structure 1788c2ecf20Sopenharmony_ci * is shared between the parent and child). 1798c2ecf20Sopenharmony_ci * 1808c2ecf20Sopenharmony_ci * Since these structures are populated once, during the discovery 1818c2ecf20Sopenharmony_ci * phase of image construction, they are effectively immutable so 1828c2ecf20Sopenharmony_ci * we make no effort to synchronize access to them. 1838c2ecf20Sopenharmony_ci * 1848c2ecf20Sopenharmony_ci * Note that code herein does not assume the image name is known (it 1858c2ecf20Sopenharmony_ci * could be a null pointer). 1868c2ecf20Sopenharmony_ci */ 1878c2ecf20Sopenharmony_cistruct rbd_spec { 1888c2ecf20Sopenharmony_ci u64 pool_id; 1898c2ecf20Sopenharmony_ci const char *pool_name; 1908c2ecf20Sopenharmony_ci const char *pool_ns; /* NULL if default, never "" */ 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci const char *image_id; 1938c2ecf20Sopenharmony_ci const char *image_name; 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci u64 snap_id; 1968c2ecf20Sopenharmony_ci const char *snap_name; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci struct kref kref; 1998c2ecf20Sopenharmony_ci}; 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci/* 2028c2ecf20Sopenharmony_ci * an instance of the client. multiple devices may share an rbd client. 
2038c2ecf20Sopenharmony_ci */ 2048c2ecf20Sopenharmony_cistruct rbd_client { 2058c2ecf20Sopenharmony_ci struct ceph_client *client; 2068c2ecf20Sopenharmony_ci struct kref kref; 2078c2ecf20Sopenharmony_ci struct list_head node; 2088c2ecf20Sopenharmony_ci}; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_cistruct pending_result { 2118c2ecf20Sopenharmony_ci int result; /* first nonzero result */ 2128c2ecf20Sopenharmony_ci int num_pending; 2138c2ecf20Sopenharmony_ci}; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_cistruct rbd_img_request; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_cienum obj_request_type { 2188c2ecf20Sopenharmony_ci OBJ_REQUEST_NODATA = 1, 2198c2ecf20Sopenharmony_ci OBJ_REQUEST_BIO, /* pointer into provided bio (list) */ 2208c2ecf20Sopenharmony_ci OBJ_REQUEST_BVECS, /* pointer into provided bio_vec array */ 2218c2ecf20Sopenharmony_ci OBJ_REQUEST_OWN_BVECS, /* private bio_vec array, doesn't own pages */ 2228c2ecf20Sopenharmony_ci}; 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_cienum obj_operation_type { 2258c2ecf20Sopenharmony_ci OBJ_OP_READ = 1, 2268c2ecf20Sopenharmony_ci OBJ_OP_WRITE, 2278c2ecf20Sopenharmony_ci OBJ_OP_DISCARD, 2288c2ecf20Sopenharmony_ci OBJ_OP_ZEROOUT, 2298c2ecf20Sopenharmony_ci}; 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci#define RBD_OBJ_FLAG_DELETION (1U << 0) 2328c2ecf20Sopenharmony_ci#define RBD_OBJ_FLAG_COPYUP_ENABLED (1U << 1) 2338c2ecf20Sopenharmony_ci#define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2) 2348c2ecf20Sopenharmony_ci#define RBD_OBJ_FLAG_MAY_EXIST (1U << 3) 2358c2ecf20Sopenharmony_ci#define RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT (1U << 4) 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_cienum rbd_obj_read_state { 2388c2ecf20Sopenharmony_ci RBD_OBJ_READ_START = 1, 2398c2ecf20Sopenharmony_ci RBD_OBJ_READ_OBJECT, 2408c2ecf20Sopenharmony_ci RBD_OBJ_READ_PARENT, 2418c2ecf20Sopenharmony_ci}; 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci/* 2448c2ecf20Sopenharmony_ci * Writes go through the 
following state machine to deal with 2458c2ecf20Sopenharmony_ci * layering: 2468c2ecf20Sopenharmony_ci * 2478c2ecf20Sopenharmony_ci * . . . . . RBD_OBJ_WRITE_GUARD. . . . . . . . . . . . . . 2488c2ecf20Sopenharmony_ci * . | . 2498c2ecf20Sopenharmony_ci * . v . 2508c2ecf20Sopenharmony_ci * . RBD_OBJ_WRITE_READ_FROM_PARENT. . . . 2518c2ecf20Sopenharmony_ci * . | . . 2528c2ecf20Sopenharmony_ci * . v v (deep-copyup . 2538c2ecf20Sopenharmony_ci * (image . RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC . not needed) . 2548c2ecf20Sopenharmony_ci * flattened) v | . . 2558c2ecf20Sopenharmony_ci * . v . . 2568c2ecf20Sopenharmony_ci * . . . .RBD_OBJ_WRITE_COPYUP_OPS. . . . . (copyup . 2578c2ecf20Sopenharmony_ci * | not needed) v 2588c2ecf20Sopenharmony_ci * v . 2598c2ecf20Sopenharmony_ci * done . . . . . . . . . . . . . . . . . . 2608c2ecf20Sopenharmony_ci * ^ 2618c2ecf20Sopenharmony_ci * | 2628c2ecf20Sopenharmony_ci * RBD_OBJ_WRITE_FLAT 2638c2ecf20Sopenharmony_ci * 2648c2ecf20Sopenharmony_ci * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether 2658c2ecf20Sopenharmony_ci * assert_exists guard is needed or not (in some cases it's not needed 2668c2ecf20Sopenharmony_ci * even if there is a parent). 
2678c2ecf20Sopenharmony_ci */ 2688c2ecf20Sopenharmony_cienum rbd_obj_write_state { 2698c2ecf20Sopenharmony_ci RBD_OBJ_WRITE_START = 1, 2708c2ecf20Sopenharmony_ci RBD_OBJ_WRITE_PRE_OBJECT_MAP, 2718c2ecf20Sopenharmony_ci RBD_OBJ_WRITE_OBJECT, 2728c2ecf20Sopenharmony_ci __RBD_OBJ_WRITE_COPYUP, 2738c2ecf20Sopenharmony_ci RBD_OBJ_WRITE_COPYUP, 2748c2ecf20Sopenharmony_ci RBD_OBJ_WRITE_POST_OBJECT_MAP, 2758c2ecf20Sopenharmony_ci}; 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_cienum rbd_obj_copyup_state { 2788c2ecf20Sopenharmony_ci RBD_OBJ_COPYUP_START = 1, 2798c2ecf20Sopenharmony_ci RBD_OBJ_COPYUP_READ_PARENT, 2808c2ecf20Sopenharmony_ci __RBD_OBJ_COPYUP_OBJECT_MAPS, 2818c2ecf20Sopenharmony_ci RBD_OBJ_COPYUP_OBJECT_MAPS, 2828c2ecf20Sopenharmony_ci __RBD_OBJ_COPYUP_WRITE_OBJECT, 2838c2ecf20Sopenharmony_ci RBD_OBJ_COPYUP_WRITE_OBJECT, 2848c2ecf20Sopenharmony_ci}; 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_cistruct rbd_obj_request { 2878c2ecf20Sopenharmony_ci struct ceph_object_extent ex; 2888c2ecf20Sopenharmony_ci unsigned int flags; /* RBD_OBJ_FLAG_* */ 2898c2ecf20Sopenharmony_ci union { 2908c2ecf20Sopenharmony_ci enum rbd_obj_read_state read_state; /* for reads */ 2918c2ecf20Sopenharmony_ci enum rbd_obj_write_state write_state; /* for writes */ 2928c2ecf20Sopenharmony_ci }; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci struct rbd_img_request *img_request; 2958c2ecf20Sopenharmony_ci struct ceph_file_extent *img_extents; 2968c2ecf20Sopenharmony_ci u32 num_img_extents; 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci union { 2998c2ecf20Sopenharmony_ci struct ceph_bio_iter bio_pos; 3008c2ecf20Sopenharmony_ci struct { 3018c2ecf20Sopenharmony_ci struct ceph_bvec_iter bvec_pos; 3028c2ecf20Sopenharmony_ci u32 bvec_count; 3038c2ecf20Sopenharmony_ci u32 bvec_idx; 3048c2ecf20Sopenharmony_ci }; 3058c2ecf20Sopenharmony_ci }; 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_ci enum rbd_obj_copyup_state copyup_state; 3088c2ecf20Sopenharmony_ci struct bio_vec 
*copyup_bvecs; 3098c2ecf20Sopenharmony_ci u32 copyup_bvec_count; 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci struct list_head osd_reqs; /* w/ r_private_item */ 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci struct mutex state_mutex; 3148c2ecf20Sopenharmony_ci struct pending_result pending; 3158c2ecf20Sopenharmony_ci struct kref kref; 3168c2ecf20Sopenharmony_ci}; 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_cienum img_req_flags { 3198c2ecf20Sopenharmony_ci IMG_REQ_CHILD, /* initiator: block = 0, child image = 1 */ 3208c2ecf20Sopenharmony_ci IMG_REQ_LAYERED, /* ENOENT handling: normal = 0, layered = 1 */ 3218c2ecf20Sopenharmony_ci}; 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_cienum rbd_img_state { 3248c2ecf20Sopenharmony_ci RBD_IMG_START = 1, 3258c2ecf20Sopenharmony_ci RBD_IMG_EXCLUSIVE_LOCK, 3268c2ecf20Sopenharmony_ci __RBD_IMG_OBJECT_REQUESTS, 3278c2ecf20Sopenharmony_ci RBD_IMG_OBJECT_REQUESTS, 3288c2ecf20Sopenharmony_ci}; 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_cistruct rbd_img_request { 3318c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev; 3328c2ecf20Sopenharmony_ci enum obj_operation_type op_type; 3338c2ecf20Sopenharmony_ci enum obj_request_type data_type; 3348c2ecf20Sopenharmony_ci unsigned long flags; 3358c2ecf20Sopenharmony_ci enum rbd_img_state state; 3368c2ecf20Sopenharmony_ci union { 3378c2ecf20Sopenharmony_ci u64 snap_id; /* for reads */ 3388c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc; /* for writes */ 3398c2ecf20Sopenharmony_ci }; 3408c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request; /* obj req initiator */ 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci struct list_head lock_item; 3438c2ecf20Sopenharmony_ci struct list_head object_extents; /* obj_req.ex structs */ 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci struct mutex state_mutex; 3468c2ecf20Sopenharmony_ci struct pending_result pending; 3478c2ecf20Sopenharmony_ci struct work_struct work; 3488c2ecf20Sopenharmony_ci int 
work_result; 3498c2ecf20Sopenharmony_ci}; 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci#define for_each_obj_request(ireq, oreq) \ 3528c2ecf20Sopenharmony_ci list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item) 3538c2ecf20Sopenharmony_ci#define for_each_obj_request_safe(ireq, oreq, n) \ 3548c2ecf20Sopenharmony_ci list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item) 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_cienum rbd_watch_state { 3578c2ecf20Sopenharmony_ci RBD_WATCH_STATE_UNREGISTERED, 3588c2ecf20Sopenharmony_ci RBD_WATCH_STATE_REGISTERED, 3598c2ecf20Sopenharmony_ci RBD_WATCH_STATE_ERROR, 3608c2ecf20Sopenharmony_ci}; 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_cienum rbd_lock_state { 3638c2ecf20Sopenharmony_ci RBD_LOCK_STATE_UNLOCKED, 3648c2ecf20Sopenharmony_ci RBD_LOCK_STATE_LOCKED, 3658c2ecf20Sopenharmony_ci RBD_LOCK_STATE_RELEASING, 3668c2ecf20Sopenharmony_ci}; 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci/* WatchNotify::ClientId */ 3698c2ecf20Sopenharmony_cistruct rbd_client_id { 3708c2ecf20Sopenharmony_ci u64 gid; 3718c2ecf20Sopenharmony_ci u64 handle; 3728c2ecf20Sopenharmony_ci}; 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_cistruct rbd_mapping { 3758c2ecf20Sopenharmony_ci u64 size; 3768c2ecf20Sopenharmony_ci}; 3778c2ecf20Sopenharmony_ci 3788c2ecf20Sopenharmony_ci/* 3798c2ecf20Sopenharmony_ci * a single device 3808c2ecf20Sopenharmony_ci */ 3818c2ecf20Sopenharmony_cistruct rbd_device { 3828c2ecf20Sopenharmony_ci int dev_id; /* blkdev unique id */ 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci int major; /* blkdev assigned major */ 3858c2ecf20Sopenharmony_ci int minor; 3868c2ecf20Sopenharmony_ci struct gendisk *disk; /* blkdev's gendisk and rq */ 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci u32 image_format; /* Either 1 or 2 */ 3898c2ecf20Sopenharmony_ci struct rbd_client *rbd_client; 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci char name[DEV_NAME_LEN]; /* blkdev name, 
e.g. rbd3 */ 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci spinlock_t lock; /* queue, flags, open_count */ 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ci struct rbd_image_header header; 3968c2ecf20Sopenharmony_ci unsigned long flags; /* possibly lock protected */ 3978c2ecf20Sopenharmony_ci struct rbd_spec *spec; 3988c2ecf20Sopenharmony_ci struct rbd_options *opts; 3998c2ecf20Sopenharmony_ci char *config_info; /* add{,_single_major} string */ 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_ci struct ceph_object_id header_oid; 4028c2ecf20Sopenharmony_ci struct ceph_object_locator header_oloc; 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci struct ceph_file_layout layout; /* used for all rbd requests */ 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci struct mutex watch_mutex; 4078c2ecf20Sopenharmony_ci enum rbd_watch_state watch_state; 4088c2ecf20Sopenharmony_ci struct ceph_osd_linger_request *watch_handle; 4098c2ecf20Sopenharmony_ci u64 watch_cookie; 4108c2ecf20Sopenharmony_ci struct delayed_work watch_dwork; 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci struct rw_semaphore lock_rwsem; 4138c2ecf20Sopenharmony_ci enum rbd_lock_state lock_state; 4148c2ecf20Sopenharmony_ci char lock_cookie[32]; 4158c2ecf20Sopenharmony_ci struct rbd_client_id owner_cid; 4168c2ecf20Sopenharmony_ci struct work_struct acquired_lock_work; 4178c2ecf20Sopenharmony_ci struct work_struct released_lock_work; 4188c2ecf20Sopenharmony_ci struct delayed_work lock_dwork; 4198c2ecf20Sopenharmony_ci struct work_struct unlock_work; 4208c2ecf20Sopenharmony_ci spinlock_t lock_lists_lock; 4218c2ecf20Sopenharmony_ci struct list_head acquiring_list; 4228c2ecf20Sopenharmony_ci struct list_head running_list; 4238c2ecf20Sopenharmony_ci struct completion acquire_wait; 4248c2ecf20Sopenharmony_ci int acquire_err; 4258c2ecf20Sopenharmony_ci struct completion releasing_wait; 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci spinlock_t object_map_lock; 
4288c2ecf20Sopenharmony_ci u8 *object_map; 4298c2ecf20Sopenharmony_ci u64 object_map_size; /* in objects */ 4308c2ecf20Sopenharmony_ci u64 object_map_flags; 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci struct workqueue_struct *task_wq; 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci struct rbd_spec *parent_spec; 4358c2ecf20Sopenharmony_ci u64 parent_overlap; 4368c2ecf20Sopenharmony_ci atomic_t parent_ref; 4378c2ecf20Sopenharmony_ci struct rbd_device *parent; 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci /* Block layer tags. */ 4408c2ecf20Sopenharmony_ci struct blk_mq_tag_set tag_set; 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci /* protects updating the header */ 4438c2ecf20Sopenharmony_ci struct rw_semaphore header_rwsem; 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci struct rbd_mapping mapping; 4468c2ecf20Sopenharmony_ci 4478c2ecf20Sopenharmony_ci struct list_head node; 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci /* sysfs related */ 4508c2ecf20Sopenharmony_ci struct device dev; 4518c2ecf20Sopenharmony_ci unsigned long open_count; /* protected by lock */ 4528c2ecf20Sopenharmony_ci}; 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci/* 4558c2ecf20Sopenharmony_ci * Flag bits for rbd_dev->flags: 4568c2ecf20Sopenharmony_ci * - REMOVING (which is coupled with rbd_dev->open_count) is protected 4578c2ecf20Sopenharmony_ci * by rbd_dev->lock 4588c2ecf20Sopenharmony_ci */ 4598c2ecf20Sopenharmony_cienum rbd_dev_flags { 4608c2ecf20Sopenharmony_ci RBD_DEV_FLAG_EXISTS, /* rbd_dev_device_setup() ran */ 4618c2ecf20Sopenharmony_ci RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ 4628c2ecf20Sopenharmony_ci RBD_DEV_FLAG_READONLY, /* -o ro or snapshot */ 4638c2ecf20Sopenharmony_ci}; 4648c2ecf20Sopenharmony_ci 4658c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(client_mutex); /* Serialize client creation */ 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_cistatic LIST_HEAD(rbd_dev_list); /* devices */ 
4688c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(rbd_dev_list_lock); 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_cistatic LIST_HEAD(rbd_client_list); /* clients */ 4718c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(rbd_client_list_lock); 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ci/* Slab caches for frequently-allocated structures */ 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_cistatic struct kmem_cache *rbd_img_request_cache; 4768c2ecf20Sopenharmony_cistatic struct kmem_cache *rbd_obj_request_cache; 4778c2ecf20Sopenharmony_ci 4788c2ecf20Sopenharmony_cistatic int rbd_major; 4798c2ecf20Sopenharmony_cistatic DEFINE_IDA(rbd_dev_id_ida); 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_cistatic struct workqueue_struct *rbd_wq; 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_cistatic struct ceph_snap_context rbd_empty_snapc = { 4848c2ecf20Sopenharmony_ci .nref = REFCOUNT_INIT(1), 4858c2ecf20Sopenharmony_ci}; 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci/* 4888c2ecf20Sopenharmony_ci * single-major requires >= 0.75 version of userspace rbd utility. 
4898c2ecf20Sopenharmony_ci */ 4908c2ecf20Sopenharmony_cistatic bool single_major = true; 4918c2ecf20Sopenharmony_cimodule_param(single_major, bool, 0444); 4928c2ecf20Sopenharmony_ciMODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)"); 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_cistatic ssize_t add_store(struct bus_type *bus, const char *buf, size_t count); 4958c2ecf20Sopenharmony_cistatic ssize_t remove_store(struct bus_type *bus, const char *buf, 4968c2ecf20Sopenharmony_ci size_t count); 4978c2ecf20Sopenharmony_cistatic ssize_t add_single_major_store(struct bus_type *bus, const char *buf, 4988c2ecf20Sopenharmony_ci size_t count); 4998c2ecf20Sopenharmony_cistatic ssize_t remove_single_major_store(struct bus_type *bus, const char *buf, 5008c2ecf20Sopenharmony_ci size_t count); 5018c2ecf20Sopenharmony_cistatic int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth); 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_cistatic int rbd_dev_id_to_minor(int dev_id) 5048c2ecf20Sopenharmony_ci{ 5058c2ecf20Sopenharmony_ci return dev_id << RBD_SINGLE_MAJOR_PART_SHIFT; 5068c2ecf20Sopenharmony_ci} 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_cistatic int minor_to_rbd_dev_id(int minor) 5098c2ecf20Sopenharmony_ci{ 5108c2ecf20Sopenharmony_ci return minor >> RBD_SINGLE_MAJOR_PART_SHIFT; 5118c2ecf20Sopenharmony_ci} 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_cistatic bool rbd_is_ro(struct rbd_device *rbd_dev) 5148c2ecf20Sopenharmony_ci{ 5158c2ecf20Sopenharmony_ci return test_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); 5168c2ecf20Sopenharmony_ci} 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_cistatic bool rbd_is_snap(struct rbd_device *rbd_dev) 5198c2ecf20Sopenharmony_ci{ 5208c2ecf20Sopenharmony_ci return rbd_dev->spec->snap_id != CEPH_NOSNAP; 5218c2ecf20Sopenharmony_ci} 5228c2ecf20Sopenharmony_ci 5238c2ecf20Sopenharmony_cistatic bool __rbd_is_lock_owner(struct rbd_device *rbd_dev) 
5248c2ecf20Sopenharmony_ci{ 5258c2ecf20Sopenharmony_ci lockdep_assert_held(&rbd_dev->lock_rwsem); 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED || 5288c2ecf20Sopenharmony_ci rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING; 5298c2ecf20Sopenharmony_ci} 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_cistatic bool rbd_is_lock_owner(struct rbd_device *rbd_dev) 5328c2ecf20Sopenharmony_ci{ 5338c2ecf20Sopenharmony_ci bool is_lock_owner; 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci down_read(&rbd_dev->lock_rwsem); 5368c2ecf20Sopenharmony_ci is_lock_owner = __rbd_is_lock_owner(rbd_dev); 5378c2ecf20Sopenharmony_ci up_read(&rbd_dev->lock_rwsem); 5388c2ecf20Sopenharmony_ci return is_lock_owner; 5398c2ecf20Sopenharmony_ci} 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_cistatic ssize_t supported_features_show(struct bus_type *bus, char *buf) 5428c2ecf20Sopenharmony_ci{ 5438c2ecf20Sopenharmony_ci return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED); 5448c2ecf20Sopenharmony_ci} 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_cistatic BUS_ATTR_WO(add); 5478c2ecf20Sopenharmony_cistatic BUS_ATTR_WO(remove); 5488c2ecf20Sopenharmony_cistatic BUS_ATTR_WO(add_single_major); 5498c2ecf20Sopenharmony_cistatic BUS_ATTR_WO(remove_single_major); 5508c2ecf20Sopenharmony_cistatic BUS_ATTR_RO(supported_features); 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_cistatic struct attribute *rbd_bus_attrs[] = { 5538c2ecf20Sopenharmony_ci &bus_attr_add.attr, 5548c2ecf20Sopenharmony_ci &bus_attr_remove.attr, 5558c2ecf20Sopenharmony_ci &bus_attr_add_single_major.attr, 5568c2ecf20Sopenharmony_ci &bus_attr_remove_single_major.attr, 5578c2ecf20Sopenharmony_ci &bus_attr_supported_features.attr, 5588c2ecf20Sopenharmony_ci NULL, 5598c2ecf20Sopenharmony_ci}; 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_cistatic umode_t rbd_bus_is_visible(struct kobject *kobj, 5628c2ecf20Sopenharmony_ci struct attribute *attr, int index) 
5638c2ecf20Sopenharmony_ci{ 5648c2ecf20Sopenharmony_ci if (!single_major && 5658c2ecf20Sopenharmony_ci (attr == &bus_attr_add_single_major.attr || 5668c2ecf20Sopenharmony_ci attr == &bus_attr_remove_single_major.attr)) 5678c2ecf20Sopenharmony_ci return 0; 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci return attr->mode; 5708c2ecf20Sopenharmony_ci} 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_cistatic const struct attribute_group rbd_bus_group = { 5738c2ecf20Sopenharmony_ci .attrs = rbd_bus_attrs, 5748c2ecf20Sopenharmony_ci .is_visible = rbd_bus_is_visible, 5758c2ecf20Sopenharmony_ci}; 5768c2ecf20Sopenharmony_ci__ATTRIBUTE_GROUPS(rbd_bus); 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_cistatic struct bus_type rbd_bus_type = { 5798c2ecf20Sopenharmony_ci .name = "rbd", 5808c2ecf20Sopenharmony_ci .bus_groups = rbd_bus_groups, 5818c2ecf20Sopenharmony_ci}; 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_cistatic void rbd_root_dev_release(struct device *dev) 5848c2ecf20Sopenharmony_ci{ 5858c2ecf20Sopenharmony_ci} 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_cistatic struct device rbd_root_dev = { 5888c2ecf20Sopenharmony_ci .init_name = "rbd", 5898c2ecf20Sopenharmony_ci .release = rbd_root_dev_release, 5908c2ecf20Sopenharmony_ci}; 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_cistatic __printf(2, 3) 5938c2ecf20Sopenharmony_civoid rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) 
5948c2ecf20Sopenharmony_ci{ 5958c2ecf20Sopenharmony_ci struct va_format vaf; 5968c2ecf20Sopenharmony_ci va_list args; 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci va_start(args, fmt); 5998c2ecf20Sopenharmony_ci vaf.fmt = fmt; 6008c2ecf20Sopenharmony_ci vaf.va = &args; 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci if (!rbd_dev) 6038c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); 6048c2ecf20Sopenharmony_ci else if (rbd_dev->disk) 6058c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: %s: %pV\n", 6068c2ecf20Sopenharmony_ci RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); 6078c2ecf20Sopenharmony_ci else if (rbd_dev->spec && rbd_dev->spec->image_name) 6088c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: image %s: %pV\n", 6098c2ecf20Sopenharmony_ci RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); 6108c2ecf20Sopenharmony_ci else if (rbd_dev->spec && rbd_dev->spec->image_id) 6118c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: id %s: %pV\n", 6128c2ecf20Sopenharmony_ci RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); 6138c2ecf20Sopenharmony_ci else /* punt */ 6148c2ecf20Sopenharmony_ci printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", 6158c2ecf20Sopenharmony_ci RBD_DRV_NAME, rbd_dev, &vaf); 6168c2ecf20Sopenharmony_ci va_end(args); 6178c2ecf20Sopenharmony_ci} 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci#ifdef RBD_DEBUG 6208c2ecf20Sopenharmony_ci#define rbd_assert(expr) \ 6218c2ecf20Sopenharmony_ci if (unlikely(!(expr))) { \ 6228c2ecf20Sopenharmony_ci printk(KERN_ERR "\nAssertion failure in %s() " \ 6238c2ecf20Sopenharmony_ci "at line %d:\n\n" \ 6248c2ecf20Sopenharmony_ci "\trbd_assert(%s);\n\n", \ 6258c2ecf20Sopenharmony_ci __func__, __LINE__, #expr); \ 6268c2ecf20Sopenharmony_ci BUG(); \ 6278c2ecf20Sopenharmony_ci } 6288c2ecf20Sopenharmony_ci#else /* !RBD_DEBUG */ 6298c2ecf20Sopenharmony_ci# define rbd_assert(expr) ((void) 0) 6308c2ecf20Sopenharmony_ci#endif /* !RBD_DEBUG */ 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_cistatic 
void rbd_dev_remove_parent(struct rbd_device *rbd_dev); 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_cistatic int rbd_dev_refresh(struct rbd_device *rbd_dev); 6358c2ecf20Sopenharmony_cistatic int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, 6368c2ecf20Sopenharmony_ci struct rbd_image_header *header); 6378c2ecf20Sopenharmony_cistatic const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, 6388c2ecf20Sopenharmony_ci u64 snap_id); 6398c2ecf20Sopenharmony_cistatic int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, 6408c2ecf20Sopenharmony_ci u8 *order, u64 *snap_size); 6418c2ecf20Sopenharmony_cistatic int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev); 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_cistatic void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result); 6448c2ecf20Sopenharmony_cistatic void rbd_img_handle_request(struct rbd_img_request *img_req, int result); 6458c2ecf20Sopenharmony_ci 6468c2ecf20Sopenharmony_ci/* 6478c2ecf20Sopenharmony_ci * Return true if nothing else is pending. 
6488c2ecf20Sopenharmony_ci */ 6498c2ecf20Sopenharmony_cistatic bool pending_result_dec(struct pending_result *pending, int *result) 6508c2ecf20Sopenharmony_ci{ 6518c2ecf20Sopenharmony_ci rbd_assert(pending->num_pending > 0); 6528c2ecf20Sopenharmony_ci 6538c2ecf20Sopenharmony_ci if (*result && !pending->result) 6548c2ecf20Sopenharmony_ci pending->result = *result; 6558c2ecf20Sopenharmony_ci if (--pending->num_pending) 6568c2ecf20Sopenharmony_ci return false; 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci *result = pending->result; 6598c2ecf20Sopenharmony_ci return true; 6608c2ecf20Sopenharmony_ci} 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_cistatic int rbd_open(struct block_device *bdev, fmode_t mode) 6638c2ecf20Sopenharmony_ci{ 6648c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 6658c2ecf20Sopenharmony_ci bool removing = false; 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci spin_lock_irq(&rbd_dev->lock); 6688c2ecf20Sopenharmony_ci if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) 6698c2ecf20Sopenharmony_ci removing = true; 6708c2ecf20Sopenharmony_ci else 6718c2ecf20Sopenharmony_ci rbd_dev->open_count++; 6728c2ecf20Sopenharmony_ci spin_unlock_irq(&rbd_dev->lock); 6738c2ecf20Sopenharmony_ci if (removing) 6748c2ecf20Sopenharmony_ci return -ENOENT; 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci (void) get_device(&rbd_dev->dev); 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci return 0; 6798c2ecf20Sopenharmony_ci} 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_cistatic void rbd_release(struct gendisk *disk, fmode_t mode) 6828c2ecf20Sopenharmony_ci{ 6838c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = disk->private_data; 6848c2ecf20Sopenharmony_ci unsigned long open_count_before; 6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_ci spin_lock_irq(&rbd_dev->lock); 6878c2ecf20Sopenharmony_ci open_count_before = rbd_dev->open_count--; 6888c2ecf20Sopenharmony_ci spin_unlock_irq(&rbd_dev->lock); 
6898c2ecf20Sopenharmony_ci rbd_assert(open_count_before > 0); 6908c2ecf20Sopenharmony_ci 6918c2ecf20Sopenharmony_ci put_device(&rbd_dev->dev); 6928c2ecf20Sopenharmony_ci} 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_cistatic int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) 6958c2ecf20Sopenharmony_ci{ 6968c2ecf20Sopenharmony_ci int ro; 6978c2ecf20Sopenharmony_ci 6988c2ecf20Sopenharmony_ci if (get_user(ro, (int __user *)arg)) 6998c2ecf20Sopenharmony_ci return -EFAULT; 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci /* 7028c2ecf20Sopenharmony_ci * Both images mapped read-only and snapshots can't be marked 7038c2ecf20Sopenharmony_ci * read-write. 7048c2ecf20Sopenharmony_ci */ 7058c2ecf20Sopenharmony_ci if (!ro) { 7068c2ecf20Sopenharmony_ci if (rbd_is_ro(rbd_dev)) 7078c2ecf20Sopenharmony_ci return -EROFS; 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci rbd_assert(!rbd_is_snap(rbd_dev)); 7108c2ecf20Sopenharmony_ci } 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci /* Let blkdev_roset() handle it */ 7138c2ecf20Sopenharmony_ci return -ENOTTY; 7148c2ecf20Sopenharmony_ci} 7158c2ecf20Sopenharmony_ci 7168c2ecf20Sopenharmony_cistatic int rbd_ioctl(struct block_device *bdev, fmode_t mode, 7178c2ecf20Sopenharmony_ci unsigned int cmd, unsigned long arg) 7188c2ecf20Sopenharmony_ci{ 7198c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 7208c2ecf20Sopenharmony_ci int ret; 7218c2ecf20Sopenharmony_ci 7228c2ecf20Sopenharmony_ci switch (cmd) { 7238c2ecf20Sopenharmony_ci case BLKROSET: 7248c2ecf20Sopenharmony_ci ret = rbd_ioctl_set_ro(rbd_dev, arg); 7258c2ecf20Sopenharmony_ci break; 7268c2ecf20Sopenharmony_ci default: 7278c2ecf20Sopenharmony_ci ret = -ENOTTY; 7288c2ecf20Sopenharmony_ci } 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci return ret; 7318c2ecf20Sopenharmony_ci} 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 7348c2ecf20Sopenharmony_cistatic int rbd_compat_ioctl(struct 
block_device *bdev, fmode_t mode, 7358c2ecf20Sopenharmony_ci unsigned int cmd, unsigned long arg) 7368c2ecf20Sopenharmony_ci{ 7378c2ecf20Sopenharmony_ci return rbd_ioctl(bdev, mode, cmd, arg); 7388c2ecf20Sopenharmony_ci} 7398c2ecf20Sopenharmony_ci#endif /* CONFIG_COMPAT */ 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_cistatic const struct block_device_operations rbd_bd_ops = { 7428c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 7438c2ecf20Sopenharmony_ci .open = rbd_open, 7448c2ecf20Sopenharmony_ci .release = rbd_release, 7458c2ecf20Sopenharmony_ci .ioctl = rbd_ioctl, 7468c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 7478c2ecf20Sopenharmony_ci .compat_ioctl = rbd_compat_ioctl, 7488c2ecf20Sopenharmony_ci#endif 7498c2ecf20Sopenharmony_ci}; 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_ci/* 7528c2ecf20Sopenharmony_ci * Initialize an rbd client instance. Success or not, this function 7538c2ecf20Sopenharmony_ci * consumes ceph_opts. Caller holds client_mutex. 7548c2ecf20Sopenharmony_ci */ 7558c2ecf20Sopenharmony_cistatic struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) 7568c2ecf20Sopenharmony_ci{ 7578c2ecf20Sopenharmony_ci struct rbd_client *rbdc; 7588c2ecf20Sopenharmony_ci int ret = -ENOMEM; 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci dout("%s:\n", __func__); 7618c2ecf20Sopenharmony_ci rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); 7628c2ecf20Sopenharmony_ci if (!rbdc) 7638c2ecf20Sopenharmony_ci goto out_opt; 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci kref_init(&rbdc->kref); 7668c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rbdc->node); 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci rbdc->client = ceph_create_client(ceph_opts, rbdc); 7698c2ecf20Sopenharmony_ci if (IS_ERR(rbdc->client)) 7708c2ecf20Sopenharmony_ci goto out_rbdc; 7718c2ecf20Sopenharmony_ci ceph_opts = NULL; /* Now rbdc->client is responsible for ceph_opts */ 7728c2ecf20Sopenharmony_ci 7738c2ecf20Sopenharmony_ci ret = ceph_open_session(rbdc->client); 
7748c2ecf20Sopenharmony_ci if (ret < 0) 7758c2ecf20Sopenharmony_ci goto out_client; 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci spin_lock(&rbd_client_list_lock); 7788c2ecf20Sopenharmony_ci list_add_tail(&rbdc->node, &rbd_client_list); 7798c2ecf20Sopenharmony_ci spin_unlock(&rbd_client_list_lock); 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_ci dout("%s: rbdc %p\n", __func__, rbdc); 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci return rbdc; 7848c2ecf20Sopenharmony_ciout_client: 7858c2ecf20Sopenharmony_ci ceph_destroy_client(rbdc->client); 7868c2ecf20Sopenharmony_ciout_rbdc: 7878c2ecf20Sopenharmony_ci kfree(rbdc); 7888c2ecf20Sopenharmony_ciout_opt: 7898c2ecf20Sopenharmony_ci if (ceph_opts) 7908c2ecf20Sopenharmony_ci ceph_destroy_options(ceph_opts); 7918c2ecf20Sopenharmony_ci dout("%s: error %d\n", __func__, ret); 7928c2ecf20Sopenharmony_ci 7938c2ecf20Sopenharmony_ci return ERR_PTR(ret); 7948c2ecf20Sopenharmony_ci} 7958c2ecf20Sopenharmony_ci 7968c2ecf20Sopenharmony_cistatic struct rbd_client *__rbd_get_client(struct rbd_client *rbdc) 7978c2ecf20Sopenharmony_ci{ 7988c2ecf20Sopenharmony_ci kref_get(&rbdc->kref); 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci return rbdc; 8018c2ecf20Sopenharmony_ci} 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci/* 8048c2ecf20Sopenharmony_ci * Find a ceph client with specific addr and configuration. If 8058c2ecf20Sopenharmony_ci * found, bump its reference count. 
8068c2ecf20Sopenharmony_ci */ 8078c2ecf20Sopenharmony_cistatic struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts) 8088c2ecf20Sopenharmony_ci{ 8098c2ecf20Sopenharmony_ci struct rbd_client *client_node; 8108c2ecf20Sopenharmony_ci bool found = false; 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci if (ceph_opts->flags & CEPH_OPT_NOSHARE) 8138c2ecf20Sopenharmony_ci return NULL; 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci spin_lock(&rbd_client_list_lock); 8168c2ecf20Sopenharmony_ci list_for_each_entry(client_node, &rbd_client_list, node) { 8178c2ecf20Sopenharmony_ci if (!ceph_compare_options(ceph_opts, client_node->client)) { 8188c2ecf20Sopenharmony_ci __rbd_get_client(client_node); 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ci found = true; 8218c2ecf20Sopenharmony_ci break; 8228c2ecf20Sopenharmony_ci } 8238c2ecf20Sopenharmony_ci } 8248c2ecf20Sopenharmony_ci spin_unlock(&rbd_client_list_lock); 8258c2ecf20Sopenharmony_ci 8268c2ecf20Sopenharmony_ci return found ? 
client_node : NULL; 8278c2ecf20Sopenharmony_ci} 8288c2ecf20Sopenharmony_ci 8298c2ecf20Sopenharmony_ci/* 8308c2ecf20Sopenharmony_ci * (Per device) rbd map options 8318c2ecf20Sopenharmony_ci */ 8328c2ecf20Sopenharmony_cienum { 8338c2ecf20Sopenharmony_ci Opt_queue_depth, 8348c2ecf20Sopenharmony_ci Opt_alloc_size, 8358c2ecf20Sopenharmony_ci Opt_lock_timeout, 8368c2ecf20Sopenharmony_ci /* int args above */ 8378c2ecf20Sopenharmony_ci Opt_pool_ns, 8388c2ecf20Sopenharmony_ci Opt_compression_hint, 8398c2ecf20Sopenharmony_ci /* string args above */ 8408c2ecf20Sopenharmony_ci Opt_read_only, 8418c2ecf20Sopenharmony_ci Opt_read_write, 8428c2ecf20Sopenharmony_ci Opt_lock_on_read, 8438c2ecf20Sopenharmony_ci Opt_exclusive, 8448c2ecf20Sopenharmony_ci Opt_notrim, 8458c2ecf20Sopenharmony_ci}; 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_cienum { 8488c2ecf20Sopenharmony_ci Opt_compression_hint_none, 8498c2ecf20Sopenharmony_ci Opt_compression_hint_compressible, 8508c2ecf20Sopenharmony_ci Opt_compression_hint_incompressible, 8518c2ecf20Sopenharmony_ci}; 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_cistatic const struct constant_table rbd_param_compression_hint[] = { 8548c2ecf20Sopenharmony_ci {"none", Opt_compression_hint_none}, 8558c2ecf20Sopenharmony_ci {"compressible", Opt_compression_hint_compressible}, 8568c2ecf20Sopenharmony_ci {"incompressible", Opt_compression_hint_incompressible}, 8578c2ecf20Sopenharmony_ci {} 8588c2ecf20Sopenharmony_ci}; 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_cistatic const struct fs_parameter_spec rbd_parameters[] = { 8618c2ecf20Sopenharmony_ci fsparam_u32 ("alloc_size", Opt_alloc_size), 8628c2ecf20Sopenharmony_ci fsparam_enum ("compression_hint", Opt_compression_hint, 8638c2ecf20Sopenharmony_ci rbd_param_compression_hint), 8648c2ecf20Sopenharmony_ci fsparam_flag ("exclusive", Opt_exclusive), 8658c2ecf20Sopenharmony_ci fsparam_flag ("lock_on_read", Opt_lock_on_read), 8668c2ecf20Sopenharmony_ci fsparam_u32 ("lock_timeout", 
Opt_lock_timeout), 8678c2ecf20Sopenharmony_ci fsparam_flag ("notrim", Opt_notrim), 8688c2ecf20Sopenharmony_ci fsparam_string ("_pool_ns", Opt_pool_ns), 8698c2ecf20Sopenharmony_ci fsparam_u32 ("queue_depth", Opt_queue_depth), 8708c2ecf20Sopenharmony_ci fsparam_flag ("read_only", Opt_read_only), 8718c2ecf20Sopenharmony_ci fsparam_flag ("read_write", Opt_read_write), 8728c2ecf20Sopenharmony_ci fsparam_flag ("ro", Opt_read_only), 8738c2ecf20Sopenharmony_ci fsparam_flag ("rw", Opt_read_write), 8748c2ecf20Sopenharmony_ci {} 8758c2ecf20Sopenharmony_ci}; 8768c2ecf20Sopenharmony_ci 8778c2ecf20Sopenharmony_cistruct rbd_options { 8788c2ecf20Sopenharmony_ci int queue_depth; 8798c2ecf20Sopenharmony_ci int alloc_size; 8808c2ecf20Sopenharmony_ci unsigned long lock_timeout; 8818c2ecf20Sopenharmony_ci bool read_only; 8828c2ecf20Sopenharmony_ci bool lock_on_read; 8838c2ecf20Sopenharmony_ci bool exclusive; 8848c2ecf20Sopenharmony_ci bool trim; 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_ci u32 alloc_hint_flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */ 8878c2ecf20Sopenharmony_ci}; 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ci#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ 8908c2ecf20Sopenharmony_ci#define RBD_ALLOC_SIZE_DEFAULT (64 * 1024) 8918c2ecf20Sopenharmony_ci#define RBD_LOCK_TIMEOUT_DEFAULT 0 /* no timeout */ 8928c2ecf20Sopenharmony_ci#define RBD_READ_ONLY_DEFAULT false 8938c2ecf20Sopenharmony_ci#define RBD_LOCK_ON_READ_DEFAULT false 8948c2ecf20Sopenharmony_ci#define RBD_EXCLUSIVE_DEFAULT false 8958c2ecf20Sopenharmony_ci#define RBD_TRIM_DEFAULT true 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_cistruct rbd_parse_opts_ctx { 8988c2ecf20Sopenharmony_ci struct rbd_spec *spec; 8998c2ecf20Sopenharmony_ci struct ceph_options *copts; 9008c2ecf20Sopenharmony_ci struct rbd_options *opts; 9018c2ecf20Sopenharmony_ci}; 9028c2ecf20Sopenharmony_ci 9038c2ecf20Sopenharmony_cistatic char* obj_op_name(enum obj_operation_type op_type) 9048c2ecf20Sopenharmony_ci{ 
9058c2ecf20Sopenharmony_ci switch (op_type) { 9068c2ecf20Sopenharmony_ci case OBJ_OP_READ: 9078c2ecf20Sopenharmony_ci return "read"; 9088c2ecf20Sopenharmony_ci case OBJ_OP_WRITE: 9098c2ecf20Sopenharmony_ci return "write"; 9108c2ecf20Sopenharmony_ci case OBJ_OP_DISCARD: 9118c2ecf20Sopenharmony_ci return "discard"; 9128c2ecf20Sopenharmony_ci case OBJ_OP_ZEROOUT: 9138c2ecf20Sopenharmony_ci return "zeroout"; 9148c2ecf20Sopenharmony_ci default: 9158c2ecf20Sopenharmony_ci return "???"; 9168c2ecf20Sopenharmony_ci } 9178c2ecf20Sopenharmony_ci} 9188c2ecf20Sopenharmony_ci 9198c2ecf20Sopenharmony_ci/* 9208c2ecf20Sopenharmony_ci * Destroy ceph client 9218c2ecf20Sopenharmony_ci * 9228c2ecf20Sopenharmony_ci * Caller must hold rbd_client_list_lock. 9238c2ecf20Sopenharmony_ci */ 9248c2ecf20Sopenharmony_cistatic void rbd_client_release(struct kref *kref) 9258c2ecf20Sopenharmony_ci{ 9268c2ecf20Sopenharmony_ci struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci dout("%s: rbdc %p\n", __func__, rbdc); 9298c2ecf20Sopenharmony_ci spin_lock(&rbd_client_list_lock); 9308c2ecf20Sopenharmony_ci list_del(&rbdc->node); 9318c2ecf20Sopenharmony_ci spin_unlock(&rbd_client_list_lock); 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci ceph_destroy_client(rbdc->client); 9348c2ecf20Sopenharmony_ci kfree(rbdc); 9358c2ecf20Sopenharmony_ci} 9368c2ecf20Sopenharmony_ci 9378c2ecf20Sopenharmony_ci/* 9388c2ecf20Sopenharmony_ci * Drop reference to ceph client node. If it's not referenced anymore, release 9398c2ecf20Sopenharmony_ci * it. 
9408c2ecf20Sopenharmony_ci */ 9418c2ecf20Sopenharmony_cistatic void rbd_put_client(struct rbd_client *rbdc) 9428c2ecf20Sopenharmony_ci{ 9438c2ecf20Sopenharmony_ci if (rbdc) 9448c2ecf20Sopenharmony_ci kref_put(&rbdc->kref, rbd_client_release); 9458c2ecf20Sopenharmony_ci} 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci/* 9488c2ecf20Sopenharmony_ci * Get a ceph client with specific addr and configuration, if one does 9498c2ecf20Sopenharmony_ci * not exist create it. Either way, ceph_opts is consumed by this 9508c2ecf20Sopenharmony_ci * function. 9518c2ecf20Sopenharmony_ci */ 9528c2ecf20Sopenharmony_cistatic struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) 9538c2ecf20Sopenharmony_ci{ 9548c2ecf20Sopenharmony_ci struct rbd_client *rbdc; 9558c2ecf20Sopenharmony_ci int ret; 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci mutex_lock(&client_mutex); 9588c2ecf20Sopenharmony_ci rbdc = rbd_client_find(ceph_opts); 9598c2ecf20Sopenharmony_ci if (rbdc) { 9608c2ecf20Sopenharmony_ci ceph_destroy_options(ceph_opts); 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci /* 9638c2ecf20Sopenharmony_ci * Using an existing client. Make sure ->pg_pools is up to 9648c2ecf20Sopenharmony_ci * date before we look up the pool id in do_rbd_add(). 
9658c2ecf20Sopenharmony_ci */ 9668c2ecf20Sopenharmony_ci ret = ceph_wait_for_latest_osdmap(rbdc->client, 9678c2ecf20Sopenharmony_ci rbdc->client->options->mount_timeout); 9688c2ecf20Sopenharmony_ci if (ret) { 9698c2ecf20Sopenharmony_ci rbd_warn(NULL, "failed to get latest osdmap: %d", ret); 9708c2ecf20Sopenharmony_ci rbd_put_client(rbdc); 9718c2ecf20Sopenharmony_ci rbdc = ERR_PTR(ret); 9728c2ecf20Sopenharmony_ci } 9738c2ecf20Sopenharmony_ci } else { 9748c2ecf20Sopenharmony_ci rbdc = rbd_client_create(ceph_opts); 9758c2ecf20Sopenharmony_ci } 9768c2ecf20Sopenharmony_ci mutex_unlock(&client_mutex); 9778c2ecf20Sopenharmony_ci 9788c2ecf20Sopenharmony_ci return rbdc; 9798c2ecf20Sopenharmony_ci} 9808c2ecf20Sopenharmony_ci 9818c2ecf20Sopenharmony_cistatic bool rbd_image_format_valid(u32 image_format) 9828c2ecf20Sopenharmony_ci{ 9838c2ecf20Sopenharmony_ci return image_format == 1 || image_format == 2; 9848c2ecf20Sopenharmony_ci} 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_cistatic bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) 9878c2ecf20Sopenharmony_ci{ 9888c2ecf20Sopenharmony_ci size_t size; 9898c2ecf20Sopenharmony_ci u32 snap_count; 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci /* The header has to start with the magic rbd header text */ 9928c2ecf20Sopenharmony_ci if (memcmp(&ondisk->text, RBD_HEADER_TEXT, sizeof (RBD_HEADER_TEXT))) 9938c2ecf20Sopenharmony_ci return false; 9948c2ecf20Sopenharmony_ci 9958c2ecf20Sopenharmony_ci /* The bio layer requires at least sector-sized I/O */ 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_ci if (ondisk->options.order < SECTOR_SHIFT) 9988c2ecf20Sopenharmony_ci return false; 9998c2ecf20Sopenharmony_ci 10008c2ecf20Sopenharmony_ci /* If we use u64 in a few spots we may be able to loosen this */ 10018c2ecf20Sopenharmony_ci 10028c2ecf20Sopenharmony_ci if (ondisk->options.order > 8 * sizeof (int) - 1) 10038c2ecf20Sopenharmony_ci return false; 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_ci /* 
10068c2ecf20Sopenharmony_ci * The size of a snapshot header has to fit in a size_t, and 10078c2ecf20Sopenharmony_ci * that limits the number of snapshots. 10088c2ecf20Sopenharmony_ci */ 10098c2ecf20Sopenharmony_ci snap_count = le32_to_cpu(ondisk->snap_count); 10108c2ecf20Sopenharmony_ci size = SIZE_MAX - sizeof (struct ceph_snap_context); 10118c2ecf20Sopenharmony_ci if (snap_count > size / sizeof (__le64)) 10128c2ecf20Sopenharmony_ci return false; 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_ci /* 10158c2ecf20Sopenharmony_ci * Not only that, but the size of the entire the snapshot 10168c2ecf20Sopenharmony_ci * header must also be representable in a size_t. 10178c2ecf20Sopenharmony_ci */ 10188c2ecf20Sopenharmony_ci size -= snap_count * sizeof (__le64); 10198c2ecf20Sopenharmony_ci if ((u64) size < le64_to_cpu(ondisk->snap_names_len)) 10208c2ecf20Sopenharmony_ci return false; 10218c2ecf20Sopenharmony_ci 10228c2ecf20Sopenharmony_ci return true; 10238c2ecf20Sopenharmony_ci} 10248c2ecf20Sopenharmony_ci 10258c2ecf20Sopenharmony_ci/* 10268c2ecf20Sopenharmony_ci * returns the size of an object in the image 10278c2ecf20Sopenharmony_ci */ 10288c2ecf20Sopenharmony_cistatic u32 rbd_obj_bytes(struct rbd_image_header *header) 10298c2ecf20Sopenharmony_ci{ 10308c2ecf20Sopenharmony_ci return 1U << header->obj_order; 10318c2ecf20Sopenharmony_ci} 10328c2ecf20Sopenharmony_ci 10338c2ecf20Sopenharmony_cistatic void rbd_init_layout(struct rbd_device *rbd_dev) 10348c2ecf20Sopenharmony_ci{ 10358c2ecf20Sopenharmony_ci if (rbd_dev->header.stripe_unit == 0 || 10368c2ecf20Sopenharmony_ci rbd_dev->header.stripe_count == 0) { 10378c2ecf20Sopenharmony_ci rbd_dev->header.stripe_unit = rbd_obj_bytes(&rbd_dev->header); 10388c2ecf20Sopenharmony_ci rbd_dev->header.stripe_count = 1; 10398c2ecf20Sopenharmony_ci } 10408c2ecf20Sopenharmony_ci 10418c2ecf20Sopenharmony_ci rbd_dev->layout.stripe_unit = rbd_dev->header.stripe_unit; 10428c2ecf20Sopenharmony_ci rbd_dev->layout.stripe_count = 
rbd_dev->header.stripe_count; 10438c2ecf20Sopenharmony_ci rbd_dev->layout.object_size = rbd_obj_bytes(&rbd_dev->header); 10448c2ecf20Sopenharmony_ci rbd_dev->layout.pool_id = rbd_dev->header.data_pool_id == CEPH_NOPOOL ? 10458c2ecf20Sopenharmony_ci rbd_dev->spec->pool_id : rbd_dev->header.data_pool_id; 10468c2ecf20Sopenharmony_ci RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); 10478c2ecf20Sopenharmony_ci} 10488c2ecf20Sopenharmony_ci 10498c2ecf20Sopenharmony_cistatic void rbd_image_header_cleanup(struct rbd_image_header *header) 10508c2ecf20Sopenharmony_ci{ 10518c2ecf20Sopenharmony_ci kfree(header->object_prefix); 10528c2ecf20Sopenharmony_ci ceph_put_snap_context(header->snapc); 10538c2ecf20Sopenharmony_ci kfree(header->snap_sizes); 10548c2ecf20Sopenharmony_ci kfree(header->snap_names); 10558c2ecf20Sopenharmony_ci 10568c2ecf20Sopenharmony_ci memset(header, 0, sizeof(*header)); 10578c2ecf20Sopenharmony_ci} 10588c2ecf20Sopenharmony_ci 10598c2ecf20Sopenharmony_ci/* 10608c2ecf20Sopenharmony_ci * Fill an rbd image header with information from the given format 1 10618c2ecf20Sopenharmony_ci * on-disk header. 
10628c2ecf20Sopenharmony_ci */ 10638c2ecf20Sopenharmony_cistatic int rbd_header_from_disk(struct rbd_image_header *header, 10648c2ecf20Sopenharmony_ci struct rbd_image_header_ondisk *ondisk, 10658c2ecf20Sopenharmony_ci bool first_time) 10668c2ecf20Sopenharmony_ci{ 10678c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc; 10688c2ecf20Sopenharmony_ci char *object_prefix = NULL; 10698c2ecf20Sopenharmony_ci char *snap_names = NULL; 10708c2ecf20Sopenharmony_ci u64 *snap_sizes = NULL; 10718c2ecf20Sopenharmony_ci u32 snap_count; 10728c2ecf20Sopenharmony_ci int ret = -ENOMEM; 10738c2ecf20Sopenharmony_ci u32 i; 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci /* Allocate this now to avoid having to handle failure below */ 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_ci if (first_time) { 10788c2ecf20Sopenharmony_ci object_prefix = kstrndup(ondisk->object_prefix, 10798c2ecf20Sopenharmony_ci sizeof(ondisk->object_prefix), 10808c2ecf20Sopenharmony_ci GFP_KERNEL); 10818c2ecf20Sopenharmony_ci if (!object_prefix) 10828c2ecf20Sopenharmony_ci return -ENOMEM; 10838c2ecf20Sopenharmony_ci } 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci /* Allocate the snapshot context and fill it in */ 10868c2ecf20Sopenharmony_ci 10878c2ecf20Sopenharmony_ci snap_count = le32_to_cpu(ondisk->snap_count); 10888c2ecf20Sopenharmony_ci snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); 10898c2ecf20Sopenharmony_ci if (!snapc) 10908c2ecf20Sopenharmony_ci goto out_err; 10918c2ecf20Sopenharmony_ci snapc->seq = le64_to_cpu(ondisk->snap_seq); 10928c2ecf20Sopenharmony_ci if (snap_count) { 10938c2ecf20Sopenharmony_ci struct rbd_image_snap_ondisk *snaps; 10948c2ecf20Sopenharmony_ci u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); 10958c2ecf20Sopenharmony_ci 10968c2ecf20Sopenharmony_ci /* We'll keep a copy of the snapshot names... 
*/ 10978c2ecf20Sopenharmony_ci 10988c2ecf20Sopenharmony_ci if (snap_names_len > (u64)SIZE_MAX) 10998c2ecf20Sopenharmony_ci goto out_2big; 11008c2ecf20Sopenharmony_ci snap_names = kmalloc(snap_names_len, GFP_KERNEL); 11018c2ecf20Sopenharmony_ci if (!snap_names) 11028c2ecf20Sopenharmony_ci goto out_err; 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_ci /* ...as well as the array of their sizes. */ 11058c2ecf20Sopenharmony_ci snap_sizes = kmalloc_array(snap_count, 11068c2ecf20Sopenharmony_ci sizeof(*header->snap_sizes), 11078c2ecf20Sopenharmony_ci GFP_KERNEL); 11088c2ecf20Sopenharmony_ci if (!snap_sizes) 11098c2ecf20Sopenharmony_ci goto out_err; 11108c2ecf20Sopenharmony_ci 11118c2ecf20Sopenharmony_ci /* 11128c2ecf20Sopenharmony_ci * Copy the names, and fill in each snapshot's id 11138c2ecf20Sopenharmony_ci * and size. 11148c2ecf20Sopenharmony_ci * 11158c2ecf20Sopenharmony_ci * Note that rbd_dev_v1_header_info() guarantees the 11168c2ecf20Sopenharmony_ci * ondisk buffer we're working with has 11178c2ecf20Sopenharmony_ci * snap_names_len bytes beyond the end of the 11188c2ecf20Sopenharmony_ci * snapshot id array, this memcpy() is safe. 
11198c2ecf20Sopenharmony_ci */ 11208c2ecf20Sopenharmony_ci memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len); 11218c2ecf20Sopenharmony_ci snaps = ondisk->snaps; 11228c2ecf20Sopenharmony_ci for (i = 0; i < snap_count; i++) { 11238c2ecf20Sopenharmony_ci snapc->snaps[i] = le64_to_cpu(snaps[i].id); 11248c2ecf20Sopenharmony_ci snap_sizes[i] = le64_to_cpu(snaps[i].image_size); 11258c2ecf20Sopenharmony_ci } 11268c2ecf20Sopenharmony_ci } 11278c2ecf20Sopenharmony_ci 11288c2ecf20Sopenharmony_ci /* We won't fail any more, fill in the header */ 11298c2ecf20Sopenharmony_ci 11308c2ecf20Sopenharmony_ci if (first_time) { 11318c2ecf20Sopenharmony_ci header->object_prefix = object_prefix; 11328c2ecf20Sopenharmony_ci header->obj_order = ondisk->options.order; 11338c2ecf20Sopenharmony_ci } 11348c2ecf20Sopenharmony_ci 11358c2ecf20Sopenharmony_ci /* The remaining fields always get updated (when we refresh) */ 11368c2ecf20Sopenharmony_ci 11378c2ecf20Sopenharmony_ci header->image_size = le64_to_cpu(ondisk->image_size); 11388c2ecf20Sopenharmony_ci header->snapc = snapc; 11398c2ecf20Sopenharmony_ci header->snap_names = snap_names; 11408c2ecf20Sopenharmony_ci header->snap_sizes = snap_sizes; 11418c2ecf20Sopenharmony_ci 11428c2ecf20Sopenharmony_ci return 0; 11438c2ecf20Sopenharmony_ciout_2big: 11448c2ecf20Sopenharmony_ci ret = -EIO; 11458c2ecf20Sopenharmony_ciout_err: 11468c2ecf20Sopenharmony_ci kfree(snap_sizes); 11478c2ecf20Sopenharmony_ci kfree(snap_names); 11488c2ecf20Sopenharmony_ci ceph_put_snap_context(snapc); 11498c2ecf20Sopenharmony_ci kfree(object_prefix); 11508c2ecf20Sopenharmony_ci 11518c2ecf20Sopenharmony_ci return ret; 11528c2ecf20Sopenharmony_ci} 11538c2ecf20Sopenharmony_ci 11548c2ecf20Sopenharmony_cistatic const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which) 11558c2ecf20Sopenharmony_ci{ 11568c2ecf20Sopenharmony_ci const char *snap_name; 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci rbd_assert(which < rbd_dev->header.snapc->num_snaps); 
11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci /* Skip over names until we find the one we are looking for */ 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci snap_name = rbd_dev->header.snap_names; 11638c2ecf20Sopenharmony_ci while (which--) 11648c2ecf20Sopenharmony_ci snap_name += strlen(snap_name) + 1; 11658c2ecf20Sopenharmony_ci 11668c2ecf20Sopenharmony_ci return kstrdup(snap_name, GFP_KERNEL); 11678c2ecf20Sopenharmony_ci} 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_ci/* 11708c2ecf20Sopenharmony_ci * Snapshot id comparison function for use with qsort()/bsearch(). 11718c2ecf20Sopenharmony_ci * Note that result is for snapshots in *descending* order. 11728c2ecf20Sopenharmony_ci */ 11738c2ecf20Sopenharmony_cistatic int snapid_compare_reverse(const void *s1, const void *s2) 11748c2ecf20Sopenharmony_ci{ 11758c2ecf20Sopenharmony_ci u64 snap_id1 = *(u64 *)s1; 11768c2ecf20Sopenharmony_ci u64 snap_id2 = *(u64 *)s2; 11778c2ecf20Sopenharmony_ci 11788c2ecf20Sopenharmony_ci if (snap_id1 < snap_id2) 11798c2ecf20Sopenharmony_ci return 1; 11808c2ecf20Sopenharmony_ci return snap_id1 == snap_id2 ? 0 : -1; 11818c2ecf20Sopenharmony_ci} 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci/* 11848c2ecf20Sopenharmony_ci * Search a snapshot context to see if the given snapshot id is 11858c2ecf20Sopenharmony_ci * present. 11868c2ecf20Sopenharmony_ci * 11878c2ecf20Sopenharmony_ci * Returns the position of the snapshot id in the array if it's found, 11888c2ecf20Sopenharmony_ci * or BAD_SNAP_INDEX otherwise. 11898c2ecf20Sopenharmony_ci * 11908c2ecf20Sopenharmony_ci * Note: The snapshot array is in kept sorted (by the osd) in 11918c2ecf20Sopenharmony_ci * reverse order, highest snapshot id first. 
11928c2ecf20Sopenharmony_ci */ 11938c2ecf20Sopenharmony_cistatic u32 rbd_dev_snap_index(struct rbd_device *rbd_dev, u64 snap_id) 11948c2ecf20Sopenharmony_ci{ 11958c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc = rbd_dev->header.snapc; 11968c2ecf20Sopenharmony_ci u64 *found; 11978c2ecf20Sopenharmony_ci 11988c2ecf20Sopenharmony_ci found = bsearch(&snap_id, &snapc->snaps, snapc->num_snaps, 11998c2ecf20Sopenharmony_ci sizeof (snap_id), snapid_compare_reverse); 12008c2ecf20Sopenharmony_ci 12018c2ecf20Sopenharmony_ci return found ? (u32)(found - &snapc->snaps[0]) : BAD_SNAP_INDEX; 12028c2ecf20Sopenharmony_ci} 12038c2ecf20Sopenharmony_ci 12048c2ecf20Sopenharmony_cistatic const char *rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, 12058c2ecf20Sopenharmony_ci u64 snap_id) 12068c2ecf20Sopenharmony_ci{ 12078c2ecf20Sopenharmony_ci u32 which; 12088c2ecf20Sopenharmony_ci const char *snap_name; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci which = rbd_dev_snap_index(rbd_dev, snap_id); 12118c2ecf20Sopenharmony_ci if (which == BAD_SNAP_INDEX) 12128c2ecf20Sopenharmony_ci return ERR_PTR(-ENOENT); 12138c2ecf20Sopenharmony_ci 12148c2ecf20Sopenharmony_ci snap_name = _rbd_dev_v1_snap_name(rbd_dev, which); 12158c2ecf20Sopenharmony_ci return snap_name ? 
snap_name : ERR_PTR(-ENOMEM); 12168c2ecf20Sopenharmony_ci} 12178c2ecf20Sopenharmony_ci 12188c2ecf20Sopenharmony_cistatic const char *rbd_snap_name(struct rbd_device *rbd_dev, u64 snap_id) 12198c2ecf20Sopenharmony_ci{ 12208c2ecf20Sopenharmony_ci if (snap_id == CEPH_NOSNAP) 12218c2ecf20Sopenharmony_ci return RBD_SNAP_HEAD_NAME; 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 12248c2ecf20Sopenharmony_ci if (rbd_dev->image_format == 1) 12258c2ecf20Sopenharmony_ci return rbd_dev_v1_snap_name(rbd_dev, snap_id); 12268c2ecf20Sopenharmony_ci 12278c2ecf20Sopenharmony_ci return rbd_dev_v2_snap_name(rbd_dev, snap_id); 12288c2ecf20Sopenharmony_ci} 12298c2ecf20Sopenharmony_ci 12308c2ecf20Sopenharmony_cistatic int rbd_snap_size(struct rbd_device *rbd_dev, u64 snap_id, 12318c2ecf20Sopenharmony_ci u64 *snap_size) 12328c2ecf20Sopenharmony_ci{ 12338c2ecf20Sopenharmony_ci rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 12348c2ecf20Sopenharmony_ci if (snap_id == CEPH_NOSNAP) { 12358c2ecf20Sopenharmony_ci *snap_size = rbd_dev->header.image_size; 12368c2ecf20Sopenharmony_ci } else if (rbd_dev->image_format == 1) { 12378c2ecf20Sopenharmony_ci u32 which; 12388c2ecf20Sopenharmony_ci 12398c2ecf20Sopenharmony_ci which = rbd_dev_snap_index(rbd_dev, snap_id); 12408c2ecf20Sopenharmony_ci if (which == BAD_SNAP_INDEX) 12418c2ecf20Sopenharmony_ci return -ENOENT; 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci *snap_size = rbd_dev->header.snap_sizes[which]; 12448c2ecf20Sopenharmony_ci } else { 12458c2ecf20Sopenharmony_ci u64 size = 0; 12468c2ecf20Sopenharmony_ci int ret; 12478c2ecf20Sopenharmony_ci 12488c2ecf20Sopenharmony_ci ret = _rbd_dev_v2_snap_size(rbd_dev, snap_id, NULL, &size); 12498c2ecf20Sopenharmony_ci if (ret) 12508c2ecf20Sopenharmony_ci return ret; 12518c2ecf20Sopenharmony_ci 12528c2ecf20Sopenharmony_ci *snap_size = size; 12538c2ecf20Sopenharmony_ci } 12548c2ecf20Sopenharmony_ci return 0; 
12558c2ecf20Sopenharmony_ci} 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_cistatic int rbd_dev_mapping_set(struct rbd_device *rbd_dev) 12588c2ecf20Sopenharmony_ci{ 12598c2ecf20Sopenharmony_ci u64 snap_id = rbd_dev->spec->snap_id; 12608c2ecf20Sopenharmony_ci u64 size = 0; 12618c2ecf20Sopenharmony_ci int ret; 12628c2ecf20Sopenharmony_ci 12638c2ecf20Sopenharmony_ci ret = rbd_snap_size(rbd_dev, snap_id, &size); 12648c2ecf20Sopenharmony_ci if (ret) 12658c2ecf20Sopenharmony_ci return ret; 12668c2ecf20Sopenharmony_ci 12678c2ecf20Sopenharmony_ci rbd_dev->mapping.size = size; 12688c2ecf20Sopenharmony_ci return 0; 12698c2ecf20Sopenharmony_ci} 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_cistatic void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) 12728c2ecf20Sopenharmony_ci{ 12738c2ecf20Sopenharmony_ci rbd_dev->mapping.size = 0; 12748c2ecf20Sopenharmony_ci} 12758c2ecf20Sopenharmony_ci 12768c2ecf20Sopenharmony_cistatic void zero_bvec(struct bio_vec *bv) 12778c2ecf20Sopenharmony_ci{ 12788c2ecf20Sopenharmony_ci void *buf; 12798c2ecf20Sopenharmony_ci unsigned long flags; 12808c2ecf20Sopenharmony_ci 12818c2ecf20Sopenharmony_ci buf = bvec_kmap_irq(bv, &flags); 12828c2ecf20Sopenharmony_ci memset(buf, 0, bv->bv_len); 12838c2ecf20Sopenharmony_ci flush_dcache_page(bv->bv_page); 12848c2ecf20Sopenharmony_ci bvec_kunmap_irq(buf, &flags); 12858c2ecf20Sopenharmony_ci} 12868c2ecf20Sopenharmony_ci 12878c2ecf20Sopenharmony_cistatic void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes) 12888c2ecf20Sopenharmony_ci{ 12898c2ecf20Sopenharmony_ci struct ceph_bio_iter it = *bio_pos; 12908c2ecf20Sopenharmony_ci 12918c2ecf20Sopenharmony_ci ceph_bio_iter_advance(&it, off); 12928c2ecf20Sopenharmony_ci ceph_bio_iter_advance_step(&it, bytes, ({ 12938c2ecf20Sopenharmony_ci zero_bvec(&bv); 12948c2ecf20Sopenharmony_ci })); 12958c2ecf20Sopenharmony_ci} 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_cistatic void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 
bytes) 12988c2ecf20Sopenharmony_ci{ 12998c2ecf20Sopenharmony_ci struct ceph_bvec_iter it = *bvec_pos; 13008c2ecf20Sopenharmony_ci 13018c2ecf20Sopenharmony_ci ceph_bvec_iter_advance(&it, off); 13028c2ecf20Sopenharmony_ci ceph_bvec_iter_advance_step(&it, bytes, ({ 13038c2ecf20Sopenharmony_ci zero_bvec(&bv); 13048c2ecf20Sopenharmony_ci })); 13058c2ecf20Sopenharmony_ci} 13068c2ecf20Sopenharmony_ci 13078c2ecf20Sopenharmony_ci/* 13088c2ecf20Sopenharmony_ci * Zero a range in @obj_req data buffer defined by a bio (list) or 13098c2ecf20Sopenharmony_ci * (private) bio_vec array. 13108c2ecf20Sopenharmony_ci * 13118c2ecf20Sopenharmony_ci * @off is relative to the start of the data buffer. 13128c2ecf20Sopenharmony_ci */ 13138c2ecf20Sopenharmony_cistatic void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off, 13148c2ecf20Sopenharmony_ci u32 bytes) 13158c2ecf20Sopenharmony_ci{ 13168c2ecf20Sopenharmony_ci dout("%s %p data buf %u~%u\n", __func__, obj_req, off, bytes); 13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci switch (obj_req->img_request->data_type) { 13198c2ecf20Sopenharmony_ci case OBJ_REQUEST_BIO: 13208c2ecf20Sopenharmony_ci zero_bios(&obj_req->bio_pos, off, bytes); 13218c2ecf20Sopenharmony_ci break; 13228c2ecf20Sopenharmony_ci case OBJ_REQUEST_BVECS: 13238c2ecf20Sopenharmony_ci case OBJ_REQUEST_OWN_BVECS: 13248c2ecf20Sopenharmony_ci zero_bvecs(&obj_req->bvec_pos, off, bytes); 13258c2ecf20Sopenharmony_ci break; 13268c2ecf20Sopenharmony_ci default: 13278c2ecf20Sopenharmony_ci BUG(); 13288c2ecf20Sopenharmony_ci } 13298c2ecf20Sopenharmony_ci} 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_cistatic void rbd_obj_request_destroy(struct kref *kref); 13328c2ecf20Sopenharmony_cistatic void rbd_obj_request_put(struct rbd_obj_request *obj_request) 13338c2ecf20Sopenharmony_ci{ 13348c2ecf20Sopenharmony_ci rbd_assert(obj_request != NULL); 13358c2ecf20Sopenharmony_ci dout("%s: obj %p (was %d)\n", __func__, obj_request, 13368c2ecf20Sopenharmony_ci 
kref_read(&obj_request->kref)); 13378c2ecf20Sopenharmony_ci kref_put(&obj_request->kref, rbd_obj_request_destroy); 13388c2ecf20Sopenharmony_ci} 13398c2ecf20Sopenharmony_ci 13408c2ecf20Sopenharmony_cistatic inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, 13418c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request) 13428c2ecf20Sopenharmony_ci{ 13438c2ecf20Sopenharmony_ci rbd_assert(obj_request->img_request == NULL); 13448c2ecf20Sopenharmony_ci 13458c2ecf20Sopenharmony_ci /* Image request now owns object's original reference */ 13468c2ecf20Sopenharmony_ci obj_request->img_request = img_request; 13478c2ecf20Sopenharmony_ci dout("%s: img %p obj %p\n", __func__, img_request, obj_request); 13488c2ecf20Sopenharmony_ci} 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_cistatic inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, 13518c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request) 13528c2ecf20Sopenharmony_ci{ 13538c2ecf20Sopenharmony_ci dout("%s: img %p obj %p\n", __func__, img_request, obj_request); 13548c2ecf20Sopenharmony_ci list_del(&obj_request->ex.oe_item); 13558c2ecf20Sopenharmony_ci rbd_assert(obj_request->img_request == img_request); 13568c2ecf20Sopenharmony_ci rbd_obj_request_put(obj_request); 13578c2ecf20Sopenharmony_ci} 13588c2ecf20Sopenharmony_ci 13598c2ecf20Sopenharmony_cistatic void rbd_osd_submit(struct ceph_osd_request *osd_req) 13608c2ecf20Sopenharmony_ci{ 13618c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 13628c2ecf20Sopenharmony_ci 13638c2ecf20Sopenharmony_ci dout("%s osd_req %p for obj_req %p objno %llu %llu~%llu\n", 13648c2ecf20Sopenharmony_ci __func__, osd_req, obj_req, obj_req->ex.oe_objno, 13658c2ecf20Sopenharmony_ci obj_req->ex.oe_off, obj_req->ex.oe_len); 13668c2ecf20Sopenharmony_ci ceph_osdc_start_request(osd_req->r_osdc, osd_req, false); 13678c2ecf20Sopenharmony_ci} 13688c2ecf20Sopenharmony_ci 13698c2ecf20Sopenharmony_ci/* 13708c2ecf20Sopenharmony_ci * 
The default/initial value for all image request flags is 0. Each 13718c2ecf20Sopenharmony_ci * is conditionally set to 1 at image request initialization time 13728c2ecf20Sopenharmony_ci * and currently never change thereafter. 13738c2ecf20Sopenharmony_ci */ 13748c2ecf20Sopenharmony_cistatic void img_request_layered_set(struct rbd_img_request *img_request) 13758c2ecf20Sopenharmony_ci{ 13768c2ecf20Sopenharmony_ci set_bit(IMG_REQ_LAYERED, &img_request->flags); 13778c2ecf20Sopenharmony_ci} 13788c2ecf20Sopenharmony_ci 13798c2ecf20Sopenharmony_cistatic bool img_request_layered_test(struct rbd_img_request *img_request) 13808c2ecf20Sopenharmony_ci{ 13818c2ecf20Sopenharmony_ci return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0; 13828c2ecf20Sopenharmony_ci} 13838c2ecf20Sopenharmony_ci 13848c2ecf20Sopenharmony_cistatic bool rbd_obj_is_entire(struct rbd_obj_request *obj_req) 13858c2ecf20Sopenharmony_ci{ 13868c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 13878c2ecf20Sopenharmony_ci 13888c2ecf20Sopenharmony_ci return !obj_req->ex.oe_off && 13898c2ecf20Sopenharmony_ci obj_req->ex.oe_len == rbd_dev->layout.object_size; 13908c2ecf20Sopenharmony_ci} 13918c2ecf20Sopenharmony_ci 13928c2ecf20Sopenharmony_cistatic bool rbd_obj_is_tail(struct rbd_obj_request *obj_req) 13938c2ecf20Sopenharmony_ci{ 13948c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 13958c2ecf20Sopenharmony_ci 13968c2ecf20Sopenharmony_ci return obj_req->ex.oe_off + obj_req->ex.oe_len == 13978c2ecf20Sopenharmony_ci rbd_dev->layout.object_size; 13988c2ecf20Sopenharmony_ci} 13998c2ecf20Sopenharmony_ci 14008c2ecf20Sopenharmony_ci/* 14018c2ecf20Sopenharmony_ci * Must be called after rbd_obj_calc_img_extents(). 
14028c2ecf20Sopenharmony_ci */ 14038c2ecf20Sopenharmony_cistatic void rbd_obj_set_copyup_enabled(struct rbd_obj_request *obj_req) 14048c2ecf20Sopenharmony_ci{ 14058c2ecf20Sopenharmony_ci rbd_assert(obj_req->img_request->snapc); 14068c2ecf20Sopenharmony_ci 14078c2ecf20Sopenharmony_ci if (obj_req->img_request->op_type == OBJ_OP_DISCARD) { 14088c2ecf20Sopenharmony_ci dout("%s %p objno %llu discard\n", __func__, obj_req, 14098c2ecf20Sopenharmony_ci obj_req->ex.oe_objno); 14108c2ecf20Sopenharmony_ci return; 14118c2ecf20Sopenharmony_ci } 14128c2ecf20Sopenharmony_ci 14138c2ecf20Sopenharmony_ci if (!obj_req->num_img_extents) { 14148c2ecf20Sopenharmony_ci dout("%s %p objno %llu not overlapping\n", __func__, obj_req, 14158c2ecf20Sopenharmony_ci obj_req->ex.oe_objno); 14168c2ecf20Sopenharmony_ci return; 14178c2ecf20Sopenharmony_ci } 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci if (rbd_obj_is_entire(obj_req) && 14208c2ecf20Sopenharmony_ci !obj_req->img_request->snapc->num_snaps) { 14218c2ecf20Sopenharmony_ci dout("%s %p objno %llu entire\n", __func__, obj_req, 14228c2ecf20Sopenharmony_ci obj_req->ex.oe_objno); 14238c2ecf20Sopenharmony_ci return; 14248c2ecf20Sopenharmony_ci } 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ENABLED; 14278c2ecf20Sopenharmony_ci} 14288c2ecf20Sopenharmony_ci 14298c2ecf20Sopenharmony_cistatic u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req) 14308c2ecf20Sopenharmony_ci{ 14318c2ecf20Sopenharmony_ci return ceph_file_extents_bytes(obj_req->img_extents, 14328c2ecf20Sopenharmony_ci obj_req->num_img_extents); 14338c2ecf20Sopenharmony_ci} 14348c2ecf20Sopenharmony_ci 14358c2ecf20Sopenharmony_cistatic bool rbd_img_is_write(struct rbd_img_request *img_req) 14368c2ecf20Sopenharmony_ci{ 14378c2ecf20Sopenharmony_ci switch (img_req->op_type) { 14388c2ecf20Sopenharmony_ci case OBJ_OP_READ: 14398c2ecf20Sopenharmony_ci return false; 14408c2ecf20Sopenharmony_ci case OBJ_OP_WRITE: 
14418c2ecf20Sopenharmony_ci case OBJ_OP_DISCARD: 14428c2ecf20Sopenharmony_ci case OBJ_OP_ZEROOUT: 14438c2ecf20Sopenharmony_ci return true; 14448c2ecf20Sopenharmony_ci default: 14458c2ecf20Sopenharmony_ci BUG(); 14468c2ecf20Sopenharmony_ci } 14478c2ecf20Sopenharmony_ci} 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_cistatic void rbd_osd_req_callback(struct ceph_osd_request *osd_req) 14508c2ecf20Sopenharmony_ci{ 14518c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 14528c2ecf20Sopenharmony_ci int result; 14538c2ecf20Sopenharmony_ci 14548c2ecf20Sopenharmony_ci dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req, 14558c2ecf20Sopenharmony_ci osd_req->r_result, obj_req); 14568c2ecf20Sopenharmony_ci 14578c2ecf20Sopenharmony_ci /* 14588c2ecf20Sopenharmony_ci * Writes aren't allowed to return a data payload. In some 14598c2ecf20Sopenharmony_ci * guarded write cases (e.g. stat + zero on an empty object) 14608c2ecf20Sopenharmony_ci * a stat response makes it through, but we don't care. 
14618c2ecf20Sopenharmony_ci */ 14628c2ecf20Sopenharmony_ci if (osd_req->r_result > 0 && rbd_img_is_write(obj_req->img_request)) 14638c2ecf20Sopenharmony_ci result = 0; 14648c2ecf20Sopenharmony_ci else 14658c2ecf20Sopenharmony_ci result = osd_req->r_result; 14668c2ecf20Sopenharmony_ci 14678c2ecf20Sopenharmony_ci rbd_obj_handle_request(obj_req, result); 14688c2ecf20Sopenharmony_ci} 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_cistatic void rbd_osd_format_read(struct ceph_osd_request *osd_req) 14718c2ecf20Sopenharmony_ci{ 14728c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request = osd_req->r_priv; 14738c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev; 14748c2ecf20Sopenharmony_ci struct ceph_options *opt = rbd_dev->rbd_client->client->options; 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_ci osd_req->r_flags = CEPH_OSD_FLAG_READ | opt->read_from_replica; 14778c2ecf20Sopenharmony_ci osd_req->r_snapid = obj_request->img_request->snap_id; 14788c2ecf20Sopenharmony_ci} 14798c2ecf20Sopenharmony_ci 14808c2ecf20Sopenharmony_cistatic void rbd_osd_format_write(struct ceph_osd_request *osd_req) 14818c2ecf20Sopenharmony_ci{ 14828c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request = osd_req->r_priv; 14838c2ecf20Sopenharmony_ci 14848c2ecf20Sopenharmony_ci osd_req->r_flags = CEPH_OSD_FLAG_WRITE; 14858c2ecf20Sopenharmony_ci ktime_get_real_ts64(&osd_req->r_mtime); 14868c2ecf20Sopenharmony_ci osd_req->r_data_offset = obj_request->ex.oe_off; 14878c2ecf20Sopenharmony_ci} 14888c2ecf20Sopenharmony_ci 14898c2ecf20Sopenharmony_cistatic struct ceph_osd_request * 14908c2ecf20Sopenharmony_ci__rbd_obj_add_osd_request(struct rbd_obj_request *obj_req, 14918c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc, int num_ops) 14928c2ecf20Sopenharmony_ci{ 14938c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 14948c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 
14958c2ecf20Sopenharmony_ci struct ceph_osd_request *req; 14968c2ecf20Sopenharmony_ci const char *name_format = rbd_dev->image_format == 1 ? 14978c2ecf20Sopenharmony_ci RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT; 14988c2ecf20Sopenharmony_ci int ret; 14998c2ecf20Sopenharmony_ci 15008c2ecf20Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO); 15018c2ecf20Sopenharmony_ci if (!req) 15028c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 15038c2ecf20Sopenharmony_ci 15048c2ecf20Sopenharmony_ci list_add_tail(&req->r_private_item, &obj_req->osd_reqs); 15058c2ecf20Sopenharmony_ci req->r_callback = rbd_osd_req_callback; 15068c2ecf20Sopenharmony_ci req->r_priv = obj_req; 15078c2ecf20Sopenharmony_ci 15088c2ecf20Sopenharmony_ci /* 15098c2ecf20Sopenharmony_ci * Data objects may be stored in a separate pool, but always in 15108c2ecf20Sopenharmony_ci * the same namespace in that pool as the header in its pool. 15118c2ecf20Sopenharmony_ci */ 15128c2ecf20Sopenharmony_ci ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc); 15138c2ecf20Sopenharmony_ci req->r_base_oloc.pool = rbd_dev->layout.pool_id; 15148c2ecf20Sopenharmony_ci 15158c2ecf20Sopenharmony_ci ret = ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format, 15168c2ecf20Sopenharmony_ci rbd_dev->header.object_prefix, 15178c2ecf20Sopenharmony_ci obj_req->ex.oe_objno); 15188c2ecf20Sopenharmony_ci if (ret) 15198c2ecf20Sopenharmony_ci return ERR_PTR(ret); 15208c2ecf20Sopenharmony_ci 15218c2ecf20Sopenharmony_ci return req; 15228c2ecf20Sopenharmony_ci} 15238c2ecf20Sopenharmony_ci 15248c2ecf20Sopenharmony_cistatic struct ceph_osd_request * 15258c2ecf20Sopenharmony_cirbd_obj_add_osd_request(struct rbd_obj_request *obj_req, int num_ops) 15268c2ecf20Sopenharmony_ci{ 15278c2ecf20Sopenharmony_ci rbd_assert(obj_req->img_request->snapc); 15288c2ecf20Sopenharmony_ci return __rbd_obj_add_osd_request(obj_req, obj_req->img_request->snapc, 15298c2ecf20Sopenharmony_ci num_ops); 15308c2ecf20Sopenharmony_ci} 
15318c2ecf20Sopenharmony_ci 15328c2ecf20Sopenharmony_cistatic struct rbd_obj_request *rbd_obj_request_create(void) 15338c2ecf20Sopenharmony_ci{ 15348c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request; 15358c2ecf20Sopenharmony_ci 15368c2ecf20Sopenharmony_ci obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO); 15378c2ecf20Sopenharmony_ci if (!obj_request) 15388c2ecf20Sopenharmony_ci return NULL; 15398c2ecf20Sopenharmony_ci 15408c2ecf20Sopenharmony_ci ceph_object_extent_init(&obj_request->ex); 15418c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&obj_request->osd_reqs); 15428c2ecf20Sopenharmony_ci mutex_init(&obj_request->state_mutex); 15438c2ecf20Sopenharmony_ci kref_init(&obj_request->kref); 15448c2ecf20Sopenharmony_ci 15458c2ecf20Sopenharmony_ci dout("%s %p\n", __func__, obj_request); 15468c2ecf20Sopenharmony_ci return obj_request; 15478c2ecf20Sopenharmony_ci} 15488c2ecf20Sopenharmony_ci 15498c2ecf20Sopenharmony_cistatic void rbd_obj_request_destroy(struct kref *kref) 15508c2ecf20Sopenharmony_ci{ 15518c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request; 15528c2ecf20Sopenharmony_ci struct ceph_osd_request *osd_req; 15538c2ecf20Sopenharmony_ci u32 i; 15548c2ecf20Sopenharmony_ci 15558c2ecf20Sopenharmony_ci obj_request = container_of(kref, struct rbd_obj_request, kref); 15568c2ecf20Sopenharmony_ci 15578c2ecf20Sopenharmony_ci dout("%s: obj %p\n", __func__, obj_request); 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci while (!list_empty(&obj_request->osd_reqs)) { 15608c2ecf20Sopenharmony_ci osd_req = list_first_entry(&obj_request->osd_reqs, 15618c2ecf20Sopenharmony_ci struct ceph_osd_request, r_private_item); 15628c2ecf20Sopenharmony_ci list_del_init(&osd_req->r_private_item); 15638c2ecf20Sopenharmony_ci ceph_osdc_put_request(osd_req); 15648c2ecf20Sopenharmony_ci } 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_ci switch (obj_request->img_request->data_type) { 15678c2ecf20Sopenharmony_ci case OBJ_REQUEST_NODATA: 15688c2ecf20Sopenharmony_ci 
case OBJ_REQUEST_BIO: 15698c2ecf20Sopenharmony_ci case OBJ_REQUEST_BVECS: 15708c2ecf20Sopenharmony_ci break; /* Nothing to do */ 15718c2ecf20Sopenharmony_ci case OBJ_REQUEST_OWN_BVECS: 15728c2ecf20Sopenharmony_ci kfree(obj_request->bvec_pos.bvecs); 15738c2ecf20Sopenharmony_ci break; 15748c2ecf20Sopenharmony_ci default: 15758c2ecf20Sopenharmony_ci BUG(); 15768c2ecf20Sopenharmony_ci } 15778c2ecf20Sopenharmony_ci 15788c2ecf20Sopenharmony_ci kfree(obj_request->img_extents); 15798c2ecf20Sopenharmony_ci if (obj_request->copyup_bvecs) { 15808c2ecf20Sopenharmony_ci for (i = 0; i < obj_request->copyup_bvec_count; i++) { 15818c2ecf20Sopenharmony_ci if (obj_request->copyup_bvecs[i].bv_page) 15828c2ecf20Sopenharmony_ci __free_page(obj_request->copyup_bvecs[i].bv_page); 15838c2ecf20Sopenharmony_ci } 15848c2ecf20Sopenharmony_ci kfree(obj_request->copyup_bvecs); 15858c2ecf20Sopenharmony_ci } 15868c2ecf20Sopenharmony_ci 15878c2ecf20Sopenharmony_ci kmem_cache_free(rbd_obj_request_cache, obj_request); 15888c2ecf20Sopenharmony_ci} 15898c2ecf20Sopenharmony_ci 15908c2ecf20Sopenharmony_ci/* It's OK to call this for a device with no parent */ 15918c2ecf20Sopenharmony_ci 15928c2ecf20Sopenharmony_cistatic void rbd_spec_put(struct rbd_spec *spec); 15938c2ecf20Sopenharmony_cistatic void rbd_dev_unparent(struct rbd_device *rbd_dev) 15948c2ecf20Sopenharmony_ci{ 15958c2ecf20Sopenharmony_ci rbd_dev_remove_parent(rbd_dev); 15968c2ecf20Sopenharmony_ci rbd_spec_put(rbd_dev->parent_spec); 15978c2ecf20Sopenharmony_ci rbd_dev->parent_spec = NULL; 15988c2ecf20Sopenharmony_ci rbd_dev->parent_overlap = 0; 15998c2ecf20Sopenharmony_ci} 16008c2ecf20Sopenharmony_ci 16018c2ecf20Sopenharmony_ci/* 16028c2ecf20Sopenharmony_ci * Parent image reference counting is used to determine when an 16038c2ecf20Sopenharmony_ci * image's parent fields can be safely torn down--after there are no 16048c2ecf20Sopenharmony_ci * more in-flight requests to the parent image. 
When the last 16058c2ecf20Sopenharmony_ci * reference is dropped, cleaning them up is safe. 16068c2ecf20Sopenharmony_ci */ 16078c2ecf20Sopenharmony_cistatic void rbd_dev_parent_put(struct rbd_device *rbd_dev) 16088c2ecf20Sopenharmony_ci{ 16098c2ecf20Sopenharmony_ci int counter; 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ci if (!rbd_dev->parent_spec) 16128c2ecf20Sopenharmony_ci return; 16138c2ecf20Sopenharmony_ci 16148c2ecf20Sopenharmony_ci counter = atomic_dec_return_safe(&rbd_dev->parent_ref); 16158c2ecf20Sopenharmony_ci if (counter > 0) 16168c2ecf20Sopenharmony_ci return; 16178c2ecf20Sopenharmony_ci 16188c2ecf20Sopenharmony_ci /* Last reference; clean up parent data structures */ 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci if (!counter) 16218c2ecf20Sopenharmony_ci rbd_dev_unparent(rbd_dev); 16228c2ecf20Sopenharmony_ci else 16238c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "parent reference underflow"); 16248c2ecf20Sopenharmony_ci} 16258c2ecf20Sopenharmony_ci 16268c2ecf20Sopenharmony_ci/* 16278c2ecf20Sopenharmony_ci * If an image has a non-zero parent overlap, get a reference to its 16288c2ecf20Sopenharmony_ci * parent. 16298c2ecf20Sopenharmony_ci * 16308c2ecf20Sopenharmony_ci * Returns true if the rbd device has a parent with a non-zero 16318c2ecf20Sopenharmony_ci * overlap and a reference for it was successfully taken, or 16328c2ecf20Sopenharmony_ci * false otherwise. 
16338c2ecf20Sopenharmony_ci */ 16348c2ecf20Sopenharmony_cistatic bool rbd_dev_parent_get(struct rbd_device *rbd_dev) 16358c2ecf20Sopenharmony_ci{ 16368c2ecf20Sopenharmony_ci int counter = 0; 16378c2ecf20Sopenharmony_ci 16388c2ecf20Sopenharmony_ci if (!rbd_dev->parent_spec) 16398c2ecf20Sopenharmony_ci return false; 16408c2ecf20Sopenharmony_ci 16418c2ecf20Sopenharmony_ci if (rbd_dev->parent_overlap) 16428c2ecf20Sopenharmony_ci counter = atomic_inc_return_safe(&rbd_dev->parent_ref); 16438c2ecf20Sopenharmony_ci 16448c2ecf20Sopenharmony_ci if (counter < 0) 16458c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "parent reference overflow"); 16468c2ecf20Sopenharmony_ci 16478c2ecf20Sopenharmony_ci return counter > 0; 16488c2ecf20Sopenharmony_ci} 16498c2ecf20Sopenharmony_ci 16508c2ecf20Sopenharmony_cistatic void rbd_img_request_init(struct rbd_img_request *img_request, 16518c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev, 16528c2ecf20Sopenharmony_ci enum obj_operation_type op_type) 16538c2ecf20Sopenharmony_ci{ 16548c2ecf20Sopenharmony_ci memset(img_request, 0, sizeof(*img_request)); 16558c2ecf20Sopenharmony_ci 16568c2ecf20Sopenharmony_ci img_request->rbd_dev = rbd_dev; 16578c2ecf20Sopenharmony_ci img_request->op_type = op_type; 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&img_request->lock_item); 16608c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&img_request->object_extents); 16618c2ecf20Sopenharmony_ci mutex_init(&img_request->state_mutex); 16628c2ecf20Sopenharmony_ci} 16638c2ecf20Sopenharmony_ci 16648c2ecf20Sopenharmony_ci/* 16658c2ecf20Sopenharmony_ci * Only snap_id is captured here, for reads. For writes, snapshot 16668c2ecf20Sopenharmony_ci * context is captured in rbd_img_object_requests() after exclusive 16678c2ecf20Sopenharmony_ci * lock is ensured to be held. 
16688c2ecf20Sopenharmony_ci */ 16698c2ecf20Sopenharmony_cistatic void rbd_img_capture_header(struct rbd_img_request *img_req) 16708c2ecf20Sopenharmony_ci{ 16718c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 16728c2ecf20Sopenharmony_ci 16738c2ecf20Sopenharmony_ci lockdep_assert_held(&rbd_dev->header_rwsem); 16748c2ecf20Sopenharmony_ci 16758c2ecf20Sopenharmony_ci if (!rbd_img_is_write(img_req)) 16768c2ecf20Sopenharmony_ci img_req->snap_id = rbd_dev->spec->snap_id; 16778c2ecf20Sopenharmony_ci 16788c2ecf20Sopenharmony_ci if (rbd_dev_parent_get(rbd_dev)) 16798c2ecf20Sopenharmony_ci img_request_layered_set(img_req); 16808c2ecf20Sopenharmony_ci} 16818c2ecf20Sopenharmony_ci 16828c2ecf20Sopenharmony_cistatic void rbd_img_request_destroy(struct rbd_img_request *img_request) 16838c2ecf20Sopenharmony_ci{ 16848c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_request; 16858c2ecf20Sopenharmony_ci struct rbd_obj_request *next_obj_request; 16868c2ecf20Sopenharmony_ci 16878c2ecf20Sopenharmony_ci dout("%s: img %p\n", __func__, img_request); 16888c2ecf20Sopenharmony_ci 16898c2ecf20Sopenharmony_ci WARN_ON(!list_empty(&img_request->lock_item)); 16908c2ecf20Sopenharmony_ci for_each_obj_request_safe(img_request, obj_request, next_obj_request) 16918c2ecf20Sopenharmony_ci rbd_img_obj_request_del(img_request, obj_request); 16928c2ecf20Sopenharmony_ci 16938c2ecf20Sopenharmony_ci if (img_request_layered_test(img_request)) 16948c2ecf20Sopenharmony_ci rbd_dev_parent_put(img_request->rbd_dev); 16958c2ecf20Sopenharmony_ci 16968c2ecf20Sopenharmony_ci if (rbd_img_is_write(img_request)) 16978c2ecf20Sopenharmony_ci ceph_put_snap_context(img_request->snapc); 16988c2ecf20Sopenharmony_ci 16998c2ecf20Sopenharmony_ci if (test_bit(IMG_REQ_CHILD, &img_request->flags)) 17008c2ecf20Sopenharmony_ci kmem_cache_free(rbd_img_request_cache, img_request); 17018c2ecf20Sopenharmony_ci} 17028c2ecf20Sopenharmony_ci 17038c2ecf20Sopenharmony_ci#define BITS_PER_OBJ 2 
17048c2ecf20Sopenharmony_ci#define OBJS_PER_BYTE (BITS_PER_BYTE / BITS_PER_OBJ) 17058c2ecf20Sopenharmony_ci#define OBJ_MASK ((1 << BITS_PER_OBJ) - 1) 17068c2ecf20Sopenharmony_ci 17078c2ecf20Sopenharmony_cistatic void __rbd_object_map_index(struct rbd_device *rbd_dev, u64 objno, 17088c2ecf20Sopenharmony_ci u64 *index, u8 *shift) 17098c2ecf20Sopenharmony_ci{ 17108c2ecf20Sopenharmony_ci u32 off; 17118c2ecf20Sopenharmony_ci 17128c2ecf20Sopenharmony_ci rbd_assert(objno < rbd_dev->object_map_size); 17138c2ecf20Sopenharmony_ci *index = div_u64_rem(objno, OBJS_PER_BYTE, &off); 17148c2ecf20Sopenharmony_ci *shift = (OBJS_PER_BYTE - off - 1) * BITS_PER_OBJ; 17158c2ecf20Sopenharmony_ci} 17168c2ecf20Sopenharmony_ci 17178c2ecf20Sopenharmony_cistatic u8 __rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno) 17188c2ecf20Sopenharmony_ci{ 17198c2ecf20Sopenharmony_ci u64 index; 17208c2ecf20Sopenharmony_ci u8 shift; 17218c2ecf20Sopenharmony_ci 17228c2ecf20Sopenharmony_ci lockdep_assert_held(&rbd_dev->object_map_lock); 17238c2ecf20Sopenharmony_ci __rbd_object_map_index(rbd_dev, objno, &index, &shift); 17248c2ecf20Sopenharmony_ci return (rbd_dev->object_map[index] >> shift) & OBJ_MASK; 17258c2ecf20Sopenharmony_ci} 17268c2ecf20Sopenharmony_ci 17278c2ecf20Sopenharmony_cistatic void __rbd_object_map_set(struct rbd_device *rbd_dev, u64 objno, u8 val) 17288c2ecf20Sopenharmony_ci{ 17298c2ecf20Sopenharmony_ci u64 index; 17308c2ecf20Sopenharmony_ci u8 shift; 17318c2ecf20Sopenharmony_ci u8 *p; 17328c2ecf20Sopenharmony_ci 17338c2ecf20Sopenharmony_ci lockdep_assert_held(&rbd_dev->object_map_lock); 17348c2ecf20Sopenharmony_ci rbd_assert(!(val & ~OBJ_MASK)); 17358c2ecf20Sopenharmony_ci 17368c2ecf20Sopenharmony_ci __rbd_object_map_index(rbd_dev, objno, &index, &shift); 17378c2ecf20Sopenharmony_ci p = &rbd_dev->object_map[index]; 17388c2ecf20Sopenharmony_ci *p = (*p & ~(OBJ_MASK << shift)) | (val << shift); 17398c2ecf20Sopenharmony_ci} 17408c2ecf20Sopenharmony_ci 17418c2ecf20Sopenharmony_cistatic 
u8 rbd_object_map_get(struct rbd_device *rbd_dev, u64 objno) 17428c2ecf20Sopenharmony_ci{ 17438c2ecf20Sopenharmony_ci u8 state; 17448c2ecf20Sopenharmony_ci 17458c2ecf20Sopenharmony_ci spin_lock(&rbd_dev->object_map_lock); 17468c2ecf20Sopenharmony_ci state = __rbd_object_map_get(rbd_dev, objno); 17478c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev->object_map_lock); 17488c2ecf20Sopenharmony_ci return state; 17498c2ecf20Sopenharmony_ci} 17508c2ecf20Sopenharmony_ci 17518c2ecf20Sopenharmony_cistatic bool use_object_map(struct rbd_device *rbd_dev) 17528c2ecf20Sopenharmony_ci{ 17538c2ecf20Sopenharmony_ci /* 17548c2ecf20Sopenharmony_ci * An image mapped read-only can't use the object map -- it isn't 17558c2ecf20Sopenharmony_ci * loaded because the header lock isn't acquired. Someone else can 17568c2ecf20Sopenharmony_ci * write to the image and update the object map behind our back. 17578c2ecf20Sopenharmony_ci * 17588c2ecf20Sopenharmony_ci * A snapshot can't be written to, so using the object map is always 17598c2ecf20Sopenharmony_ci * safe. 
17608c2ecf20Sopenharmony_ci */ 17618c2ecf20Sopenharmony_ci if (!rbd_is_snap(rbd_dev) && rbd_is_ro(rbd_dev)) 17628c2ecf20Sopenharmony_ci return false; 17638c2ecf20Sopenharmony_ci 17648c2ecf20Sopenharmony_ci return ((rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) && 17658c2ecf20Sopenharmony_ci !(rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID)); 17668c2ecf20Sopenharmony_ci} 17678c2ecf20Sopenharmony_ci 17688c2ecf20Sopenharmony_cistatic bool rbd_object_map_may_exist(struct rbd_device *rbd_dev, u64 objno) 17698c2ecf20Sopenharmony_ci{ 17708c2ecf20Sopenharmony_ci u8 state; 17718c2ecf20Sopenharmony_ci 17728c2ecf20Sopenharmony_ci /* fall back to default logic if object map is disabled or invalid */ 17738c2ecf20Sopenharmony_ci if (!use_object_map(rbd_dev)) 17748c2ecf20Sopenharmony_ci return true; 17758c2ecf20Sopenharmony_ci 17768c2ecf20Sopenharmony_ci state = rbd_object_map_get(rbd_dev, objno); 17778c2ecf20Sopenharmony_ci return state != OBJECT_NONEXISTENT; 17788c2ecf20Sopenharmony_ci} 17798c2ecf20Sopenharmony_ci 17808c2ecf20Sopenharmony_cistatic void rbd_object_map_name(struct rbd_device *rbd_dev, u64 snap_id, 17818c2ecf20Sopenharmony_ci struct ceph_object_id *oid) 17828c2ecf20Sopenharmony_ci{ 17838c2ecf20Sopenharmony_ci if (snap_id == CEPH_NOSNAP) 17848c2ecf20Sopenharmony_ci ceph_oid_printf(oid, "%s%s", RBD_OBJECT_MAP_PREFIX, 17858c2ecf20Sopenharmony_ci rbd_dev->spec->image_id); 17868c2ecf20Sopenharmony_ci else 17878c2ecf20Sopenharmony_ci ceph_oid_printf(oid, "%s%s.%016llx", RBD_OBJECT_MAP_PREFIX, 17888c2ecf20Sopenharmony_ci rbd_dev->spec->image_id, snap_id); 17898c2ecf20Sopenharmony_ci} 17908c2ecf20Sopenharmony_ci 17918c2ecf20Sopenharmony_cistatic int rbd_object_map_lock(struct rbd_device *rbd_dev) 17928c2ecf20Sopenharmony_ci{ 17938c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 17948c2ecf20Sopenharmony_ci CEPH_DEFINE_OID_ONSTACK(oid); 17958c2ecf20Sopenharmony_ci u8 lock_type; 17968c2ecf20Sopenharmony_ci char 
*lock_tag; 17978c2ecf20Sopenharmony_ci struct ceph_locker *lockers; 17988c2ecf20Sopenharmony_ci u32 num_lockers; 17998c2ecf20Sopenharmony_ci bool broke_lock = false; 18008c2ecf20Sopenharmony_ci int ret; 18018c2ecf20Sopenharmony_ci 18028c2ecf20Sopenharmony_ci rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid); 18038c2ecf20Sopenharmony_ci 18048c2ecf20Sopenharmony_ciagain: 18058c2ecf20Sopenharmony_ci ret = ceph_cls_lock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, 18068c2ecf20Sopenharmony_ci CEPH_CLS_LOCK_EXCLUSIVE, "", "", "", 0); 18078c2ecf20Sopenharmony_ci if (ret != -EBUSY || broke_lock) { 18088c2ecf20Sopenharmony_ci if (ret == -EEXIST) 18098c2ecf20Sopenharmony_ci ret = 0; /* already locked by myself */ 18108c2ecf20Sopenharmony_ci if (ret) 18118c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to lock object map: %d", ret); 18128c2ecf20Sopenharmony_ci return ret; 18138c2ecf20Sopenharmony_ci } 18148c2ecf20Sopenharmony_ci 18158c2ecf20Sopenharmony_ci ret = ceph_cls_lock_info(osdc, &oid, &rbd_dev->header_oloc, 18168c2ecf20Sopenharmony_ci RBD_LOCK_NAME, &lock_type, &lock_tag, 18178c2ecf20Sopenharmony_ci &lockers, &num_lockers); 18188c2ecf20Sopenharmony_ci if (ret) { 18198c2ecf20Sopenharmony_ci if (ret == -ENOENT) 18208c2ecf20Sopenharmony_ci goto again; 18218c2ecf20Sopenharmony_ci 18228c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to get object map lockers: %d", ret); 18238c2ecf20Sopenharmony_ci return ret; 18248c2ecf20Sopenharmony_ci } 18258c2ecf20Sopenharmony_ci 18268c2ecf20Sopenharmony_ci kfree(lock_tag); 18278c2ecf20Sopenharmony_ci if (num_lockers == 0) 18288c2ecf20Sopenharmony_ci goto again; 18298c2ecf20Sopenharmony_ci 18308c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "breaking object map lock owned by %s%llu", 18318c2ecf20Sopenharmony_ci ENTITY_NAME(lockers[0].id.name)); 18328c2ecf20Sopenharmony_ci 18338c2ecf20Sopenharmony_ci ret = ceph_cls_break_lock(osdc, &oid, &rbd_dev->header_oloc, 18348c2ecf20Sopenharmony_ci RBD_LOCK_NAME, lockers[0].id.cookie, 
18358c2ecf20Sopenharmony_ci &lockers[0].id.name); 18368c2ecf20Sopenharmony_ci ceph_free_lockers(lockers, num_lockers); 18378c2ecf20Sopenharmony_ci if (ret) { 18388c2ecf20Sopenharmony_ci if (ret == -ENOENT) 18398c2ecf20Sopenharmony_ci goto again; 18408c2ecf20Sopenharmony_ci 18418c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to break object map lock: %d", ret); 18428c2ecf20Sopenharmony_ci return ret; 18438c2ecf20Sopenharmony_ci } 18448c2ecf20Sopenharmony_ci 18458c2ecf20Sopenharmony_ci broke_lock = true; 18468c2ecf20Sopenharmony_ci goto again; 18478c2ecf20Sopenharmony_ci} 18488c2ecf20Sopenharmony_ci 18498c2ecf20Sopenharmony_cistatic void rbd_object_map_unlock(struct rbd_device *rbd_dev) 18508c2ecf20Sopenharmony_ci{ 18518c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 18528c2ecf20Sopenharmony_ci CEPH_DEFINE_OID_ONSTACK(oid); 18538c2ecf20Sopenharmony_ci int ret; 18548c2ecf20Sopenharmony_ci 18558c2ecf20Sopenharmony_ci rbd_object_map_name(rbd_dev, CEPH_NOSNAP, &oid); 18568c2ecf20Sopenharmony_ci 18578c2ecf20Sopenharmony_ci ret = ceph_cls_unlock(osdc, &oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, 18588c2ecf20Sopenharmony_ci ""); 18598c2ecf20Sopenharmony_ci if (ret && ret != -ENOENT) 18608c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to unlock object map: %d", ret); 18618c2ecf20Sopenharmony_ci} 18628c2ecf20Sopenharmony_ci 18638c2ecf20Sopenharmony_cistatic int decode_object_map_header(void **p, void *end, u64 *object_map_size) 18648c2ecf20Sopenharmony_ci{ 18658c2ecf20Sopenharmony_ci u8 struct_v; 18668c2ecf20Sopenharmony_ci u32 struct_len; 18678c2ecf20Sopenharmony_ci u32 header_len; 18688c2ecf20Sopenharmony_ci void *header_end; 18698c2ecf20Sopenharmony_ci int ret; 18708c2ecf20Sopenharmony_ci 18718c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, header_len, e_inval); 18728c2ecf20Sopenharmony_ci header_end = *p + header_len; 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ci ret = ceph_start_decoding(p, end, 1, "BitVector 
header", &struct_v, 18758c2ecf20Sopenharmony_ci &struct_len); 18768c2ecf20Sopenharmony_ci if (ret) 18778c2ecf20Sopenharmony_ci return ret; 18788c2ecf20Sopenharmony_ci 18798c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, *object_map_size, e_inval); 18808c2ecf20Sopenharmony_ci 18818c2ecf20Sopenharmony_ci *p = header_end; 18828c2ecf20Sopenharmony_ci return 0; 18838c2ecf20Sopenharmony_ci 18848c2ecf20Sopenharmony_cie_inval: 18858c2ecf20Sopenharmony_ci return -EINVAL; 18868c2ecf20Sopenharmony_ci} 18878c2ecf20Sopenharmony_ci 18888c2ecf20Sopenharmony_cistatic int __rbd_object_map_load(struct rbd_device *rbd_dev) 18898c2ecf20Sopenharmony_ci{ 18908c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 18918c2ecf20Sopenharmony_ci CEPH_DEFINE_OID_ONSTACK(oid); 18928c2ecf20Sopenharmony_ci struct page **pages; 18938c2ecf20Sopenharmony_ci void *p, *end; 18948c2ecf20Sopenharmony_ci size_t reply_len; 18958c2ecf20Sopenharmony_ci u64 num_objects; 18968c2ecf20Sopenharmony_ci u64 object_map_bytes; 18978c2ecf20Sopenharmony_ci u64 object_map_size; 18988c2ecf20Sopenharmony_ci int num_pages; 18998c2ecf20Sopenharmony_ci int ret; 19008c2ecf20Sopenharmony_ci 19018c2ecf20Sopenharmony_ci rbd_assert(!rbd_dev->object_map && !rbd_dev->object_map_size); 19028c2ecf20Sopenharmony_ci 19038c2ecf20Sopenharmony_ci num_objects = ceph_get_num_objects(&rbd_dev->layout, 19048c2ecf20Sopenharmony_ci rbd_dev->mapping.size); 19058c2ecf20Sopenharmony_ci object_map_bytes = DIV_ROUND_UP_ULL(num_objects * BITS_PER_OBJ, 19068c2ecf20Sopenharmony_ci BITS_PER_BYTE); 19078c2ecf20Sopenharmony_ci num_pages = calc_pages_for(0, object_map_bytes) + 1; 19088c2ecf20Sopenharmony_ci pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 19098c2ecf20Sopenharmony_ci if (IS_ERR(pages)) 19108c2ecf20Sopenharmony_ci return PTR_ERR(pages); 19118c2ecf20Sopenharmony_ci 19128c2ecf20Sopenharmony_ci reply_len = num_pages * PAGE_SIZE; 19138c2ecf20Sopenharmony_ci rbd_object_map_name(rbd_dev, 
rbd_dev->spec->snap_id, &oid); 19148c2ecf20Sopenharmony_ci ret = ceph_osdc_call(osdc, &oid, &rbd_dev->header_oloc, 19158c2ecf20Sopenharmony_ci "rbd", "object_map_load", CEPH_OSD_FLAG_READ, 19168c2ecf20Sopenharmony_ci NULL, 0, pages, &reply_len); 19178c2ecf20Sopenharmony_ci if (ret) 19188c2ecf20Sopenharmony_ci goto out; 19198c2ecf20Sopenharmony_ci 19208c2ecf20Sopenharmony_ci p = page_address(pages[0]); 19218c2ecf20Sopenharmony_ci end = p + min(reply_len, (size_t)PAGE_SIZE); 19228c2ecf20Sopenharmony_ci ret = decode_object_map_header(&p, end, &object_map_size); 19238c2ecf20Sopenharmony_ci if (ret) 19248c2ecf20Sopenharmony_ci goto out; 19258c2ecf20Sopenharmony_ci 19268c2ecf20Sopenharmony_ci if (object_map_size != num_objects) { 19278c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "object map size mismatch: %llu vs %llu", 19288c2ecf20Sopenharmony_ci object_map_size, num_objects); 19298c2ecf20Sopenharmony_ci ret = -EINVAL; 19308c2ecf20Sopenharmony_ci goto out; 19318c2ecf20Sopenharmony_ci } 19328c2ecf20Sopenharmony_ci 19338c2ecf20Sopenharmony_ci if (offset_in_page(p) + object_map_bytes > reply_len) { 19348c2ecf20Sopenharmony_ci ret = -EINVAL; 19358c2ecf20Sopenharmony_ci goto out; 19368c2ecf20Sopenharmony_ci } 19378c2ecf20Sopenharmony_ci 19388c2ecf20Sopenharmony_ci rbd_dev->object_map = kvmalloc(object_map_bytes, GFP_KERNEL); 19398c2ecf20Sopenharmony_ci if (!rbd_dev->object_map) { 19408c2ecf20Sopenharmony_ci ret = -ENOMEM; 19418c2ecf20Sopenharmony_ci goto out; 19428c2ecf20Sopenharmony_ci } 19438c2ecf20Sopenharmony_ci 19448c2ecf20Sopenharmony_ci rbd_dev->object_map_size = object_map_size; 19458c2ecf20Sopenharmony_ci ceph_copy_from_page_vector(pages, rbd_dev->object_map, 19468c2ecf20Sopenharmony_ci offset_in_page(p), object_map_bytes); 19478c2ecf20Sopenharmony_ci 19488c2ecf20Sopenharmony_ciout: 19498c2ecf20Sopenharmony_ci ceph_release_page_vector(pages, num_pages); 19508c2ecf20Sopenharmony_ci return ret; 19518c2ecf20Sopenharmony_ci} 19528c2ecf20Sopenharmony_ci 
19538c2ecf20Sopenharmony_cistatic void rbd_object_map_free(struct rbd_device *rbd_dev) 19548c2ecf20Sopenharmony_ci{ 19558c2ecf20Sopenharmony_ci kvfree(rbd_dev->object_map); 19568c2ecf20Sopenharmony_ci rbd_dev->object_map = NULL; 19578c2ecf20Sopenharmony_ci rbd_dev->object_map_size = 0; 19588c2ecf20Sopenharmony_ci} 19598c2ecf20Sopenharmony_ci 19608c2ecf20Sopenharmony_cistatic int rbd_object_map_load(struct rbd_device *rbd_dev) 19618c2ecf20Sopenharmony_ci{ 19628c2ecf20Sopenharmony_ci int ret; 19638c2ecf20Sopenharmony_ci 19648c2ecf20Sopenharmony_ci ret = __rbd_object_map_load(rbd_dev); 19658c2ecf20Sopenharmony_ci if (ret) 19668c2ecf20Sopenharmony_ci return ret; 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci ret = rbd_dev_v2_get_flags(rbd_dev); 19698c2ecf20Sopenharmony_ci if (ret) { 19708c2ecf20Sopenharmony_ci rbd_object_map_free(rbd_dev); 19718c2ecf20Sopenharmony_ci return ret; 19728c2ecf20Sopenharmony_ci } 19738c2ecf20Sopenharmony_ci 19748c2ecf20Sopenharmony_ci if (rbd_dev->object_map_flags & RBD_FLAG_OBJECT_MAP_INVALID) 19758c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "object map is invalid"); 19768c2ecf20Sopenharmony_ci 19778c2ecf20Sopenharmony_ci return 0; 19788c2ecf20Sopenharmony_ci} 19798c2ecf20Sopenharmony_ci 19808c2ecf20Sopenharmony_cistatic int rbd_object_map_open(struct rbd_device *rbd_dev) 19818c2ecf20Sopenharmony_ci{ 19828c2ecf20Sopenharmony_ci int ret; 19838c2ecf20Sopenharmony_ci 19848c2ecf20Sopenharmony_ci ret = rbd_object_map_lock(rbd_dev); 19858c2ecf20Sopenharmony_ci if (ret) 19868c2ecf20Sopenharmony_ci return ret; 19878c2ecf20Sopenharmony_ci 19888c2ecf20Sopenharmony_ci ret = rbd_object_map_load(rbd_dev); 19898c2ecf20Sopenharmony_ci if (ret) { 19908c2ecf20Sopenharmony_ci rbd_object_map_unlock(rbd_dev); 19918c2ecf20Sopenharmony_ci return ret; 19928c2ecf20Sopenharmony_ci } 19938c2ecf20Sopenharmony_ci 19948c2ecf20Sopenharmony_ci return 0; 19958c2ecf20Sopenharmony_ci} 19968c2ecf20Sopenharmony_ci 19978c2ecf20Sopenharmony_cistatic void 
rbd_object_map_close(struct rbd_device *rbd_dev) 19988c2ecf20Sopenharmony_ci{ 19998c2ecf20Sopenharmony_ci rbd_object_map_free(rbd_dev); 20008c2ecf20Sopenharmony_ci rbd_object_map_unlock(rbd_dev); 20018c2ecf20Sopenharmony_ci} 20028c2ecf20Sopenharmony_ci 20038c2ecf20Sopenharmony_ci/* 20048c2ecf20Sopenharmony_ci * This function needs snap_id (or more precisely just something to 20058c2ecf20Sopenharmony_ci * distinguish between HEAD and snapshot object maps), new_state and 20068c2ecf20Sopenharmony_ci * current_state that were passed to rbd_object_map_update(). 20078c2ecf20Sopenharmony_ci * 20088c2ecf20Sopenharmony_ci * To avoid allocating and stashing a context we piggyback on the OSD 20098c2ecf20Sopenharmony_ci * request. A HEAD update has two ops (assert_locked). For new_state 20108c2ecf20Sopenharmony_ci * and current_state we decode our own object_map_update op, encoded in 20118c2ecf20Sopenharmony_ci * rbd_cls_object_map_update(). 20128c2ecf20Sopenharmony_ci */ 20138c2ecf20Sopenharmony_cistatic int rbd_object_map_update_finish(struct rbd_obj_request *obj_req, 20148c2ecf20Sopenharmony_ci struct ceph_osd_request *osd_req) 20158c2ecf20Sopenharmony_ci{ 20168c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 20178c2ecf20Sopenharmony_ci struct ceph_osd_data *osd_data; 20188c2ecf20Sopenharmony_ci u64 objno; 20198c2ecf20Sopenharmony_ci u8 state, new_state, current_state; 20208c2ecf20Sopenharmony_ci bool has_current_state; 20218c2ecf20Sopenharmony_ci void *p; 20228c2ecf20Sopenharmony_ci 20238c2ecf20Sopenharmony_ci if (osd_req->r_result) 20248c2ecf20Sopenharmony_ci return osd_req->r_result; 20258c2ecf20Sopenharmony_ci 20268c2ecf20Sopenharmony_ci /* 20278c2ecf20Sopenharmony_ci * Nothing to do for a snapshot object map. 
20288c2ecf20Sopenharmony_ci */ 20298c2ecf20Sopenharmony_ci if (osd_req->r_num_ops == 1) 20308c2ecf20Sopenharmony_ci return 0; 20318c2ecf20Sopenharmony_ci 20328c2ecf20Sopenharmony_ci /* 20338c2ecf20Sopenharmony_ci * Update in-memory HEAD object map. 20348c2ecf20Sopenharmony_ci */ 20358c2ecf20Sopenharmony_ci rbd_assert(osd_req->r_num_ops == 2); 20368c2ecf20Sopenharmony_ci osd_data = osd_req_op_data(osd_req, 1, cls, request_data); 20378c2ecf20Sopenharmony_ci rbd_assert(osd_data->type == CEPH_OSD_DATA_TYPE_PAGES); 20388c2ecf20Sopenharmony_ci 20398c2ecf20Sopenharmony_ci p = page_address(osd_data->pages[0]); 20408c2ecf20Sopenharmony_ci objno = ceph_decode_64(&p); 20418c2ecf20Sopenharmony_ci rbd_assert(objno == obj_req->ex.oe_objno); 20428c2ecf20Sopenharmony_ci rbd_assert(ceph_decode_64(&p) == objno + 1); 20438c2ecf20Sopenharmony_ci new_state = ceph_decode_8(&p); 20448c2ecf20Sopenharmony_ci has_current_state = ceph_decode_8(&p); 20458c2ecf20Sopenharmony_ci if (has_current_state) 20468c2ecf20Sopenharmony_ci current_state = ceph_decode_8(&p); 20478c2ecf20Sopenharmony_ci 20488c2ecf20Sopenharmony_ci spin_lock(&rbd_dev->object_map_lock); 20498c2ecf20Sopenharmony_ci state = __rbd_object_map_get(rbd_dev, objno); 20508c2ecf20Sopenharmony_ci if (!has_current_state || current_state == state || 20518c2ecf20Sopenharmony_ci (current_state == OBJECT_EXISTS && state == OBJECT_EXISTS_CLEAN)) 20528c2ecf20Sopenharmony_ci __rbd_object_map_set(rbd_dev, objno, new_state); 20538c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev->object_map_lock); 20548c2ecf20Sopenharmony_ci 20558c2ecf20Sopenharmony_ci return 0; 20568c2ecf20Sopenharmony_ci} 20578c2ecf20Sopenharmony_ci 20588c2ecf20Sopenharmony_cistatic void rbd_object_map_callback(struct ceph_osd_request *osd_req) 20598c2ecf20Sopenharmony_ci{ 20608c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 20618c2ecf20Sopenharmony_ci int result; 20628c2ecf20Sopenharmony_ci 20638c2ecf20Sopenharmony_ci dout("%s osd_req %p result %d for 
obj_req %p\n", __func__, osd_req, 20648c2ecf20Sopenharmony_ci osd_req->r_result, obj_req); 20658c2ecf20Sopenharmony_ci 20668c2ecf20Sopenharmony_ci result = rbd_object_map_update_finish(obj_req, osd_req); 20678c2ecf20Sopenharmony_ci rbd_obj_handle_request(obj_req, result); 20688c2ecf20Sopenharmony_ci} 20698c2ecf20Sopenharmony_ci 20708c2ecf20Sopenharmony_cistatic bool update_needed(struct rbd_device *rbd_dev, u64 objno, u8 new_state) 20718c2ecf20Sopenharmony_ci{ 20728c2ecf20Sopenharmony_ci u8 state = rbd_object_map_get(rbd_dev, objno); 20738c2ecf20Sopenharmony_ci 20748c2ecf20Sopenharmony_ci if (state == new_state || 20758c2ecf20Sopenharmony_ci (new_state == OBJECT_PENDING && state == OBJECT_NONEXISTENT) || 20768c2ecf20Sopenharmony_ci (new_state == OBJECT_NONEXISTENT && state != OBJECT_PENDING)) 20778c2ecf20Sopenharmony_ci return false; 20788c2ecf20Sopenharmony_ci 20798c2ecf20Sopenharmony_ci return true; 20808c2ecf20Sopenharmony_ci} 20818c2ecf20Sopenharmony_ci 20828c2ecf20Sopenharmony_cistatic int rbd_cls_object_map_update(struct ceph_osd_request *req, 20838c2ecf20Sopenharmony_ci int which, u64 objno, u8 new_state, 20848c2ecf20Sopenharmony_ci const u8 *current_state) 20858c2ecf20Sopenharmony_ci{ 20868c2ecf20Sopenharmony_ci struct page **pages; 20878c2ecf20Sopenharmony_ci void *p, *start; 20888c2ecf20Sopenharmony_ci int ret; 20898c2ecf20Sopenharmony_ci 20908c2ecf20Sopenharmony_ci ret = osd_req_op_cls_init(req, which, "rbd", "object_map_update"); 20918c2ecf20Sopenharmony_ci if (ret) 20928c2ecf20Sopenharmony_ci return ret; 20938c2ecf20Sopenharmony_ci 20948c2ecf20Sopenharmony_ci pages = ceph_alloc_page_vector(1, GFP_NOIO); 20958c2ecf20Sopenharmony_ci if (IS_ERR(pages)) 20968c2ecf20Sopenharmony_ci return PTR_ERR(pages); 20978c2ecf20Sopenharmony_ci 20988c2ecf20Sopenharmony_ci p = start = page_address(pages[0]); 20998c2ecf20Sopenharmony_ci ceph_encode_64(&p, objno); 21008c2ecf20Sopenharmony_ci ceph_encode_64(&p, objno + 1); 21018c2ecf20Sopenharmony_ci ceph_encode_8(&p, 
new_state); 21028c2ecf20Sopenharmony_ci if (current_state) { 21038c2ecf20Sopenharmony_ci ceph_encode_8(&p, 1); 21048c2ecf20Sopenharmony_ci ceph_encode_8(&p, *current_state); 21058c2ecf20Sopenharmony_ci } else { 21068c2ecf20Sopenharmony_ci ceph_encode_8(&p, 0); 21078c2ecf20Sopenharmony_ci } 21088c2ecf20Sopenharmony_ci 21098c2ecf20Sopenharmony_ci osd_req_op_cls_request_data_pages(req, which, pages, p - start, 0, 21108c2ecf20Sopenharmony_ci false, true); 21118c2ecf20Sopenharmony_ci return 0; 21128c2ecf20Sopenharmony_ci} 21138c2ecf20Sopenharmony_ci 21148c2ecf20Sopenharmony_ci/* 21158c2ecf20Sopenharmony_ci * Return: 21168c2ecf20Sopenharmony_ci * 0 - object map update sent 21178c2ecf20Sopenharmony_ci * 1 - object map update isn't needed 21188c2ecf20Sopenharmony_ci * <0 - error 21198c2ecf20Sopenharmony_ci */ 21208c2ecf20Sopenharmony_cistatic int rbd_object_map_update(struct rbd_obj_request *obj_req, u64 snap_id, 21218c2ecf20Sopenharmony_ci u8 new_state, const u8 *current_state) 21228c2ecf20Sopenharmony_ci{ 21238c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 21248c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 21258c2ecf20Sopenharmony_ci struct ceph_osd_request *req; 21268c2ecf20Sopenharmony_ci int num_ops = 1; 21278c2ecf20Sopenharmony_ci int which = 0; 21288c2ecf20Sopenharmony_ci int ret; 21298c2ecf20Sopenharmony_ci 21308c2ecf20Sopenharmony_ci if (snap_id == CEPH_NOSNAP) { 21318c2ecf20Sopenharmony_ci if (!update_needed(rbd_dev, obj_req->ex.oe_objno, new_state)) 21328c2ecf20Sopenharmony_ci return 1; 21338c2ecf20Sopenharmony_ci 21348c2ecf20Sopenharmony_ci num_ops++; /* assert_locked */ 21358c2ecf20Sopenharmony_ci } 21368c2ecf20Sopenharmony_ci 21378c2ecf20Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, num_ops, false, GFP_NOIO); 21388c2ecf20Sopenharmony_ci if (!req) 21398c2ecf20Sopenharmony_ci return -ENOMEM; 21408c2ecf20Sopenharmony_ci 21418c2ecf20Sopenharmony_ci 
list_add_tail(&req->r_private_item, &obj_req->osd_reqs); 21428c2ecf20Sopenharmony_ci req->r_callback = rbd_object_map_callback; 21438c2ecf20Sopenharmony_ci req->r_priv = obj_req; 21448c2ecf20Sopenharmony_ci 21458c2ecf20Sopenharmony_ci rbd_object_map_name(rbd_dev, snap_id, &req->r_base_oid); 21468c2ecf20Sopenharmony_ci ceph_oloc_copy(&req->r_base_oloc, &rbd_dev->header_oloc); 21478c2ecf20Sopenharmony_ci req->r_flags = CEPH_OSD_FLAG_WRITE; 21488c2ecf20Sopenharmony_ci ktime_get_real_ts64(&req->r_mtime); 21498c2ecf20Sopenharmony_ci 21508c2ecf20Sopenharmony_ci if (snap_id == CEPH_NOSNAP) { 21518c2ecf20Sopenharmony_ci /* 21528c2ecf20Sopenharmony_ci * Protect against possible race conditions during lock 21538c2ecf20Sopenharmony_ci * ownership transitions. 21548c2ecf20Sopenharmony_ci */ 21558c2ecf20Sopenharmony_ci ret = ceph_cls_assert_locked(req, which++, RBD_LOCK_NAME, 21568c2ecf20Sopenharmony_ci CEPH_CLS_LOCK_EXCLUSIVE, "", ""); 21578c2ecf20Sopenharmony_ci if (ret) 21588c2ecf20Sopenharmony_ci return ret; 21598c2ecf20Sopenharmony_ci } 21608c2ecf20Sopenharmony_ci 21618c2ecf20Sopenharmony_ci ret = rbd_cls_object_map_update(req, which, obj_req->ex.oe_objno, 21628c2ecf20Sopenharmony_ci new_state, current_state); 21638c2ecf20Sopenharmony_ci if (ret) 21648c2ecf20Sopenharmony_ci return ret; 21658c2ecf20Sopenharmony_ci 21668c2ecf20Sopenharmony_ci ret = ceph_osdc_alloc_messages(req, GFP_NOIO); 21678c2ecf20Sopenharmony_ci if (ret) 21688c2ecf20Sopenharmony_ci return ret; 21698c2ecf20Sopenharmony_ci 21708c2ecf20Sopenharmony_ci ceph_osdc_start_request(osdc, req, false); 21718c2ecf20Sopenharmony_ci return 0; 21728c2ecf20Sopenharmony_ci} 21738c2ecf20Sopenharmony_ci 21748c2ecf20Sopenharmony_cistatic void prune_extents(struct ceph_file_extent *img_extents, 21758c2ecf20Sopenharmony_ci u32 *num_img_extents, u64 overlap) 21768c2ecf20Sopenharmony_ci{ 21778c2ecf20Sopenharmony_ci u32 cnt = *num_img_extents; 21788c2ecf20Sopenharmony_ci 21798c2ecf20Sopenharmony_ci /* drop extents completely 
beyond the overlap */ 21808c2ecf20Sopenharmony_ci while (cnt && img_extents[cnt - 1].fe_off >= overlap) 21818c2ecf20Sopenharmony_ci cnt--; 21828c2ecf20Sopenharmony_ci 21838c2ecf20Sopenharmony_ci if (cnt) { 21848c2ecf20Sopenharmony_ci struct ceph_file_extent *ex = &img_extents[cnt - 1]; 21858c2ecf20Sopenharmony_ci 21868c2ecf20Sopenharmony_ci /* trim final overlapping extent */ 21878c2ecf20Sopenharmony_ci if (ex->fe_off + ex->fe_len > overlap) 21888c2ecf20Sopenharmony_ci ex->fe_len = overlap - ex->fe_off; 21898c2ecf20Sopenharmony_ci } 21908c2ecf20Sopenharmony_ci 21918c2ecf20Sopenharmony_ci *num_img_extents = cnt; 21928c2ecf20Sopenharmony_ci} 21938c2ecf20Sopenharmony_ci 21948c2ecf20Sopenharmony_ci/* 21958c2ecf20Sopenharmony_ci * Determine the byte range(s) covered by either just the object extent 21968c2ecf20Sopenharmony_ci * or the entire object in the parent image. 21978c2ecf20Sopenharmony_ci */ 21988c2ecf20Sopenharmony_cistatic int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req, 21998c2ecf20Sopenharmony_ci bool entire) 22008c2ecf20Sopenharmony_ci{ 22018c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 22028c2ecf20Sopenharmony_ci int ret; 22038c2ecf20Sopenharmony_ci 22048c2ecf20Sopenharmony_ci if (!rbd_dev->parent_overlap) 22058c2ecf20Sopenharmony_ci return 0; 22068c2ecf20Sopenharmony_ci 22078c2ecf20Sopenharmony_ci ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno, 22088c2ecf20Sopenharmony_ci entire ? 0 : obj_req->ex.oe_off, 22098c2ecf20Sopenharmony_ci entire ? 
rbd_dev->layout.object_size : 22108c2ecf20Sopenharmony_ci obj_req->ex.oe_len, 22118c2ecf20Sopenharmony_ci &obj_req->img_extents, 22128c2ecf20Sopenharmony_ci &obj_req->num_img_extents); 22138c2ecf20Sopenharmony_ci if (ret) 22148c2ecf20Sopenharmony_ci return ret; 22158c2ecf20Sopenharmony_ci 22168c2ecf20Sopenharmony_ci prune_extents(obj_req->img_extents, &obj_req->num_img_extents, 22178c2ecf20Sopenharmony_ci rbd_dev->parent_overlap); 22188c2ecf20Sopenharmony_ci return 0; 22198c2ecf20Sopenharmony_ci} 22208c2ecf20Sopenharmony_ci 22218c2ecf20Sopenharmony_cistatic void rbd_osd_setup_data(struct ceph_osd_request *osd_req, int which) 22228c2ecf20Sopenharmony_ci{ 22238c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 22248c2ecf20Sopenharmony_ci 22258c2ecf20Sopenharmony_ci switch (obj_req->img_request->data_type) { 22268c2ecf20Sopenharmony_ci case OBJ_REQUEST_BIO: 22278c2ecf20Sopenharmony_ci osd_req_op_extent_osd_data_bio(osd_req, which, 22288c2ecf20Sopenharmony_ci &obj_req->bio_pos, 22298c2ecf20Sopenharmony_ci obj_req->ex.oe_len); 22308c2ecf20Sopenharmony_ci break; 22318c2ecf20Sopenharmony_ci case OBJ_REQUEST_BVECS: 22328c2ecf20Sopenharmony_ci case OBJ_REQUEST_OWN_BVECS: 22338c2ecf20Sopenharmony_ci rbd_assert(obj_req->bvec_pos.iter.bi_size == 22348c2ecf20Sopenharmony_ci obj_req->ex.oe_len); 22358c2ecf20Sopenharmony_ci rbd_assert(obj_req->bvec_idx == obj_req->bvec_count); 22368c2ecf20Sopenharmony_ci osd_req_op_extent_osd_data_bvec_pos(osd_req, which, 22378c2ecf20Sopenharmony_ci &obj_req->bvec_pos); 22388c2ecf20Sopenharmony_ci break; 22398c2ecf20Sopenharmony_ci default: 22408c2ecf20Sopenharmony_ci BUG(); 22418c2ecf20Sopenharmony_ci } 22428c2ecf20Sopenharmony_ci} 22438c2ecf20Sopenharmony_ci 22448c2ecf20Sopenharmony_cistatic int rbd_osd_setup_stat(struct ceph_osd_request *osd_req, int which) 22458c2ecf20Sopenharmony_ci{ 22468c2ecf20Sopenharmony_ci struct page **pages; 22478c2ecf20Sopenharmony_ci 22488c2ecf20Sopenharmony_ci /* 22498c2ecf20Sopenharmony_ci * 
The response data for a STAT call consists of: 22508c2ecf20Sopenharmony_ci * le64 length; 22518c2ecf20Sopenharmony_ci * struct { 22528c2ecf20Sopenharmony_ci * le32 tv_sec; 22538c2ecf20Sopenharmony_ci * le32 tv_nsec; 22548c2ecf20Sopenharmony_ci * } mtime; 22558c2ecf20Sopenharmony_ci */ 22568c2ecf20Sopenharmony_ci pages = ceph_alloc_page_vector(1, GFP_NOIO); 22578c2ecf20Sopenharmony_ci if (IS_ERR(pages)) 22588c2ecf20Sopenharmony_ci return PTR_ERR(pages); 22598c2ecf20Sopenharmony_ci 22608c2ecf20Sopenharmony_ci osd_req_op_init(osd_req, which, CEPH_OSD_OP_STAT, 0); 22618c2ecf20Sopenharmony_ci osd_req_op_raw_data_in_pages(osd_req, which, pages, 22628c2ecf20Sopenharmony_ci 8 + sizeof(struct ceph_timespec), 22638c2ecf20Sopenharmony_ci 0, false, true); 22648c2ecf20Sopenharmony_ci return 0; 22658c2ecf20Sopenharmony_ci} 22668c2ecf20Sopenharmony_ci 22678c2ecf20Sopenharmony_cistatic int rbd_osd_setup_copyup(struct ceph_osd_request *osd_req, int which, 22688c2ecf20Sopenharmony_ci u32 bytes) 22698c2ecf20Sopenharmony_ci{ 22708c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 22718c2ecf20Sopenharmony_ci int ret; 22728c2ecf20Sopenharmony_ci 22738c2ecf20Sopenharmony_ci ret = osd_req_op_cls_init(osd_req, which, "rbd", "copyup"); 22748c2ecf20Sopenharmony_ci if (ret) 22758c2ecf20Sopenharmony_ci return ret; 22768c2ecf20Sopenharmony_ci 22778c2ecf20Sopenharmony_ci osd_req_op_cls_request_data_bvecs(osd_req, which, obj_req->copyup_bvecs, 22788c2ecf20Sopenharmony_ci obj_req->copyup_bvec_count, bytes); 22798c2ecf20Sopenharmony_ci return 0; 22808c2ecf20Sopenharmony_ci} 22818c2ecf20Sopenharmony_ci 22828c2ecf20Sopenharmony_cistatic int rbd_obj_init_read(struct rbd_obj_request *obj_req) 22838c2ecf20Sopenharmony_ci{ 22848c2ecf20Sopenharmony_ci obj_req->read_state = RBD_OBJ_READ_START; 22858c2ecf20Sopenharmony_ci return 0; 22868c2ecf20Sopenharmony_ci} 22878c2ecf20Sopenharmony_ci 22888c2ecf20Sopenharmony_cistatic void __rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req, 
22898c2ecf20Sopenharmony_ci int which) 22908c2ecf20Sopenharmony_ci{ 22918c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 22928c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 22938c2ecf20Sopenharmony_ci u16 opcode; 22948c2ecf20Sopenharmony_ci 22958c2ecf20Sopenharmony_ci if (!use_object_map(rbd_dev) || 22968c2ecf20Sopenharmony_ci !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) { 22978c2ecf20Sopenharmony_ci osd_req_op_alloc_hint_init(osd_req, which++, 22988c2ecf20Sopenharmony_ci rbd_dev->layout.object_size, 22998c2ecf20Sopenharmony_ci rbd_dev->layout.object_size, 23008c2ecf20Sopenharmony_ci rbd_dev->opts->alloc_hint_flags); 23018c2ecf20Sopenharmony_ci } 23028c2ecf20Sopenharmony_ci 23038c2ecf20Sopenharmony_ci if (rbd_obj_is_entire(obj_req)) 23048c2ecf20Sopenharmony_ci opcode = CEPH_OSD_OP_WRITEFULL; 23058c2ecf20Sopenharmony_ci else 23068c2ecf20Sopenharmony_ci opcode = CEPH_OSD_OP_WRITE; 23078c2ecf20Sopenharmony_ci 23088c2ecf20Sopenharmony_ci osd_req_op_extent_init(osd_req, which, opcode, 23098c2ecf20Sopenharmony_ci obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); 23108c2ecf20Sopenharmony_ci rbd_osd_setup_data(osd_req, which); 23118c2ecf20Sopenharmony_ci} 23128c2ecf20Sopenharmony_ci 23138c2ecf20Sopenharmony_cistatic int rbd_obj_init_write(struct rbd_obj_request *obj_req) 23148c2ecf20Sopenharmony_ci{ 23158c2ecf20Sopenharmony_ci int ret; 23168c2ecf20Sopenharmony_ci 23178c2ecf20Sopenharmony_ci /* reverse map the entire object onto the parent */ 23188c2ecf20Sopenharmony_ci ret = rbd_obj_calc_img_extents(obj_req, true); 23198c2ecf20Sopenharmony_ci if (ret) 23208c2ecf20Sopenharmony_ci return ret; 23218c2ecf20Sopenharmony_ci 23228c2ecf20Sopenharmony_ci obj_req->write_state = RBD_OBJ_WRITE_START; 23238c2ecf20Sopenharmony_ci return 0; 23248c2ecf20Sopenharmony_ci} 23258c2ecf20Sopenharmony_ci 23268c2ecf20Sopenharmony_cistatic u16 truncate_or_zero_opcode(struct rbd_obj_request *obj_req) 23278c2ecf20Sopenharmony_ci{ 
23288c2ecf20Sopenharmony_ci return rbd_obj_is_tail(obj_req) ? CEPH_OSD_OP_TRUNCATE : 23298c2ecf20Sopenharmony_ci CEPH_OSD_OP_ZERO; 23308c2ecf20Sopenharmony_ci} 23318c2ecf20Sopenharmony_ci 23328c2ecf20Sopenharmony_cistatic void __rbd_osd_setup_discard_ops(struct ceph_osd_request *osd_req, 23338c2ecf20Sopenharmony_ci int which) 23348c2ecf20Sopenharmony_ci{ 23358c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 23368c2ecf20Sopenharmony_ci 23378c2ecf20Sopenharmony_ci if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) { 23388c2ecf20Sopenharmony_ci rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION); 23398c2ecf20Sopenharmony_ci osd_req_op_init(osd_req, which, CEPH_OSD_OP_DELETE, 0); 23408c2ecf20Sopenharmony_ci } else { 23418c2ecf20Sopenharmony_ci osd_req_op_extent_init(osd_req, which, 23428c2ecf20Sopenharmony_ci truncate_or_zero_opcode(obj_req), 23438c2ecf20Sopenharmony_ci obj_req->ex.oe_off, obj_req->ex.oe_len, 23448c2ecf20Sopenharmony_ci 0, 0); 23458c2ecf20Sopenharmony_ci } 23468c2ecf20Sopenharmony_ci} 23478c2ecf20Sopenharmony_ci 23488c2ecf20Sopenharmony_cistatic int rbd_obj_init_discard(struct rbd_obj_request *obj_req) 23498c2ecf20Sopenharmony_ci{ 23508c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 23518c2ecf20Sopenharmony_ci u64 off, next_off; 23528c2ecf20Sopenharmony_ci int ret; 23538c2ecf20Sopenharmony_ci 23548c2ecf20Sopenharmony_ci /* 23558c2ecf20Sopenharmony_ci * Align the range to alloc_size boundary and punt on discards 23568c2ecf20Sopenharmony_ci * that are too small to free up any space. 23578c2ecf20Sopenharmony_ci * 23588c2ecf20Sopenharmony_ci * alloc_size == object_size && is_tail() is a special case for 23598c2ecf20Sopenharmony_ci * filestore with filestore_punch_hole = false, needed to allow 23608c2ecf20Sopenharmony_ci * truncate (in addition to delete). 
23618c2ecf20Sopenharmony_ci */ 23628c2ecf20Sopenharmony_ci if (rbd_dev->opts->alloc_size != rbd_dev->layout.object_size || 23638c2ecf20Sopenharmony_ci !rbd_obj_is_tail(obj_req)) { 23648c2ecf20Sopenharmony_ci off = round_up(obj_req->ex.oe_off, rbd_dev->opts->alloc_size); 23658c2ecf20Sopenharmony_ci next_off = round_down(obj_req->ex.oe_off + obj_req->ex.oe_len, 23668c2ecf20Sopenharmony_ci rbd_dev->opts->alloc_size); 23678c2ecf20Sopenharmony_ci if (off >= next_off) 23688c2ecf20Sopenharmony_ci return 1; 23698c2ecf20Sopenharmony_ci 23708c2ecf20Sopenharmony_ci dout("%s %p %llu~%llu -> %llu~%llu\n", __func__, 23718c2ecf20Sopenharmony_ci obj_req, obj_req->ex.oe_off, obj_req->ex.oe_len, 23728c2ecf20Sopenharmony_ci off, next_off - off); 23738c2ecf20Sopenharmony_ci obj_req->ex.oe_off = off; 23748c2ecf20Sopenharmony_ci obj_req->ex.oe_len = next_off - off; 23758c2ecf20Sopenharmony_ci } 23768c2ecf20Sopenharmony_ci 23778c2ecf20Sopenharmony_ci /* reverse map the entire object onto the parent */ 23788c2ecf20Sopenharmony_ci ret = rbd_obj_calc_img_extents(obj_req, true); 23798c2ecf20Sopenharmony_ci if (ret) 23808c2ecf20Sopenharmony_ci return ret; 23818c2ecf20Sopenharmony_ci 23828c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; 23838c2ecf20Sopenharmony_ci if (rbd_obj_is_entire(obj_req) && !obj_req->num_img_extents) 23848c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_DELETION; 23858c2ecf20Sopenharmony_ci 23868c2ecf20Sopenharmony_ci obj_req->write_state = RBD_OBJ_WRITE_START; 23878c2ecf20Sopenharmony_ci return 0; 23888c2ecf20Sopenharmony_ci} 23898c2ecf20Sopenharmony_ci 23908c2ecf20Sopenharmony_cistatic void __rbd_osd_setup_zeroout_ops(struct ceph_osd_request *osd_req, 23918c2ecf20Sopenharmony_ci int which) 23928c2ecf20Sopenharmony_ci{ 23938c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 23948c2ecf20Sopenharmony_ci u16 opcode; 23958c2ecf20Sopenharmony_ci 23968c2ecf20Sopenharmony_ci if (rbd_obj_is_entire(obj_req)) { 
23978c2ecf20Sopenharmony_ci if (obj_req->num_img_extents) { 23988c2ecf20Sopenharmony_ci if (!(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)) 23998c2ecf20Sopenharmony_ci osd_req_op_init(osd_req, which++, 24008c2ecf20Sopenharmony_ci CEPH_OSD_OP_CREATE, 0); 24018c2ecf20Sopenharmony_ci opcode = CEPH_OSD_OP_TRUNCATE; 24028c2ecf20Sopenharmony_ci } else { 24038c2ecf20Sopenharmony_ci rbd_assert(obj_req->flags & RBD_OBJ_FLAG_DELETION); 24048c2ecf20Sopenharmony_ci osd_req_op_init(osd_req, which++, 24058c2ecf20Sopenharmony_ci CEPH_OSD_OP_DELETE, 0); 24068c2ecf20Sopenharmony_ci opcode = 0; 24078c2ecf20Sopenharmony_ci } 24088c2ecf20Sopenharmony_ci } else { 24098c2ecf20Sopenharmony_ci opcode = truncate_or_zero_opcode(obj_req); 24108c2ecf20Sopenharmony_ci } 24118c2ecf20Sopenharmony_ci 24128c2ecf20Sopenharmony_ci if (opcode) 24138c2ecf20Sopenharmony_ci osd_req_op_extent_init(osd_req, which, opcode, 24148c2ecf20Sopenharmony_ci obj_req->ex.oe_off, obj_req->ex.oe_len, 24158c2ecf20Sopenharmony_ci 0, 0); 24168c2ecf20Sopenharmony_ci} 24178c2ecf20Sopenharmony_ci 24188c2ecf20Sopenharmony_cistatic int rbd_obj_init_zeroout(struct rbd_obj_request *obj_req) 24198c2ecf20Sopenharmony_ci{ 24208c2ecf20Sopenharmony_ci int ret; 24218c2ecf20Sopenharmony_ci 24228c2ecf20Sopenharmony_ci /* reverse map the entire object onto the parent */ 24238c2ecf20Sopenharmony_ci ret = rbd_obj_calc_img_extents(obj_req, true); 24248c2ecf20Sopenharmony_ci if (ret) 24258c2ecf20Sopenharmony_ci return ret; 24268c2ecf20Sopenharmony_ci 24278c2ecf20Sopenharmony_ci if (!obj_req->num_img_extents) { 24288c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT; 24298c2ecf20Sopenharmony_ci if (rbd_obj_is_entire(obj_req)) 24308c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_DELETION; 24318c2ecf20Sopenharmony_ci } 24328c2ecf20Sopenharmony_ci 24338c2ecf20Sopenharmony_ci obj_req->write_state = RBD_OBJ_WRITE_START; 24348c2ecf20Sopenharmony_ci return 0; 24358c2ecf20Sopenharmony_ci} 24368c2ecf20Sopenharmony_ci 
24378c2ecf20Sopenharmony_cistatic int count_write_ops(struct rbd_obj_request *obj_req) 24388c2ecf20Sopenharmony_ci{ 24398c2ecf20Sopenharmony_ci struct rbd_img_request *img_req = obj_req->img_request; 24408c2ecf20Sopenharmony_ci 24418c2ecf20Sopenharmony_ci switch (img_req->op_type) { 24428c2ecf20Sopenharmony_ci case OBJ_OP_WRITE: 24438c2ecf20Sopenharmony_ci if (!use_object_map(img_req->rbd_dev) || 24448c2ecf20Sopenharmony_ci !(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST)) 24458c2ecf20Sopenharmony_ci return 2; /* setallochint + write/writefull */ 24468c2ecf20Sopenharmony_ci 24478c2ecf20Sopenharmony_ci return 1; /* write/writefull */ 24488c2ecf20Sopenharmony_ci case OBJ_OP_DISCARD: 24498c2ecf20Sopenharmony_ci return 1; /* delete/truncate/zero */ 24508c2ecf20Sopenharmony_ci case OBJ_OP_ZEROOUT: 24518c2ecf20Sopenharmony_ci if (rbd_obj_is_entire(obj_req) && obj_req->num_img_extents && 24528c2ecf20Sopenharmony_ci !(obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED)) 24538c2ecf20Sopenharmony_ci return 2; /* create + truncate */ 24548c2ecf20Sopenharmony_ci 24558c2ecf20Sopenharmony_ci return 1; /* delete/truncate/zero */ 24568c2ecf20Sopenharmony_ci default: 24578c2ecf20Sopenharmony_ci BUG(); 24588c2ecf20Sopenharmony_ci } 24598c2ecf20Sopenharmony_ci} 24608c2ecf20Sopenharmony_ci 24618c2ecf20Sopenharmony_cistatic void rbd_osd_setup_write_ops(struct ceph_osd_request *osd_req, 24628c2ecf20Sopenharmony_ci int which) 24638c2ecf20Sopenharmony_ci{ 24648c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = osd_req->r_priv; 24658c2ecf20Sopenharmony_ci 24668c2ecf20Sopenharmony_ci switch (obj_req->img_request->op_type) { 24678c2ecf20Sopenharmony_ci case OBJ_OP_WRITE: 24688c2ecf20Sopenharmony_ci __rbd_osd_setup_write_ops(osd_req, which); 24698c2ecf20Sopenharmony_ci break; 24708c2ecf20Sopenharmony_ci case OBJ_OP_DISCARD: 24718c2ecf20Sopenharmony_ci __rbd_osd_setup_discard_ops(osd_req, which); 24728c2ecf20Sopenharmony_ci break; 24738c2ecf20Sopenharmony_ci case OBJ_OP_ZEROOUT: 
24748c2ecf20Sopenharmony_ci __rbd_osd_setup_zeroout_ops(osd_req, which); 24758c2ecf20Sopenharmony_ci break; 24768c2ecf20Sopenharmony_ci default: 24778c2ecf20Sopenharmony_ci BUG(); 24788c2ecf20Sopenharmony_ci } 24798c2ecf20Sopenharmony_ci} 24808c2ecf20Sopenharmony_ci 24818c2ecf20Sopenharmony_ci/* 24828c2ecf20Sopenharmony_ci * Prune the list of object requests (adjust offset and/or length, drop 24838c2ecf20Sopenharmony_ci * redundant requests). Prepare object request state machines and image 24848c2ecf20Sopenharmony_ci * request state machine for execution. 24858c2ecf20Sopenharmony_ci */ 24868c2ecf20Sopenharmony_cistatic int __rbd_img_fill_request(struct rbd_img_request *img_req) 24878c2ecf20Sopenharmony_ci{ 24888c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req, *next_obj_req; 24898c2ecf20Sopenharmony_ci int ret; 24908c2ecf20Sopenharmony_ci 24918c2ecf20Sopenharmony_ci for_each_obj_request_safe(img_req, obj_req, next_obj_req) { 24928c2ecf20Sopenharmony_ci switch (img_req->op_type) { 24938c2ecf20Sopenharmony_ci case OBJ_OP_READ: 24948c2ecf20Sopenharmony_ci ret = rbd_obj_init_read(obj_req); 24958c2ecf20Sopenharmony_ci break; 24968c2ecf20Sopenharmony_ci case OBJ_OP_WRITE: 24978c2ecf20Sopenharmony_ci ret = rbd_obj_init_write(obj_req); 24988c2ecf20Sopenharmony_ci break; 24998c2ecf20Sopenharmony_ci case OBJ_OP_DISCARD: 25008c2ecf20Sopenharmony_ci ret = rbd_obj_init_discard(obj_req); 25018c2ecf20Sopenharmony_ci break; 25028c2ecf20Sopenharmony_ci case OBJ_OP_ZEROOUT: 25038c2ecf20Sopenharmony_ci ret = rbd_obj_init_zeroout(obj_req); 25048c2ecf20Sopenharmony_ci break; 25058c2ecf20Sopenharmony_ci default: 25068c2ecf20Sopenharmony_ci BUG(); 25078c2ecf20Sopenharmony_ci } 25088c2ecf20Sopenharmony_ci if (ret < 0) 25098c2ecf20Sopenharmony_ci return ret; 25108c2ecf20Sopenharmony_ci if (ret > 0) { 25118c2ecf20Sopenharmony_ci rbd_img_obj_request_del(img_req, obj_req); 25128c2ecf20Sopenharmony_ci continue; 25138c2ecf20Sopenharmony_ci } 25148c2ecf20Sopenharmony_ci } 
25158c2ecf20Sopenharmony_ci 25168c2ecf20Sopenharmony_ci img_req->state = RBD_IMG_START; 25178c2ecf20Sopenharmony_ci return 0; 25188c2ecf20Sopenharmony_ci} 25198c2ecf20Sopenharmony_ci 25208c2ecf20Sopenharmony_ciunion rbd_img_fill_iter { 25218c2ecf20Sopenharmony_ci struct ceph_bio_iter bio_iter; 25228c2ecf20Sopenharmony_ci struct ceph_bvec_iter bvec_iter; 25238c2ecf20Sopenharmony_ci}; 25248c2ecf20Sopenharmony_ci 25258c2ecf20Sopenharmony_cistruct rbd_img_fill_ctx { 25268c2ecf20Sopenharmony_ci enum obj_request_type pos_type; 25278c2ecf20Sopenharmony_ci union rbd_img_fill_iter *pos; 25288c2ecf20Sopenharmony_ci union rbd_img_fill_iter iter; 25298c2ecf20Sopenharmony_ci ceph_object_extent_fn_t set_pos_fn; 25308c2ecf20Sopenharmony_ci ceph_object_extent_fn_t count_fn; 25318c2ecf20Sopenharmony_ci ceph_object_extent_fn_t copy_fn; 25328c2ecf20Sopenharmony_ci}; 25338c2ecf20Sopenharmony_ci 25348c2ecf20Sopenharmony_cistatic struct ceph_object_extent *alloc_object_extent(void *arg) 25358c2ecf20Sopenharmony_ci{ 25368c2ecf20Sopenharmony_ci struct rbd_img_request *img_req = arg; 25378c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req; 25388c2ecf20Sopenharmony_ci 25398c2ecf20Sopenharmony_ci obj_req = rbd_obj_request_create(); 25408c2ecf20Sopenharmony_ci if (!obj_req) 25418c2ecf20Sopenharmony_ci return NULL; 25428c2ecf20Sopenharmony_ci 25438c2ecf20Sopenharmony_ci rbd_img_obj_request_add(img_req, obj_req); 25448c2ecf20Sopenharmony_ci return &obj_req->ex; 25458c2ecf20Sopenharmony_ci} 25468c2ecf20Sopenharmony_ci 25478c2ecf20Sopenharmony_ci/* 25488c2ecf20Sopenharmony_ci * While su != os && sc == 1 is technically not fancy (it's the same 25498c2ecf20Sopenharmony_ci * layout as su == os && sc == 1), we can't use the nocopy path for it 25508c2ecf20Sopenharmony_ci * because ->set_pos_fn() should be called only once per object. 25518c2ecf20Sopenharmony_ci * ceph_file_to_extents() invokes action_fn once per stripe unit, so 25528c2ecf20Sopenharmony_ci * treat su != os && sc == 1 as fancy. 
25538c2ecf20Sopenharmony_ci */ 25548c2ecf20Sopenharmony_cistatic bool rbd_layout_is_fancy(struct ceph_file_layout *l) 25558c2ecf20Sopenharmony_ci{ 25568c2ecf20Sopenharmony_ci return l->stripe_unit != l->object_size; 25578c2ecf20Sopenharmony_ci} 25588c2ecf20Sopenharmony_ci 25598c2ecf20Sopenharmony_cistatic int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req, 25608c2ecf20Sopenharmony_ci struct ceph_file_extent *img_extents, 25618c2ecf20Sopenharmony_ci u32 num_img_extents, 25628c2ecf20Sopenharmony_ci struct rbd_img_fill_ctx *fctx) 25638c2ecf20Sopenharmony_ci{ 25648c2ecf20Sopenharmony_ci u32 i; 25658c2ecf20Sopenharmony_ci int ret; 25668c2ecf20Sopenharmony_ci 25678c2ecf20Sopenharmony_ci img_req->data_type = fctx->pos_type; 25688c2ecf20Sopenharmony_ci 25698c2ecf20Sopenharmony_ci /* 25708c2ecf20Sopenharmony_ci * Create object requests and set each object request's starting 25718c2ecf20Sopenharmony_ci * position in the provided bio (list) or bio_vec array. 25728c2ecf20Sopenharmony_ci */ 25738c2ecf20Sopenharmony_ci fctx->iter = *fctx->pos; 25748c2ecf20Sopenharmony_ci for (i = 0; i < num_img_extents; i++) { 25758c2ecf20Sopenharmony_ci ret = ceph_file_to_extents(&img_req->rbd_dev->layout, 25768c2ecf20Sopenharmony_ci img_extents[i].fe_off, 25778c2ecf20Sopenharmony_ci img_extents[i].fe_len, 25788c2ecf20Sopenharmony_ci &img_req->object_extents, 25798c2ecf20Sopenharmony_ci alloc_object_extent, img_req, 25808c2ecf20Sopenharmony_ci fctx->set_pos_fn, &fctx->iter); 25818c2ecf20Sopenharmony_ci if (ret) 25828c2ecf20Sopenharmony_ci return ret; 25838c2ecf20Sopenharmony_ci } 25848c2ecf20Sopenharmony_ci 25858c2ecf20Sopenharmony_ci return __rbd_img_fill_request(img_req); 25868c2ecf20Sopenharmony_ci} 25878c2ecf20Sopenharmony_ci 25888c2ecf20Sopenharmony_ci/* 25898c2ecf20Sopenharmony_ci * Map a list of image extents to a list of object extents, create the 25908c2ecf20Sopenharmony_ci * corresponding object requests (normally each to a different object, 25918c2ecf20Sopenharmony_ci * 
but not always) and add them to @img_req. For each object request, 25928c2ecf20Sopenharmony_ci * set up its data descriptor to point to the corresponding chunk(s) of 25938c2ecf20Sopenharmony_ci * @fctx->pos data buffer. 25948c2ecf20Sopenharmony_ci * 25958c2ecf20Sopenharmony_ci * Because ceph_file_to_extents() will merge adjacent object extents 25968c2ecf20Sopenharmony_ci * together, each object request's data descriptor may point to multiple 25978c2ecf20Sopenharmony_ci * different chunks of @fctx->pos data buffer. 25988c2ecf20Sopenharmony_ci * 25998c2ecf20Sopenharmony_ci * @fctx->pos data buffer is assumed to be large enough. 26008c2ecf20Sopenharmony_ci */ 26018c2ecf20Sopenharmony_cistatic int rbd_img_fill_request(struct rbd_img_request *img_req, 26028c2ecf20Sopenharmony_ci struct ceph_file_extent *img_extents, 26038c2ecf20Sopenharmony_ci u32 num_img_extents, 26048c2ecf20Sopenharmony_ci struct rbd_img_fill_ctx *fctx) 26058c2ecf20Sopenharmony_ci{ 26068c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 26078c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req; 26088c2ecf20Sopenharmony_ci u32 i; 26098c2ecf20Sopenharmony_ci int ret; 26108c2ecf20Sopenharmony_ci 26118c2ecf20Sopenharmony_ci if (fctx->pos_type == OBJ_REQUEST_NODATA || 26128c2ecf20Sopenharmony_ci !rbd_layout_is_fancy(&rbd_dev->layout)) 26138c2ecf20Sopenharmony_ci return rbd_img_fill_request_nocopy(img_req, img_extents, 26148c2ecf20Sopenharmony_ci num_img_extents, fctx); 26158c2ecf20Sopenharmony_ci 26168c2ecf20Sopenharmony_ci img_req->data_type = OBJ_REQUEST_OWN_BVECS; 26178c2ecf20Sopenharmony_ci 26188c2ecf20Sopenharmony_ci /* 26198c2ecf20Sopenharmony_ci * Create object requests and determine ->bvec_count for each object 26208c2ecf20Sopenharmony_ci * request. 
Note that ->bvec_count sum over all object requests may 26218c2ecf20Sopenharmony_ci * be greater than the number of bio_vecs in the provided bio (list) 26228c2ecf20Sopenharmony_ci * or bio_vec array because when mapped, those bio_vecs can straddle 26238c2ecf20Sopenharmony_ci * stripe unit boundaries. 26248c2ecf20Sopenharmony_ci */ 26258c2ecf20Sopenharmony_ci fctx->iter = *fctx->pos; 26268c2ecf20Sopenharmony_ci for (i = 0; i < num_img_extents; i++) { 26278c2ecf20Sopenharmony_ci ret = ceph_file_to_extents(&rbd_dev->layout, 26288c2ecf20Sopenharmony_ci img_extents[i].fe_off, 26298c2ecf20Sopenharmony_ci img_extents[i].fe_len, 26308c2ecf20Sopenharmony_ci &img_req->object_extents, 26318c2ecf20Sopenharmony_ci alloc_object_extent, img_req, 26328c2ecf20Sopenharmony_ci fctx->count_fn, &fctx->iter); 26338c2ecf20Sopenharmony_ci if (ret) 26348c2ecf20Sopenharmony_ci return ret; 26358c2ecf20Sopenharmony_ci } 26368c2ecf20Sopenharmony_ci 26378c2ecf20Sopenharmony_ci for_each_obj_request(img_req, obj_req) { 26388c2ecf20Sopenharmony_ci obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count, 26398c2ecf20Sopenharmony_ci sizeof(*obj_req->bvec_pos.bvecs), 26408c2ecf20Sopenharmony_ci GFP_NOIO); 26418c2ecf20Sopenharmony_ci if (!obj_req->bvec_pos.bvecs) 26428c2ecf20Sopenharmony_ci return -ENOMEM; 26438c2ecf20Sopenharmony_ci } 26448c2ecf20Sopenharmony_ci 26458c2ecf20Sopenharmony_ci /* 26468c2ecf20Sopenharmony_ci * Fill in each object request's private bio_vec array, splitting and 26478c2ecf20Sopenharmony_ci * rearranging the provided bio_vecs in stripe unit chunks as needed. 
26488c2ecf20Sopenharmony_ci */ 26498c2ecf20Sopenharmony_ci fctx->iter = *fctx->pos; 26508c2ecf20Sopenharmony_ci for (i = 0; i < num_img_extents; i++) { 26518c2ecf20Sopenharmony_ci ret = ceph_iterate_extents(&rbd_dev->layout, 26528c2ecf20Sopenharmony_ci img_extents[i].fe_off, 26538c2ecf20Sopenharmony_ci img_extents[i].fe_len, 26548c2ecf20Sopenharmony_ci &img_req->object_extents, 26558c2ecf20Sopenharmony_ci fctx->copy_fn, &fctx->iter); 26568c2ecf20Sopenharmony_ci if (ret) 26578c2ecf20Sopenharmony_ci return ret; 26588c2ecf20Sopenharmony_ci } 26598c2ecf20Sopenharmony_ci 26608c2ecf20Sopenharmony_ci return __rbd_img_fill_request(img_req); 26618c2ecf20Sopenharmony_ci} 26628c2ecf20Sopenharmony_ci 26638c2ecf20Sopenharmony_cistatic int rbd_img_fill_nodata(struct rbd_img_request *img_req, 26648c2ecf20Sopenharmony_ci u64 off, u64 len) 26658c2ecf20Sopenharmony_ci{ 26668c2ecf20Sopenharmony_ci struct ceph_file_extent ex = { off, len }; 26678c2ecf20Sopenharmony_ci union rbd_img_fill_iter dummy = {}; 26688c2ecf20Sopenharmony_ci struct rbd_img_fill_ctx fctx = { 26698c2ecf20Sopenharmony_ci .pos_type = OBJ_REQUEST_NODATA, 26708c2ecf20Sopenharmony_ci .pos = &dummy, 26718c2ecf20Sopenharmony_ci }; 26728c2ecf20Sopenharmony_ci 26738c2ecf20Sopenharmony_ci return rbd_img_fill_request(img_req, &ex, 1, &fctx); 26748c2ecf20Sopenharmony_ci} 26758c2ecf20Sopenharmony_ci 26768c2ecf20Sopenharmony_cistatic void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg) 26778c2ecf20Sopenharmony_ci{ 26788c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = 26798c2ecf20Sopenharmony_ci container_of(ex, struct rbd_obj_request, ex); 26808c2ecf20Sopenharmony_ci struct ceph_bio_iter *it = arg; 26818c2ecf20Sopenharmony_ci 26828c2ecf20Sopenharmony_ci dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); 26838c2ecf20Sopenharmony_ci obj_req->bio_pos = *it; 26848c2ecf20Sopenharmony_ci ceph_bio_iter_advance(it, bytes); 26858c2ecf20Sopenharmony_ci} 26868c2ecf20Sopenharmony_ci 
26878c2ecf20Sopenharmony_cistatic void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) 26888c2ecf20Sopenharmony_ci{ 26898c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = 26908c2ecf20Sopenharmony_ci container_of(ex, struct rbd_obj_request, ex); 26918c2ecf20Sopenharmony_ci struct ceph_bio_iter *it = arg; 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_ci dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); 26948c2ecf20Sopenharmony_ci ceph_bio_iter_advance_step(it, bytes, ({ 26958c2ecf20Sopenharmony_ci obj_req->bvec_count++; 26968c2ecf20Sopenharmony_ci })); 26978c2ecf20Sopenharmony_ci 26988c2ecf20Sopenharmony_ci} 26998c2ecf20Sopenharmony_ci 27008c2ecf20Sopenharmony_cistatic void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) 27018c2ecf20Sopenharmony_ci{ 27028c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = 27038c2ecf20Sopenharmony_ci container_of(ex, struct rbd_obj_request, ex); 27048c2ecf20Sopenharmony_ci struct ceph_bio_iter *it = arg; 27058c2ecf20Sopenharmony_ci 27068c2ecf20Sopenharmony_ci dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes); 27078c2ecf20Sopenharmony_ci ceph_bio_iter_advance_step(it, bytes, ({ 27088c2ecf20Sopenharmony_ci obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; 27098c2ecf20Sopenharmony_ci obj_req->bvec_pos.iter.bi_size += bv.bv_len; 27108c2ecf20Sopenharmony_ci })); 27118c2ecf20Sopenharmony_ci} 27128c2ecf20Sopenharmony_ci 27138c2ecf20Sopenharmony_cistatic int __rbd_img_fill_from_bio(struct rbd_img_request *img_req, 27148c2ecf20Sopenharmony_ci struct ceph_file_extent *img_extents, 27158c2ecf20Sopenharmony_ci u32 num_img_extents, 27168c2ecf20Sopenharmony_ci struct ceph_bio_iter *bio_pos) 27178c2ecf20Sopenharmony_ci{ 27188c2ecf20Sopenharmony_ci struct rbd_img_fill_ctx fctx = { 27198c2ecf20Sopenharmony_ci .pos_type = OBJ_REQUEST_BIO, 27208c2ecf20Sopenharmony_ci .pos = (union rbd_img_fill_iter *)bio_pos, 27218c2ecf20Sopenharmony_ci .set_pos_fn = set_bio_pos, 
27228c2ecf20Sopenharmony_ci .count_fn = count_bio_bvecs, 27238c2ecf20Sopenharmony_ci .copy_fn = copy_bio_bvecs, 27248c2ecf20Sopenharmony_ci }; 27258c2ecf20Sopenharmony_ci 27268c2ecf20Sopenharmony_ci return rbd_img_fill_request(img_req, img_extents, num_img_extents, 27278c2ecf20Sopenharmony_ci &fctx); 27288c2ecf20Sopenharmony_ci} 27298c2ecf20Sopenharmony_ci 27308c2ecf20Sopenharmony_cistatic int rbd_img_fill_from_bio(struct rbd_img_request *img_req, 27318c2ecf20Sopenharmony_ci u64 off, u64 len, struct bio *bio) 27328c2ecf20Sopenharmony_ci{ 27338c2ecf20Sopenharmony_ci struct ceph_file_extent ex = { off, len }; 27348c2ecf20Sopenharmony_ci struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter }; 27358c2ecf20Sopenharmony_ci 27368c2ecf20Sopenharmony_ci return __rbd_img_fill_from_bio(img_req, &ex, 1, &it); 27378c2ecf20Sopenharmony_ci} 27388c2ecf20Sopenharmony_ci 27398c2ecf20Sopenharmony_cistatic void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg) 27408c2ecf20Sopenharmony_ci{ 27418c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = 27428c2ecf20Sopenharmony_ci container_of(ex, struct rbd_obj_request, ex); 27438c2ecf20Sopenharmony_ci struct ceph_bvec_iter *it = arg; 27448c2ecf20Sopenharmony_ci 27458c2ecf20Sopenharmony_ci obj_req->bvec_pos = *it; 27468c2ecf20Sopenharmony_ci ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes); 27478c2ecf20Sopenharmony_ci ceph_bvec_iter_advance(it, bytes); 27488c2ecf20Sopenharmony_ci} 27498c2ecf20Sopenharmony_ci 27508c2ecf20Sopenharmony_cistatic void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) 27518c2ecf20Sopenharmony_ci{ 27528c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = 27538c2ecf20Sopenharmony_ci container_of(ex, struct rbd_obj_request, ex); 27548c2ecf20Sopenharmony_ci struct ceph_bvec_iter *it = arg; 27558c2ecf20Sopenharmony_ci 27568c2ecf20Sopenharmony_ci ceph_bvec_iter_advance_step(it, bytes, ({ 27578c2ecf20Sopenharmony_ci obj_req->bvec_count++; 27588c2ecf20Sopenharmony_ci })); 
27598c2ecf20Sopenharmony_ci} 27608c2ecf20Sopenharmony_ci 27618c2ecf20Sopenharmony_cistatic void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg) 27628c2ecf20Sopenharmony_ci{ 27638c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = 27648c2ecf20Sopenharmony_ci container_of(ex, struct rbd_obj_request, ex); 27658c2ecf20Sopenharmony_ci struct ceph_bvec_iter *it = arg; 27668c2ecf20Sopenharmony_ci 27678c2ecf20Sopenharmony_ci ceph_bvec_iter_advance_step(it, bytes, ({ 27688c2ecf20Sopenharmony_ci obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv; 27698c2ecf20Sopenharmony_ci obj_req->bvec_pos.iter.bi_size += bv.bv_len; 27708c2ecf20Sopenharmony_ci })); 27718c2ecf20Sopenharmony_ci} 27728c2ecf20Sopenharmony_ci 27738c2ecf20Sopenharmony_cistatic int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req, 27748c2ecf20Sopenharmony_ci struct ceph_file_extent *img_extents, 27758c2ecf20Sopenharmony_ci u32 num_img_extents, 27768c2ecf20Sopenharmony_ci struct ceph_bvec_iter *bvec_pos) 27778c2ecf20Sopenharmony_ci{ 27788c2ecf20Sopenharmony_ci struct rbd_img_fill_ctx fctx = { 27798c2ecf20Sopenharmony_ci .pos_type = OBJ_REQUEST_BVECS, 27808c2ecf20Sopenharmony_ci .pos = (union rbd_img_fill_iter *)bvec_pos, 27818c2ecf20Sopenharmony_ci .set_pos_fn = set_bvec_pos, 27828c2ecf20Sopenharmony_ci .count_fn = count_bvecs, 27838c2ecf20Sopenharmony_ci .copy_fn = copy_bvecs, 27848c2ecf20Sopenharmony_ci }; 27858c2ecf20Sopenharmony_ci 27868c2ecf20Sopenharmony_ci return rbd_img_fill_request(img_req, img_extents, num_img_extents, 27878c2ecf20Sopenharmony_ci &fctx); 27888c2ecf20Sopenharmony_ci} 27898c2ecf20Sopenharmony_ci 27908c2ecf20Sopenharmony_cistatic int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req, 27918c2ecf20Sopenharmony_ci struct ceph_file_extent *img_extents, 27928c2ecf20Sopenharmony_ci u32 num_img_extents, 27938c2ecf20Sopenharmony_ci struct bio_vec *bvecs) 27948c2ecf20Sopenharmony_ci{ 27958c2ecf20Sopenharmony_ci struct ceph_bvec_iter it = { 
27968c2ecf20Sopenharmony_ci .bvecs = bvecs, 27978c2ecf20Sopenharmony_ci .iter = { .bi_size = ceph_file_extents_bytes(img_extents, 27988c2ecf20Sopenharmony_ci num_img_extents) }, 27998c2ecf20Sopenharmony_ci }; 28008c2ecf20Sopenharmony_ci 28018c2ecf20Sopenharmony_ci return __rbd_img_fill_from_bvecs(img_req, img_extents, num_img_extents, 28028c2ecf20Sopenharmony_ci &it); 28038c2ecf20Sopenharmony_ci} 28048c2ecf20Sopenharmony_ci 28058c2ecf20Sopenharmony_cistatic void rbd_img_handle_request_work(struct work_struct *work) 28068c2ecf20Sopenharmony_ci{ 28078c2ecf20Sopenharmony_ci struct rbd_img_request *img_req = 28088c2ecf20Sopenharmony_ci container_of(work, struct rbd_img_request, work); 28098c2ecf20Sopenharmony_ci 28108c2ecf20Sopenharmony_ci rbd_img_handle_request(img_req, img_req->work_result); 28118c2ecf20Sopenharmony_ci} 28128c2ecf20Sopenharmony_ci 28138c2ecf20Sopenharmony_cistatic void rbd_img_schedule(struct rbd_img_request *img_req, int result) 28148c2ecf20Sopenharmony_ci{ 28158c2ecf20Sopenharmony_ci INIT_WORK(&img_req->work, rbd_img_handle_request_work); 28168c2ecf20Sopenharmony_ci img_req->work_result = result; 28178c2ecf20Sopenharmony_ci queue_work(rbd_wq, &img_req->work); 28188c2ecf20Sopenharmony_ci} 28198c2ecf20Sopenharmony_ci 28208c2ecf20Sopenharmony_cistatic bool rbd_obj_may_exist(struct rbd_obj_request *obj_req) 28218c2ecf20Sopenharmony_ci{ 28228c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 28238c2ecf20Sopenharmony_ci 28248c2ecf20Sopenharmony_ci if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) { 28258c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST; 28268c2ecf20Sopenharmony_ci return true; 28278c2ecf20Sopenharmony_ci } 28288c2ecf20Sopenharmony_ci 28298c2ecf20Sopenharmony_ci dout("%s %p objno %llu assuming dne\n", __func__, obj_req, 28308c2ecf20Sopenharmony_ci obj_req->ex.oe_objno); 28318c2ecf20Sopenharmony_ci return false; 28328c2ecf20Sopenharmony_ci} 28338c2ecf20Sopenharmony_ci 
28348c2ecf20Sopenharmony_cistatic int rbd_obj_read_object(struct rbd_obj_request *obj_req) 28358c2ecf20Sopenharmony_ci{ 28368c2ecf20Sopenharmony_ci struct ceph_osd_request *osd_req; 28378c2ecf20Sopenharmony_ci int ret; 28388c2ecf20Sopenharmony_ci 28398c2ecf20Sopenharmony_ci osd_req = __rbd_obj_add_osd_request(obj_req, NULL, 1); 28408c2ecf20Sopenharmony_ci if (IS_ERR(osd_req)) 28418c2ecf20Sopenharmony_ci return PTR_ERR(osd_req); 28428c2ecf20Sopenharmony_ci 28438c2ecf20Sopenharmony_ci osd_req_op_extent_init(osd_req, 0, CEPH_OSD_OP_READ, 28448c2ecf20Sopenharmony_ci obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0); 28458c2ecf20Sopenharmony_ci rbd_osd_setup_data(osd_req, 0); 28468c2ecf20Sopenharmony_ci rbd_osd_format_read(osd_req); 28478c2ecf20Sopenharmony_ci 28488c2ecf20Sopenharmony_ci ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO); 28498c2ecf20Sopenharmony_ci if (ret) 28508c2ecf20Sopenharmony_ci return ret; 28518c2ecf20Sopenharmony_ci 28528c2ecf20Sopenharmony_ci rbd_osd_submit(osd_req); 28538c2ecf20Sopenharmony_ci return 0; 28548c2ecf20Sopenharmony_ci} 28558c2ecf20Sopenharmony_ci 28568c2ecf20Sopenharmony_cistatic int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req) 28578c2ecf20Sopenharmony_ci{ 28588c2ecf20Sopenharmony_ci struct rbd_img_request *img_req = obj_req->img_request; 28598c2ecf20Sopenharmony_ci struct rbd_device *parent = img_req->rbd_dev->parent; 28608c2ecf20Sopenharmony_ci struct rbd_img_request *child_img_req; 28618c2ecf20Sopenharmony_ci int ret; 28628c2ecf20Sopenharmony_ci 28638c2ecf20Sopenharmony_ci child_img_req = kmem_cache_alloc(rbd_img_request_cache, GFP_NOIO); 28648c2ecf20Sopenharmony_ci if (!child_img_req) 28658c2ecf20Sopenharmony_ci return -ENOMEM; 28668c2ecf20Sopenharmony_ci 28678c2ecf20Sopenharmony_ci rbd_img_request_init(child_img_req, parent, OBJ_OP_READ); 28688c2ecf20Sopenharmony_ci __set_bit(IMG_REQ_CHILD, &child_img_req->flags); 28698c2ecf20Sopenharmony_ci child_img_req->obj_request = obj_req; 28708c2ecf20Sopenharmony_ci 
28718c2ecf20Sopenharmony_ci down_read(&parent->header_rwsem); 28728c2ecf20Sopenharmony_ci rbd_img_capture_header(child_img_req); 28738c2ecf20Sopenharmony_ci up_read(&parent->header_rwsem); 28748c2ecf20Sopenharmony_ci 28758c2ecf20Sopenharmony_ci dout("%s child_img_req %p for obj_req %p\n", __func__, child_img_req, 28768c2ecf20Sopenharmony_ci obj_req); 28778c2ecf20Sopenharmony_ci 28788c2ecf20Sopenharmony_ci if (!rbd_img_is_write(img_req)) { 28798c2ecf20Sopenharmony_ci switch (img_req->data_type) { 28808c2ecf20Sopenharmony_ci case OBJ_REQUEST_BIO: 28818c2ecf20Sopenharmony_ci ret = __rbd_img_fill_from_bio(child_img_req, 28828c2ecf20Sopenharmony_ci obj_req->img_extents, 28838c2ecf20Sopenharmony_ci obj_req->num_img_extents, 28848c2ecf20Sopenharmony_ci &obj_req->bio_pos); 28858c2ecf20Sopenharmony_ci break; 28868c2ecf20Sopenharmony_ci case OBJ_REQUEST_BVECS: 28878c2ecf20Sopenharmony_ci case OBJ_REQUEST_OWN_BVECS: 28888c2ecf20Sopenharmony_ci ret = __rbd_img_fill_from_bvecs(child_img_req, 28898c2ecf20Sopenharmony_ci obj_req->img_extents, 28908c2ecf20Sopenharmony_ci obj_req->num_img_extents, 28918c2ecf20Sopenharmony_ci &obj_req->bvec_pos); 28928c2ecf20Sopenharmony_ci break; 28938c2ecf20Sopenharmony_ci default: 28948c2ecf20Sopenharmony_ci BUG(); 28958c2ecf20Sopenharmony_ci } 28968c2ecf20Sopenharmony_ci } else { 28978c2ecf20Sopenharmony_ci ret = rbd_img_fill_from_bvecs(child_img_req, 28988c2ecf20Sopenharmony_ci obj_req->img_extents, 28998c2ecf20Sopenharmony_ci obj_req->num_img_extents, 29008c2ecf20Sopenharmony_ci obj_req->copyup_bvecs); 29018c2ecf20Sopenharmony_ci } 29028c2ecf20Sopenharmony_ci if (ret) { 29038c2ecf20Sopenharmony_ci rbd_img_request_destroy(child_img_req); 29048c2ecf20Sopenharmony_ci return ret; 29058c2ecf20Sopenharmony_ci } 29068c2ecf20Sopenharmony_ci 29078c2ecf20Sopenharmony_ci /* avoid parent chain recursion */ 29088c2ecf20Sopenharmony_ci rbd_img_schedule(child_img_req, 0); 29098c2ecf20Sopenharmony_ci return 0; 29108c2ecf20Sopenharmony_ci} 
29118c2ecf20Sopenharmony_ci 29128c2ecf20Sopenharmony_cistatic bool rbd_obj_advance_read(struct rbd_obj_request *obj_req, int *result) 29138c2ecf20Sopenharmony_ci{ 29148c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 29158c2ecf20Sopenharmony_ci int ret; 29168c2ecf20Sopenharmony_ci 29178c2ecf20Sopenharmony_ciagain: 29188c2ecf20Sopenharmony_ci switch (obj_req->read_state) { 29198c2ecf20Sopenharmony_ci case RBD_OBJ_READ_START: 29208c2ecf20Sopenharmony_ci rbd_assert(!*result); 29218c2ecf20Sopenharmony_ci 29228c2ecf20Sopenharmony_ci if (!rbd_obj_may_exist(obj_req)) { 29238c2ecf20Sopenharmony_ci *result = -ENOENT; 29248c2ecf20Sopenharmony_ci obj_req->read_state = RBD_OBJ_READ_OBJECT; 29258c2ecf20Sopenharmony_ci goto again; 29268c2ecf20Sopenharmony_ci } 29278c2ecf20Sopenharmony_ci 29288c2ecf20Sopenharmony_ci ret = rbd_obj_read_object(obj_req); 29298c2ecf20Sopenharmony_ci if (ret) { 29308c2ecf20Sopenharmony_ci *result = ret; 29318c2ecf20Sopenharmony_ci return true; 29328c2ecf20Sopenharmony_ci } 29338c2ecf20Sopenharmony_ci obj_req->read_state = RBD_OBJ_READ_OBJECT; 29348c2ecf20Sopenharmony_ci return false; 29358c2ecf20Sopenharmony_ci case RBD_OBJ_READ_OBJECT: 29368c2ecf20Sopenharmony_ci if (*result == -ENOENT && rbd_dev->parent_overlap) { 29378c2ecf20Sopenharmony_ci /* reverse map this object extent onto the parent */ 29388c2ecf20Sopenharmony_ci ret = rbd_obj_calc_img_extents(obj_req, false); 29398c2ecf20Sopenharmony_ci if (ret) { 29408c2ecf20Sopenharmony_ci *result = ret; 29418c2ecf20Sopenharmony_ci return true; 29428c2ecf20Sopenharmony_ci } 29438c2ecf20Sopenharmony_ci if (obj_req->num_img_extents) { 29448c2ecf20Sopenharmony_ci ret = rbd_obj_read_from_parent(obj_req); 29458c2ecf20Sopenharmony_ci if (ret) { 29468c2ecf20Sopenharmony_ci *result = ret; 29478c2ecf20Sopenharmony_ci return true; 29488c2ecf20Sopenharmony_ci } 29498c2ecf20Sopenharmony_ci obj_req->read_state = RBD_OBJ_READ_PARENT; 29508c2ecf20Sopenharmony_ci return false; 
29518c2ecf20Sopenharmony_ci } 29528c2ecf20Sopenharmony_ci } 29538c2ecf20Sopenharmony_ci 29548c2ecf20Sopenharmony_ci /* 29558c2ecf20Sopenharmony_ci * -ENOENT means a hole in the image -- zero-fill the entire 29568c2ecf20Sopenharmony_ci * length of the request. A short read also implies zero-fill 29578c2ecf20Sopenharmony_ci * to the end of the request. 29588c2ecf20Sopenharmony_ci */ 29598c2ecf20Sopenharmony_ci if (*result == -ENOENT) { 29608c2ecf20Sopenharmony_ci rbd_obj_zero_range(obj_req, 0, obj_req->ex.oe_len); 29618c2ecf20Sopenharmony_ci *result = 0; 29628c2ecf20Sopenharmony_ci } else if (*result >= 0) { 29638c2ecf20Sopenharmony_ci if (*result < obj_req->ex.oe_len) 29648c2ecf20Sopenharmony_ci rbd_obj_zero_range(obj_req, *result, 29658c2ecf20Sopenharmony_ci obj_req->ex.oe_len - *result); 29668c2ecf20Sopenharmony_ci else 29678c2ecf20Sopenharmony_ci rbd_assert(*result == obj_req->ex.oe_len); 29688c2ecf20Sopenharmony_ci *result = 0; 29698c2ecf20Sopenharmony_ci } 29708c2ecf20Sopenharmony_ci return true; 29718c2ecf20Sopenharmony_ci case RBD_OBJ_READ_PARENT: 29728c2ecf20Sopenharmony_ci /* 29738c2ecf20Sopenharmony_ci * The parent image is read only up to the overlap -- zero-fill 29748c2ecf20Sopenharmony_ci * from the overlap to the end of the request. 
29758c2ecf20Sopenharmony_ci */ 29768c2ecf20Sopenharmony_ci if (!*result) { 29778c2ecf20Sopenharmony_ci u32 obj_overlap = rbd_obj_img_extents_bytes(obj_req); 29788c2ecf20Sopenharmony_ci 29798c2ecf20Sopenharmony_ci if (obj_overlap < obj_req->ex.oe_len) 29808c2ecf20Sopenharmony_ci rbd_obj_zero_range(obj_req, obj_overlap, 29818c2ecf20Sopenharmony_ci obj_req->ex.oe_len - obj_overlap); 29828c2ecf20Sopenharmony_ci } 29838c2ecf20Sopenharmony_ci return true; 29848c2ecf20Sopenharmony_ci default: 29858c2ecf20Sopenharmony_ci BUG(); 29868c2ecf20Sopenharmony_ci } 29878c2ecf20Sopenharmony_ci} 29888c2ecf20Sopenharmony_ci 29898c2ecf20Sopenharmony_cistatic bool rbd_obj_write_is_noop(struct rbd_obj_request *obj_req) 29908c2ecf20Sopenharmony_ci{ 29918c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 29928c2ecf20Sopenharmony_ci 29938c2ecf20Sopenharmony_ci if (rbd_object_map_may_exist(rbd_dev, obj_req->ex.oe_objno)) 29948c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_MAY_EXIST; 29958c2ecf20Sopenharmony_ci 29968c2ecf20Sopenharmony_ci if (!(obj_req->flags & RBD_OBJ_FLAG_MAY_EXIST) && 29978c2ecf20Sopenharmony_ci (obj_req->flags & RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT)) { 29988c2ecf20Sopenharmony_ci dout("%s %p noop for nonexistent\n", __func__, obj_req); 29998c2ecf20Sopenharmony_ci return true; 30008c2ecf20Sopenharmony_ci } 30018c2ecf20Sopenharmony_ci 30028c2ecf20Sopenharmony_ci return false; 30038c2ecf20Sopenharmony_ci} 30048c2ecf20Sopenharmony_ci 30058c2ecf20Sopenharmony_ci/* 30068c2ecf20Sopenharmony_ci * Return: 30078c2ecf20Sopenharmony_ci * 0 - object map update sent 30088c2ecf20Sopenharmony_ci * 1 - object map update isn't needed 30098c2ecf20Sopenharmony_ci * <0 - error 30108c2ecf20Sopenharmony_ci */ 30118c2ecf20Sopenharmony_cistatic int rbd_obj_write_pre_object_map(struct rbd_obj_request *obj_req) 30128c2ecf20Sopenharmony_ci{ 30138c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 30148c2ecf20Sopenharmony_ci u8 
new_state; 30158c2ecf20Sopenharmony_ci 30168c2ecf20Sopenharmony_ci if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) 30178c2ecf20Sopenharmony_ci return 1; 30188c2ecf20Sopenharmony_ci 30198c2ecf20Sopenharmony_ci if (obj_req->flags & RBD_OBJ_FLAG_DELETION) 30208c2ecf20Sopenharmony_ci new_state = OBJECT_PENDING; 30218c2ecf20Sopenharmony_ci else 30228c2ecf20Sopenharmony_ci new_state = OBJECT_EXISTS; 30238c2ecf20Sopenharmony_ci 30248c2ecf20Sopenharmony_ci return rbd_object_map_update(obj_req, CEPH_NOSNAP, new_state, NULL); 30258c2ecf20Sopenharmony_ci} 30268c2ecf20Sopenharmony_ci 30278c2ecf20Sopenharmony_cistatic int rbd_obj_write_object(struct rbd_obj_request *obj_req) 30288c2ecf20Sopenharmony_ci{ 30298c2ecf20Sopenharmony_ci struct ceph_osd_request *osd_req; 30308c2ecf20Sopenharmony_ci int num_ops = count_write_ops(obj_req); 30318c2ecf20Sopenharmony_ci int which = 0; 30328c2ecf20Sopenharmony_ci int ret; 30338c2ecf20Sopenharmony_ci 30348c2ecf20Sopenharmony_ci if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) 30358c2ecf20Sopenharmony_ci num_ops++; /* stat */ 30368c2ecf20Sopenharmony_ci 30378c2ecf20Sopenharmony_ci osd_req = rbd_obj_add_osd_request(obj_req, num_ops); 30388c2ecf20Sopenharmony_ci if (IS_ERR(osd_req)) 30398c2ecf20Sopenharmony_ci return PTR_ERR(osd_req); 30408c2ecf20Sopenharmony_ci 30418c2ecf20Sopenharmony_ci if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { 30428c2ecf20Sopenharmony_ci ret = rbd_osd_setup_stat(osd_req, which++); 30438c2ecf20Sopenharmony_ci if (ret) 30448c2ecf20Sopenharmony_ci return ret; 30458c2ecf20Sopenharmony_ci } 30468c2ecf20Sopenharmony_ci 30478c2ecf20Sopenharmony_ci rbd_osd_setup_write_ops(osd_req, which); 30488c2ecf20Sopenharmony_ci rbd_osd_format_write(osd_req); 30498c2ecf20Sopenharmony_ci 30508c2ecf20Sopenharmony_ci ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO); 30518c2ecf20Sopenharmony_ci if (ret) 30528c2ecf20Sopenharmony_ci return ret; 30538c2ecf20Sopenharmony_ci 30548c2ecf20Sopenharmony_ci rbd_osd_submit(osd_req); 
30558c2ecf20Sopenharmony_ci return 0; 30568c2ecf20Sopenharmony_ci} 30578c2ecf20Sopenharmony_ci 30588c2ecf20Sopenharmony_ci/* 30598c2ecf20Sopenharmony_ci * copyup_bvecs pages are never highmem pages 30608c2ecf20Sopenharmony_ci */ 30618c2ecf20Sopenharmony_cistatic bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) 30628c2ecf20Sopenharmony_ci{ 30638c2ecf20Sopenharmony_ci struct ceph_bvec_iter it = { 30648c2ecf20Sopenharmony_ci .bvecs = bvecs, 30658c2ecf20Sopenharmony_ci .iter = { .bi_size = bytes }, 30668c2ecf20Sopenharmony_ci }; 30678c2ecf20Sopenharmony_ci 30688c2ecf20Sopenharmony_ci ceph_bvec_iter_advance_step(&it, bytes, ({ 30698c2ecf20Sopenharmony_ci if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0, 30708c2ecf20Sopenharmony_ci bv.bv_len)) 30718c2ecf20Sopenharmony_ci return false; 30728c2ecf20Sopenharmony_ci })); 30738c2ecf20Sopenharmony_ci return true; 30748c2ecf20Sopenharmony_ci} 30758c2ecf20Sopenharmony_ci 30768c2ecf20Sopenharmony_ci#define MODS_ONLY U32_MAX 30778c2ecf20Sopenharmony_ci 30788c2ecf20Sopenharmony_cistatic int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req, 30798c2ecf20Sopenharmony_ci u32 bytes) 30808c2ecf20Sopenharmony_ci{ 30818c2ecf20Sopenharmony_ci struct ceph_osd_request *osd_req; 30828c2ecf20Sopenharmony_ci int ret; 30838c2ecf20Sopenharmony_ci 30848c2ecf20Sopenharmony_ci dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); 30858c2ecf20Sopenharmony_ci rbd_assert(bytes > 0 && bytes != MODS_ONLY); 30868c2ecf20Sopenharmony_ci 30878c2ecf20Sopenharmony_ci osd_req = __rbd_obj_add_osd_request(obj_req, &rbd_empty_snapc, 1); 30888c2ecf20Sopenharmony_ci if (IS_ERR(osd_req)) 30898c2ecf20Sopenharmony_ci return PTR_ERR(osd_req); 30908c2ecf20Sopenharmony_ci 30918c2ecf20Sopenharmony_ci ret = rbd_osd_setup_copyup(osd_req, 0, bytes); 30928c2ecf20Sopenharmony_ci if (ret) 30938c2ecf20Sopenharmony_ci return ret; 30948c2ecf20Sopenharmony_ci 30958c2ecf20Sopenharmony_ci rbd_osd_format_write(osd_req); 30968c2ecf20Sopenharmony_ci 
30978c2ecf20Sopenharmony_ci ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO); 30988c2ecf20Sopenharmony_ci if (ret) 30998c2ecf20Sopenharmony_ci return ret; 31008c2ecf20Sopenharmony_ci 31018c2ecf20Sopenharmony_ci rbd_osd_submit(osd_req); 31028c2ecf20Sopenharmony_ci return 0; 31038c2ecf20Sopenharmony_ci} 31048c2ecf20Sopenharmony_ci 31058c2ecf20Sopenharmony_cistatic int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req, 31068c2ecf20Sopenharmony_ci u32 bytes) 31078c2ecf20Sopenharmony_ci{ 31088c2ecf20Sopenharmony_ci struct ceph_osd_request *osd_req; 31098c2ecf20Sopenharmony_ci int num_ops = count_write_ops(obj_req); 31108c2ecf20Sopenharmony_ci int which = 0; 31118c2ecf20Sopenharmony_ci int ret; 31128c2ecf20Sopenharmony_ci 31138c2ecf20Sopenharmony_ci dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); 31148c2ecf20Sopenharmony_ci 31158c2ecf20Sopenharmony_ci if (bytes != MODS_ONLY) 31168c2ecf20Sopenharmony_ci num_ops++; /* copyup */ 31178c2ecf20Sopenharmony_ci 31188c2ecf20Sopenharmony_ci osd_req = rbd_obj_add_osd_request(obj_req, num_ops); 31198c2ecf20Sopenharmony_ci if (IS_ERR(osd_req)) 31208c2ecf20Sopenharmony_ci return PTR_ERR(osd_req); 31218c2ecf20Sopenharmony_ci 31228c2ecf20Sopenharmony_ci if (bytes != MODS_ONLY) { 31238c2ecf20Sopenharmony_ci ret = rbd_osd_setup_copyup(osd_req, which++, bytes); 31248c2ecf20Sopenharmony_ci if (ret) 31258c2ecf20Sopenharmony_ci return ret; 31268c2ecf20Sopenharmony_ci } 31278c2ecf20Sopenharmony_ci 31288c2ecf20Sopenharmony_ci rbd_osd_setup_write_ops(osd_req, which); 31298c2ecf20Sopenharmony_ci rbd_osd_format_write(osd_req); 31308c2ecf20Sopenharmony_ci 31318c2ecf20Sopenharmony_ci ret = ceph_osdc_alloc_messages(osd_req, GFP_NOIO); 31328c2ecf20Sopenharmony_ci if (ret) 31338c2ecf20Sopenharmony_ci return ret; 31348c2ecf20Sopenharmony_ci 31358c2ecf20Sopenharmony_ci rbd_osd_submit(osd_req); 31368c2ecf20Sopenharmony_ci return 0; 31378c2ecf20Sopenharmony_ci} 31388c2ecf20Sopenharmony_ci 31398c2ecf20Sopenharmony_cistatic int 
setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) 31408c2ecf20Sopenharmony_ci{ 31418c2ecf20Sopenharmony_ci u32 i; 31428c2ecf20Sopenharmony_ci 31438c2ecf20Sopenharmony_ci rbd_assert(!obj_req->copyup_bvecs); 31448c2ecf20Sopenharmony_ci obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap); 31458c2ecf20Sopenharmony_ci obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count, 31468c2ecf20Sopenharmony_ci sizeof(*obj_req->copyup_bvecs), 31478c2ecf20Sopenharmony_ci GFP_NOIO); 31488c2ecf20Sopenharmony_ci if (!obj_req->copyup_bvecs) 31498c2ecf20Sopenharmony_ci return -ENOMEM; 31508c2ecf20Sopenharmony_ci 31518c2ecf20Sopenharmony_ci for (i = 0; i < obj_req->copyup_bvec_count; i++) { 31528c2ecf20Sopenharmony_ci unsigned int len = min(obj_overlap, (u64)PAGE_SIZE); 31538c2ecf20Sopenharmony_ci 31548c2ecf20Sopenharmony_ci obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO); 31558c2ecf20Sopenharmony_ci if (!obj_req->copyup_bvecs[i].bv_page) 31568c2ecf20Sopenharmony_ci return -ENOMEM; 31578c2ecf20Sopenharmony_ci 31588c2ecf20Sopenharmony_ci obj_req->copyup_bvecs[i].bv_offset = 0; 31598c2ecf20Sopenharmony_ci obj_req->copyup_bvecs[i].bv_len = len; 31608c2ecf20Sopenharmony_ci obj_overlap -= len; 31618c2ecf20Sopenharmony_ci } 31628c2ecf20Sopenharmony_ci 31638c2ecf20Sopenharmony_ci rbd_assert(!obj_overlap); 31648c2ecf20Sopenharmony_ci return 0; 31658c2ecf20Sopenharmony_ci} 31668c2ecf20Sopenharmony_ci 31678c2ecf20Sopenharmony_ci/* 31688c2ecf20Sopenharmony_ci * The target object doesn't exist. Read the data for the entire 31698c2ecf20Sopenharmony_ci * target object up to the overlap point (if any) from the parent, 31708c2ecf20Sopenharmony_ci * so we can use it for a copyup. 
31718c2ecf20Sopenharmony_ci */ 31728c2ecf20Sopenharmony_cistatic int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req) 31738c2ecf20Sopenharmony_ci{ 31748c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 31758c2ecf20Sopenharmony_ci int ret; 31768c2ecf20Sopenharmony_ci 31778c2ecf20Sopenharmony_ci rbd_assert(obj_req->num_img_extents); 31788c2ecf20Sopenharmony_ci prune_extents(obj_req->img_extents, &obj_req->num_img_extents, 31798c2ecf20Sopenharmony_ci rbd_dev->parent_overlap); 31808c2ecf20Sopenharmony_ci if (!obj_req->num_img_extents) { 31818c2ecf20Sopenharmony_ci /* 31828c2ecf20Sopenharmony_ci * The overlap has become 0 (most likely because the 31838c2ecf20Sopenharmony_ci * image has been flattened). Re-submit the original write 31848c2ecf20Sopenharmony_ci * request -- pass MODS_ONLY since the copyup isn't needed 31858c2ecf20Sopenharmony_ci * anymore. 31868c2ecf20Sopenharmony_ci */ 31878c2ecf20Sopenharmony_ci return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY); 31888c2ecf20Sopenharmony_ci } 31898c2ecf20Sopenharmony_ci 31908c2ecf20Sopenharmony_ci ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req)); 31918c2ecf20Sopenharmony_ci if (ret) 31928c2ecf20Sopenharmony_ci return ret; 31938c2ecf20Sopenharmony_ci 31948c2ecf20Sopenharmony_ci return rbd_obj_read_from_parent(obj_req); 31958c2ecf20Sopenharmony_ci} 31968c2ecf20Sopenharmony_ci 31978c2ecf20Sopenharmony_cistatic void rbd_obj_copyup_object_maps(struct rbd_obj_request *obj_req) 31988c2ecf20Sopenharmony_ci{ 31998c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 32008c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc = obj_req->img_request->snapc; 32018c2ecf20Sopenharmony_ci u8 new_state; 32028c2ecf20Sopenharmony_ci u32 i; 32038c2ecf20Sopenharmony_ci int ret; 32048c2ecf20Sopenharmony_ci 32058c2ecf20Sopenharmony_ci rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); 32068c2ecf20Sopenharmony_ci 
32078c2ecf20Sopenharmony_ci if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) 32088c2ecf20Sopenharmony_ci return; 32098c2ecf20Sopenharmony_ci 32108c2ecf20Sopenharmony_ci if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) 32118c2ecf20Sopenharmony_ci return; 32128c2ecf20Sopenharmony_ci 32138c2ecf20Sopenharmony_ci for (i = 0; i < snapc->num_snaps; i++) { 32148c2ecf20Sopenharmony_ci if ((rbd_dev->header.features & RBD_FEATURE_FAST_DIFF) && 32158c2ecf20Sopenharmony_ci i + 1 < snapc->num_snaps) 32168c2ecf20Sopenharmony_ci new_state = OBJECT_EXISTS_CLEAN; 32178c2ecf20Sopenharmony_ci else 32188c2ecf20Sopenharmony_ci new_state = OBJECT_EXISTS; 32198c2ecf20Sopenharmony_ci 32208c2ecf20Sopenharmony_ci ret = rbd_object_map_update(obj_req, snapc->snaps[i], 32218c2ecf20Sopenharmony_ci new_state, NULL); 32228c2ecf20Sopenharmony_ci if (ret < 0) { 32238c2ecf20Sopenharmony_ci obj_req->pending.result = ret; 32248c2ecf20Sopenharmony_ci return; 32258c2ecf20Sopenharmony_ci } 32268c2ecf20Sopenharmony_ci 32278c2ecf20Sopenharmony_ci rbd_assert(!ret); 32288c2ecf20Sopenharmony_ci obj_req->pending.num_pending++; 32298c2ecf20Sopenharmony_ci } 32308c2ecf20Sopenharmony_ci} 32318c2ecf20Sopenharmony_ci 32328c2ecf20Sopenharmony_cistatic void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req) 32338c2ecf20Sopenharmony_ci{ 32348c2ecf20Sopenharmony_ci u32 bytes = rbd_obj_img_extents_bytes(obj_req); 32358c2ecf20Sopenharmony_ci int ret; 32368c2ecf20Sopenharmony_ci 32378c2ecf20Sopenharmony_ci rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); 32388c2ecf20Sopenharmony_ci 32398c2ecf20Sopenharmony_ci /* 32408c2ecf20Sopenharmony_ci * Only send non-zero copyup data to save some I/O and network 32418c2ecf20Sopenharmony_ci * bandwidth -- zero copyup data is equivalent to the object not 32428c2ecf20Sopenharmony_ci * existing. 
32438c2ecf20Sopenharmony_ci */ 32448c2ecf20Sopenharmony_ci if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) 32458c2ecf20Sopenharmony_ci bytes = 0; 32468c2ecf20Sopenharmony_ci 32478c2ecf20Sopenharmony_ci if (obj_req->img_request->snapc->num_snaps && bytes > 0) { 32488c2ecf20Sopenharmony_ci /* 32498c2ecf20Sopenharmony_ci * Send a copyup request with an empty snapshot context to 32508c2ecf20Sopenharmony_ci * deep-copyup the object through all existing snapshots. 32518c2ecf20Sopenharmony_ci * A second request with the current snapshot context will be 32528c2ecf20Sopenharmony_ci * sent for the actual modification. 32538c2ecf20Sopenharmony_ci */ 32548c2ecf20Sopenharmony_ci ret = rbd_obj_copyup_empty_snapc(obj_req, bytes); 32558c2ecf20Sopenharmony_ci if (ret) { 32568c2ecf20Sopenharmony_ci obj_req->pending.result = ret; 32578c2ecf20Sopenharmony_ci return; 32588c2ecf20Sopenharmony_ci } 32598c2ecf20Sopenharmony_ci 32608c2ecf20Sopenharmony_ci obj_req->pending.num_pending++; 32618c2ecf20Sopenharmony_ci bytes = MODS_ONLY; 32628c2ecf20Sopenharmony_ci } 32638c2ecf20Sopenharmony_ci 32648c2ecf20Sopenharmony_ci ret = rbd_obj_copyup_current_snapc(obj_req, bytes); 32658c2ecf20Sopenharmony_ci if (ret) { 32668c2ecf20Sopenharmony_ci obj_req->pending.result = ret; 32678c2ecf20Sopenharmony_ci return; 32688c2ecf20Sopenharmony_ci } 32698c2ecf20Sopenharmony_ci 32708c2ecf20Sopenharmony_ci obj_req->pending.num_pending++; 32718c2ecf20Sopenharmony_ci} 32728c2ecf20Sopenharmony_ci 32738c2ecf20Sopenharmony_cistatic bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result) 32748c2ecf20Sopenharmony_ci{ 32758c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 32768c2ecf20Sopenharmony_ci int ret; 32778c2ecf20Sopenharmony_ci 32788c2ecf20Sopenharmony_ciagain: 32798c2ecf20Sopenharmony_ci switch (obj_req->copyup_state) { 32808c2ecf20Sopenharmony_ci case RBD_OBJ_COPYUP_START: 32818c2ecf20Sopenharmony_ci rbd_assert(!*result); 32828c2ecf20Sopenharmony_ci 
32838c2ecf20Sopenharmony_ci ret = rbd_obj_copyup_read_parent(obj_req); 32848c2ecf20Sopenharmony_ci if (ret) { 32858c2ecf20Sopenharmony_ci *result = ret; 32868c2ecf20Sopenharmony_ci return true; 32878c2ecf20Sopenharmony_ci } 32888c2ecf20Sopenharmony_ci if (obj_req->num_img_extents) 32898c2ecf20Sopenharmony_ci obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT; 32908c2ecf20Sopenharmony_ci else 32918c2ecf20Sopenharmony_ci obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; 32928c2ecf20Sopenharmony_ci return false; 32938c2ecf20Sopenharmony_ci case RBD_OBJ_COPYUP_READ_PARENT: 32948c2ecf20Sopenharmony_ci if (*result) 32958c2ecf20Sopenharmony_ci return true; 32968c2ecf20Sopenharmony_ci 32978c2ecf20Sopenharmony_ci if (is_zero_bvecs(obj_req->copyup_bvecs, 32988c2ecf20Sopenharmony_ci rbd_obj_img_extents_bytes(obj_req))) { 32998c2ecf20Sopenharmony_ci dout("%s %p detected zeros\n", __func__, obj_req); 33008c2ecf20Sopenharmony_ci obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS; 33018c2ecf20Sopenharmony_ci } 33028c2ecf20Sopenharmony_ci 33038c2ecf20Sopenharmony_ci rbd_obj_copyup_object_maps(obj_req); 33048c2ecf20Sopenharmony_ci if (!obj_req->pending.num_pending) { 33058c2ecf20Sopenharmony_ci *result = obj_req->pending.result; 33068c2ecf20Sopenharmony_ci obj_req->copyup_state = RBD_OBJ_COPYUP_OBJECT_MAPS; 33078c2ecf20Sopenharmony_ci goto again; 33088c2ecf20Sopenharmony_ci } 33098c2ecf20Sopenharmony_ci obj_req->copyup_state = __RBD_OBJ_COPYUP_OBJECT_MAPS; 33108c2ecf20Sopenharmony_ci return false; 33118c2ecf20Sopenharmony_ci case __RBD_OBJ_COPYUP_OBJECT_MAPS: 33128c2ecf20Sopenharmony_ci if (!pending_result_dec(&obj_req->pending, result)) 33138c2ecf20Sopenharmony_ci return false; 33148c2ecf20Sopenharmony_ci fallthrough; 33158c2ecf20Sopenharmony_ci case RBD_OBJ_COPYUP_OBJECT_MAPS: 33168c2ecf20Sopenharmony_ci if (*result) { 33178c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "snap object map update failed: %d", 33188c2ecf20Sopenharmony_ci *result); 33198c2ecf20Sopenharmony_ci return true; 
33208c2ecf20Sopenharmony_ci } 33218c2ecf20Sopenharmony_ci 33228c2ecf20Sopenharmony_ci rbd_obj_copyup_write_object(obj_req); 33238c2ecf20Sopenharmony_ci if (!obj_req->pending.num_pending) { 33248c2ecf20Sopenharmony_ci *result = obj_req->pending.result; 33258c2ecf20Sopenharmony_ci obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; 33268c2ecf20Sopenharmony_ci goto again; 33278c2ecf20Sopenharmony_ci } 33288c2ecf20Sopenharmony_ci obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT; 33298c2ecf20Sopenharmony_ci return false; 33308c2ecf20Sopenharmony_ci case __RBD_OBJ_COPYUP_WRITE_OBJECT: 33318c2ecf20Sopenharmony_ci if (!pending_result_dec(&obj_req->pending, result)) 33328c2ecf20Sopenharmony_ci return false; 33338c2ecf20Sopenharmony_ci fallthrough; 33348c2ecf20Sopenharmony_ci case RBD_OBJ_COPYUP_WRITE_OBJECT: 33358c2ecf20Sopenharmony_ci return true; 33368c2ecf20Sopenharmony_ci default: 33378c2ecf20Sopenharmony_ci BUG(); 33388c2ecf20Sopenharmony_ci } 33398c2ecf20Sopenharmony_ci} 33408c2ecf20Sopenharmony_ci 33418c2ecf20Sopenharmony_ci/* 33428c2ecf20Sopenharmony_ci * Return: 33438c2ecf20Sopenharmony_ci * 0 - object map update sent 33448c2ecf20Sopenharmony_ci * 1 - object map update isn't needed 33458c2ecf20Sopenharmony_ci * <0 - error 33468c2ecf20Sopenharmony_ci */ 33478c2ecf20Sopenharmony_cistatic int rbd_obj_write_post_object_map(struct rbd_obj_request *obj_req) 33488c2ecf20Sopenharmony_ci{ 33498c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 33508c2ecf20Sopenharmony_ci u8 current_state = OBJECT_PENDING; 33518c2ecf20Sopenharmony_ci 33528c2ecf20Sopenharmony_ci if (!(rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) 33538c2ecf20Sopenharmony_ci return 1; 33548c2ecf20Sopenharmony_ci 33558c2ecf20Sopenharmony_ci if (!(obj_req->flags & RBD_OBJ_FLAG_DELETION)) 33568c2ecf20Sopenharmony_ci return 1; 33578c2ecf20Sopenharmony_ci 33588c2ecf20Sopenharmony_ci return rbd_object_map_update(obj_req, CEPH_NOSNAP, OBJECT_NONEXISTENT, 
33598c2ecf20Sopenharmony_ci ¤t_state); 33608c2ecf20Sopenharmony_ci} 33618c2ecf20Sopenharmony_ci 33628c2ecf20Sopenharmony_cistatic bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) 33638c2ecf20Sopenharmony_ci{ 33648c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; 33658c2ecf20Sopenharmony_ci int ret; 33668c2ecf20Sopenharmony_ci 33678c2ecf20Sopenharmony_ciagain: 33688c2ecf20Sopenharmony_ci switch (obj_req->write_state) { 33698c2ecf20Sopenharmony_ci case RBD_OBJ_WRITE_START: 33708c2ecf20Sopenharmony_ci rbd_assert(!*result); 33718c2ecf20Sopenharmony_ci 33728c2ecf20Sopenharmony_ci rbd_obj_set_copyup_enabled(obj_req); 33738c2ecf20Sopenharmony_ci if (rbd_obj_write_is_noop(obj_req)) 33748c2ecf20Sopenharmony_ci return true; 33758c2ecf20Sopenharmony_ci 33768c2ecf20Sopenharmony_ci ret = rbd_obj_write_pre_object_map(obj_req); 33778c2ecf20Sopenharmony_ci if (ret < 0) { 33788c2ecf20Sopenharmony_ci *result = ret; 33798c2ecf20Sopenharmony_ci return true; 33808c2ecf20Sopenharmony_ci } 33818c2ecf20Sopenharmony_ci obj_req->write_state = RBD_OBJ_WRITE_PRE_OBJECT_MAP; 33828c2ecf20Sopenharmony_ci if (ret > 0) 33838c2ecf20Sopenharmony_ci goto again; 33848c2ecf20Sopenharmony_ci return false; 33858c2ecf20Sopenharmony_ci case RBD_OBJ_WRITE_PRE_OBJECT_MAP: 33868c2ecf20Sopenharmony_ci if (*result) { 33878c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "pre object map update failed: %d", 33888c2ecf20Sopenharmony_ci *result); 33898c2ecf20Sopenharmony_ci return true; 33908c2ecf20Sopenharmony_ci } 33918c2ecf20Sopenharmony_ci ret = rbd_obj_write_object(obj_req); 33928c2ecf20Sopenharmony_ci if (ret) { 33938c2ecf20Sopenharmony_ci *result = ret; 33948c2ecf20Sopenharmony_ci return true; 33958c2ecf20Sopenharmony_ci } 33968c2ecf20Sopenharmony_ci obj_req->write_state = RBD_OBJ_WRITE_OBJECT; 33978c2ecf20Sopenharmony_ci return false; 33988c2ecf20Sopenharmony_ci case RBD_OBJ_WRITE_OBJECT: 33998c2ecf20Sopenharmony_ci if (*result == -ENOENT) { 
34008c2ecf20Sopenharmony_ci if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { 34018c2ecf20Sopenharmony_ci *result = 0; 34028c2ecf20Sopenharmony_ci obj_req->copyup_state = RBD_OBJ_COPYUP_START; 34038c2ecf20Sopenharmony_ci obj_req->write_state = __RBD_OBJ_WRITE_COPYUP; 34048c2ecf20Sopenharmony_ci goto again; 34058c2ecf20Sopenharmony_ci } 34068c2ecf20Sopenharmony_ci /* 34078c2ecf20Sopenharmony_ci * On a non-existent object: 34088c2ecf20Sopenharmony_ci * delete - -ENOENT, truncate/zero - 0 34098c2ecf20Sopenharmony_ci */ 34108c2ecf20Sopenharmony_ci if (obj_req->flags & RBD_OBJ_FLAG_DELETION) 34118c2ecf20Sopenharmony_ci *result = 0; 34128c2ecf20Sopenharmony_ci } 34138c2ecf20Sopenharmony_ci if (*result) 34148c2ecf20Sopenharmony_ci return true; 34158c2ecf20Sopenharmony_ci 34168c2ecf20Sopenharmony_ci obj_req->write_state = RBD_OBJ_WRITE_COPYUP; 34178c2ecf20Sopenharmony_ci goto again; 34188c2ecf20Sopenharmony_ci case __RBD_OBJ_WRITE_COPYUP: 34198c2ecf20Sopenharmony_ci if (!rbd_obj_advance_copyup(obj_req, result)) 34208c2ecf20Sopenharmony_ci return false; 34218c2ecf20Sopenharmony_ci fallthrough; 34228c2ecf20Sopenharmony_ci case RBD_OBJ_WRITE_COPYUP: 34238c2ecf20Sopenharmony_ci if (*result) { 34248c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "copyup failed: %d", *result); 34258c2ecf20Sopenharmony_ci return true; 34268c2ecf20Sopenharmony_ci } 34278c2ecf20Sopenharmony_ci ret = rbd_obj_write_post_object_map(obj_req); 34288c2ecf20Sopenharmony_ci if (ret < 0) { 34298c2ecf20Sopenharmony_ci *result = ret; 34308c2ecf20Sopenharmony_ci return true; 34318c2ecf20Sopenharmony_ci } 34328c2ecf20Sopenharmony_ci obj_req->write_state = RBD_OBJ_WRITE_POST_OBJECT_MAP; 34338c2ecf20Sopenharmony_ci if (ret > 0) 34348c2ecf20Sopenharmony_ci goto again; 34358c2ecf20Sopenharmony_ci return false; 34368c2ecf20Sopenharmony_ci case RBD_OBJ_WRITE_POST_OBJECT_MAP: 34378c2ecf20Sopenharmony_ci if (*result) 34388c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "post object map update failed: %d", 
34398c2ecf20Sopenharmony_ci *result); 34408c2ecf20Sopenharmony_ci return true; 34418c2ecf20Sopenharmony_ci default: 34428c2ecf20Sopenharmony_ci BUG(); 34438c2ecf20Sopenharmony_ci } 34448c2ecf20Sopenharmony_ci} 34458c2ecf20Sopenharmony_ci 34468c2ecf20Sopenharmony_ci/* 34478c2ecf20Sopenharmony_ci * Return true if @obj_req is completed. 34488c2ecf20Sopenharmony_ci */ 34498c2ecf20Sopenharmony_cistatic bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req, 34508c2ecf20Sopenharmony_ci int *result) 34518c2ecf20Sopenharmony_ci{ 34528c2ecf20Sopenharmony_ci struct rbd_img_request *img_req = obj_req->img_request; 34538c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 34548c2ecf20Sopenharmony_ci bool done; 34558c2ecf20Sopenharmony_ci 34568c2ecf20Sopenharmony_ci mutex_lock(&obj_req->state_mutex); 34578c2ecf20Sopenharmony_ci if (!rbd_img_is_write(img_req)) 34588c2ecf20Sopenharmony_ci done = rbd_obj_advance_read(obj_req, result); 34598c2ecf20Sopenharmony_ci else 34608c2ecf20Sopenharmony_ci done = rbd_obj_advance_write(obj_req, result); 34618c2ecf20Sopenharmony_ci mutex_unlock(&obj_req->state_mutex); 34628c2ecf20Sopenharmony_ci 34638c2ecf20Sopenharmony_ci if (done && *result) { 34648c2ecf20Sopenharmony_ci rbd_assert(*result < 0); 34658c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "%s at objno %llu %llu~%llu result %d", 34668c2ecf20Sopenharmony_ci obj_op_name(img_req->op_type), obj_req->ex.oe_objno, 34678c2ecf20Sopenharmony_ci obj_req->ex.oe_off, obj_req->ex.oe_len, *result); 34688c2ecf20Sopenharmony_ci } 34698c2ecf20Sopenharmony_ci return done; 34708c2ecf20Sopenharmony_ci} 34718c2ecf20Sopenharmony_ci 34728c2ecf20Sopenharmony_ci/* 34738c2ecf20Sopenharmony_ci * This is open-coded in rbd_img_handle_request() to avoid parent chain 34748c2ecf20Sopenharmony_ci * recursion. 
34758c2ecf20Sopenharmony_ci */ 34768c2ecf20Sopenharmony_cistatic void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result) 34778c2ecf20Sopenharmony_ci{ 34788c2ecf20Sopenharmony_ci if (__rbd_obj_handle_request(obj_req, &result)) 34798c2ecf20Sopenharmony_ci rbd_img_handle_request(obj_req->img_request, result); 34808c2ecf20Sopenharmony_ci} 34818c2ecf20Sopenharmony_ci 34828c2ecf20Sopenharmony_cistatic bool need_exclusive_lock(struct rbd_img_request *img_req) 34838c2ecf20Sopenharmony_ci{ 34848c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 34858c2ecf20Sopenharmony_ci 34868c2ecf20Sopenharmony_ci if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) 34878c2ecf20Sopenharmony_ci return false; 34888c2ecf20Sopenharmony_ci 34898c2ecf20Sopenharmony_ci if (rbd_is_ro(rbd_dev)) 34908c2ecf20Sopenharmony_ci return false; 34918c2ecf20Sopenharmony_ci 34928c2ecf20Sopenharmony_ci rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags)); 34938c2ecf20Sopenharmony_ci if (rbd_dev->opts->lock_on_read || 34948c2ecf20Sopenharmony_ci (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) 34958c2ecf20Sopenharmony_ci return true; 34968c2ecf20Sopenharmony_ci 34978c2ecf20Sopenharmony_ci return rbd_img_is_write(img_req); 34988c2ecf20Sopenharmony_ci} 34998c2ecf20Sopenharmony_ci 35008c2ecf20Sopenharmony_cistatic bool rbd_lock_add_request(struct rbd_img_request *img_req) 35018c2ecf20Sopenharmony_ci{ 35028c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 35038c2ecf20Sopenharmony_ci bool locked; 35048c2ecf20Sopenharmony_ci 35058c2ecf20Sopenharmony_ci lockdep_assert_held(&rbd_dev->lock_rwsem); 35068c2ecf20Sopenharmony_ci locked = rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED; 35078c2ecf20Sopenharmony_ci spin_lock(&rbd_dev->lock_lists_lock); 35088c2ecf20Sopenharmony_ci rbd_assert(list_empty(&img_req->lock_item)); 35098c2ecf20Sopenharmony_ci if (!locked) 35108c2ecf20Sopenharmony_ci list_add_tail(&img_req->lock_item, &rbd_dev->acquiring_list); 
35118c2ecf20Sopenharmony_ci else 35128c2ecf20Sopenharmony_ci list_add_tail(&img_req->lock_item, &rbd_dev->running_list); 35138c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev->lock_lists_lock); 35148c2ecf20Sopenharmony_ci return locked; 35158c2ecf20Sopenharmony_ci} 35168c2ecf20Sopenharmony_ci 35178c2ecf20Sopenharmony_cistatic void rbd_lock_del_request(struct rbd_img_request *img_req) 35188c2ecf20Sopenharmony_ci{ 35198c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 35208c2ecf20Sopenharmony_ci bool need_wakeup = false; 35218c2ecf20Sopenharmony_ci 35228c2ecf20Sopenharmony_ci lockdep_assert_held(&rbd_dev->lock_rwsem); 35238c2ecf20Sopenharmony_ci spin_lock(&rbd_dev->lock_lists_lock); 35248c2ecf20Sopenharmony_ci if (!list_empty(&img_req->lock_item)) { 35258c2ecf20Sopenharmony_ci list_del_init(&img_req->lock_item); 35268c2ecf20Sopenharmony_ci need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && 35278c2ecf20Sopenharmony_ci list_empty(&rbd_dev->running_list)); 35288c2ecf20Sopenharmony_ci } 35298c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev->lock_lists_lock); 35308c2ecf20Sopenharmony_ci if (need_wakeup) 35318c2ecf20Sopenharmony_ci complete(&rbd_dev->releasing_wait); 35328c2ecf20Sopenharmony_ci} 35338c2ecf20Sopenharmony_ci 35348c2ecf20Sopenharmony_cistatic int rbd_img_exclusive_lock(struct rbd_img_request *img_req) 35358c2ecf20Sopenharmony_ci{ 35368c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 35378c2ecf20Sopenharmony_ci 35388c2ecf20Sopenharmony_ci if (!need_exclusive_lock(img_req)) 35398c2ecf20Sopenharmony_ci return 1; 35408c2ecf20Sopenharmony_ci 35418c2ecf20Sopenharmony_ci if (rbd_lock_add_request(img_req)) 35428c2ecf20Sopenharmony_ci return 1; 35438c2ecf20Sopenharmony_ci 35448c2ecf20Sopenharmony_ci if (rbd_dev->opts->exclusive) { 35458c2ecf20Sopenharmony_ci WARN_ON(1); /* lock got released? 
*/ 35468c2ecf20Sopenharmony_ci return -EROFS; 35478c2ecf20Sopenharmony_ci } 35488c2ecf20Sopenharmony_ci 35498c2ecf20Sopenharmony_ci /* 35508c2ecf20Sopenharmony_ci * Note the use of mod_delayed_work() in rbd_acquire_lock() 35518c2ecf20Sopenharmony_ci * and cancel_delayed_work() in wake_lock_waiters(). 35528c2ecf20Sopenharmony_ci */ 35538c2ecf20Sopenharmony_ci dout("%s rbd_dev %p queueing lock_dwork\n", __func__, rbd_dev); 35548c2ecf20Sopenharmony_ci queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); 35558c2ecf20Sopenharmony_ci return 0; 35568c2ecf20Sopenharmony_ci} 35578c2ecf20Sopenharmony_ci 35588c2ecf20Sopenharmony_cistatic void rbd_img_object_requests(struct rbd_img_request *img_req) 35598c2ecf20Sopenharmony_ci{ 35608c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 35618c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req; 35628c2ecf20Sopenharmony_ci 35638c2ecf20Sopenharmony_ci rbd_assert(!img_req->pending.result && !img_req->pending.num_pending); 35648c2ecf20Sopenharmony_ci rbd_assert(!need_exclusive_lock(img_req) || 35658c2ecf20Sopenharmony_ci __rbd_is_lock_owner(rbd_dev)); 35668c2ecf20Sopenharmony_ci 35678c2ecf20Sopenharmony_ci if (rbd_img_is_write(img_req)) { 35688c2ecf20Sopenharmony_ci rbd_assert(!img_req->snapc); 35698c2ecf20Sopenharmony_ci down_read(&rbd_dev->header_rwsem); 35708c2ecf20Sopenharmony_ci img_req->snapc = ceph_get_snap_context(rbd_dev->header.snapc); 35718c2ecf20Sopenharmony_ci up_read(&rbd_dev->header_rwsem); 35728c2ecf20Sopenharmony_ci } 35738c2ecf20Sopenharmony_ci 35748c2ecf20Sopenharmony_ci for_each_obj_request(img_req, obj_req) { 35758c2ecf20Sopenharmony_ci int result = 0; 35768c2ecf20Sopenharmony_ci 35778c2ecf20Sopenharmony_ci if (__rbd_obj_handle_request(obj_req, &result)) { 35788c2ecf20Sopenharmony_ci if (result) { 35798c2ecf20Sopenharmony_ci img_req->pending.result = result; 35808c2ecf20Sopenharmony_ci return; 35818c2ecf20Sopenharmony_ci } 35828c2ecf20Sopenharmony_ci } else { 
35838c2ecf20Sopenharmony_ci img_req->pending.num_pending++; 35848c2ecf20Sopenharmony_ci } 35858c2ecf20Sopenharmony_ci } 35868c2ecf20Sopenharmony_ci} 35878c2ecf20Sopenharmony_ci 35888c2ecf20Sopenharmony_cistatic bool rbd_img_advance(struct rbd_img_request *img_req, int *result) 35898c2ecf20Sopenharmony_ci{ 35908c2ecf20Sopenharmony_ci int ret; 35918c2ecf20Sopenharmony_ci 35928c2ecf20Sopenharmony_ciagain: 35938c2ecf20Sopenharmony_ci switch (img_req->state) { 35948c2ecf20Sopenharmony_ci case RBD_IMG_START: 35958c2ecf20Sopenharmony_ci rbd_assert(!*result); 35968c2ecf20Sopenharmony_ci 35978c2ecf20Sopenharmony_ci ret = rbd_img_exclusive_lock(img_req); 35988c2ecf20Sopenharmony_ci if (ret < 0) { 35998c2ecf20Sopenharmony_ci *result = ret; 36008c2ecf20Sopenharmony_ci return true; 36018c2ecf20Sopenharmony_ci } 36028c2ecf20Sopenharmony_ci img_req->state = RBD_IMG_EXCLUSIVE_LOCK; 36038c2ecf20Sopenharmony_ci if (ret > 0) 36048c2ecf20Sopenharmony_ci goto again; 36058c2ecf20Sopenharmony_ci return false; 36068c2ecf20Sopenharmony_ci case RBD_IMG_EXCLUSIVE_LOCK: 36078c2ecf20Sopenharmony_ci if (*result) 36088c2ecf20Sopenharmony_ci return true; 36098c2ecf20Sopenharmony_ci 36108c2ecf20Sopenharmony_ci rbd_img_object_requests(img_req); 36118c2ecf20Sopenharmony_ci if (!img_req->pending.num_pending) { 36128c2ecf20Sopenharmony_ci *result = img_req->pending.result; 36138c2ecf20Sopenharmony_ci img_req->state = RBD_IMG_OBJECT_REQUESTS; 36148c2ecf20Sopenharmony_ci goto again; 36158c2ecf20Sopenharmony_ci } 36168c2ecf20Sopenharmony_ci img_req->state = __RBD_IMG_OBJECT_REQUESTS; 36178c2ecf20Sopenharmony_ci return false; 36188c2ecf20Sopenharmony_ci case __RBD_IMG_OBJECT_REQUESTS: 36198c2ecf20Sopenharmony_ci if (!pending_result_dec(&img_req->pending, result)) 36208c2ecf20Sopenharmony_ci return false; 36218c2ecf20Sopenharmony_ci fallthrough; 36228c2ecf20Sopenharmony_ci case RBD_IMG_OBJECT_REQUESTS: 36238c2ecf20Sopenharmony_ci return true; 36248c2ecf20Sopenharmony_ci default: 36258c2ecf20Sopenharmony_ci 
BUG(); 36268c2ecf20Sopenharmony_ci } 36278c2ecf20Sopenharmony_ci} 36288c2ecf20Sopenharmony_ci 36298c2ecf20Sopenharmony_ci/* 36308c2ecf20Sopenharmony_ci * Return true if @img_req is completed. 36318c2ecf20Sopenharmony_ci */ 36328c2ecf20Sopenharmony_cistatic bool __rbd_img_handle_request(struct rbd_img_request *img_req, 36338c2ecf20Sopenharmony_ci int *result) 36348c2ecf20Sopenharmony_ci{ 36358c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_req->rbd_dev; 36368c2ecf20Sopenharmony_ci bool done; 36378c2ecf20Sopenharmony_ci 36388c2ecf20Sopenharmony_ci if (need_exclusive_lock(img_req)) { 36398c2ecf20Sopenharmony_ci down_read(&rbd_dev->lock_rwsem); 36408c2ecf20Sopenharmony_ci mutex_lock(&img_req->state_mutex); 36418c2ecf20Sopenharmony_ci done = rbd_img_advance(img_req, result); 36428c2ecf20Sopenharmony_ci if (done) 36438c2ecf20Sopenharmony_ci rbd_lock_del_request(img_req); 36448c2ecf20Sopenharmony_ci mutex_unlock(&img_req->state_mutex); 36458c2ecf20Sopenharmony_ci up_read(&rbd_dev->lock_rwsem); 36468c2ecf20Sopenharmony_ci } else { 36478c2ecf20Sopenharmony_ci mutex_lock(&img_req->state_mutex); 36488c2ecf20Sopenharmony_ci done = rbd_img_advance(img_req, result); 36498c2ecf20Sopenharmony_ci mutex_unlock(&img_req->state_mutex); 36508c2ecf20Sopenharmony_ci } 36518c2ecf20Sopenharmony_ci 36528c2ecf20Sopenharmony_ci if (done && *result) { 36538c2ecf20Sopenharmony_ci rbd_assert(*result < 0); 36548c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "%s%s result %d", 36558c2ecf20Sopenharmony_ci test_bit(IMG_REQ_CHILD, &img_req->flags) ? 
"child " : "", 36568c2ecf20Sopenharmony_ci obj_op_name(img_req->op_type), *result); 36578c2ecf20Sopenharmony_ci } 36588c2ecf20Sopenharmony_ci return done; 36598c2ecf20Sopenharmony_ci} 36608c2ecf20Sopenharmony_ci 36618c2ecf20Sopenharmony_cistatic void rbd_img_handle_request(struct rbd_img_request *img_req, int result) 36628c2ecf20Sopenharmony_ci{ 36638c2ecf20Sopenharmony_ciagain: 36648c2ecf20Sopenharmony_ci if (!__rbd_img_handle_request(img_req, &result)) 36658c2ecf20Sopenharmony_ci return; 36668c2ecf20Sopenharmony_ci 36678c2ecf20Sopenharmony_ci if (test_bit(IMG_REQ_CHILD, &img_req->flags)) { 36688c2ecf20Sopenharmony_ci struct rbd_obj_request *obj_req = img_req->obj_request; 36698c2ecf20Sopenharmony_ci 36708c2ecf20Sopenharmony_ci rbd_img_request_destroy(img_req); 36718c2ecf20Sopenharmony_ci if (__rbd_obj_handle_request(obj_req, &result)) { 36728c2ecf20Sopenharmony_ci img_req = obj_req->img_request; 36738c2ecf20Sopenharmony_ci goto again; 36748c2ecf20Sopenharmony_ci } 36758c2ecf20Sopenharmony_ci } else { 36768c2ecf20Sopenharmony_ci struct request *rq = blk_mq_rq_from_pdu(img_req); 36778c2ecf20Sopenharmony_ci 36788c2ecf20Sopenharmony_ci rbd_img_request_destroy(img_req); 36798c2ecf20Sopenharmony_ci blk_mq_end_request(rq, errno_to_blk_status(result)); 36808c2ecf20Sopenharmony_ci } 36818c2ecf20Sopenharmony_ci} 36828c2ecf20Sopenharmony_ci 36838c2ecf20Sopenharmony_cistatic const struct rbd_client_id rbd_empty_cid; 36848c2ecf20Sopenharmony_ci 36858c2ecf20Sopenharmony_cistatic bool rbd_cid_equal(const struct rbd_client_id *lhs, 36868c2ecf20Sopenharmony_ci const struct rbd_client_id *rhs) 36878c2ecf20Sopenharmony_ci{ 36888c2ecf20Sopenharmony_ci return lhs->gid == rhs->gid && lhs->handle == rhs->handle; 36898c2ecf20Sopenharmony_ci} 36908c2ecf20Sopenharmony_ci 36918c2ecf20Sopenharmony_cistatic struct rbd_client_id rbd_get_cid(struct rbd_device *rbd_dev) 36928c2ecf20Sopenharmony_ci{ 36938c2ecf20Sopenharmony_ci struct rbd_client_id cid; 36948c2ecf20Sopenharmony_ci 
36958c2ecf20Sopenharmony_ci mutex_lock(&rbd_dev->watch_mutex); 36968c2ecf20Sopenharmony_ci cid.gid = ceph_client_gid(rbd_dev->rbd_client->client); 36978c2ecf20Sopenharmony_ci cid.handle = rbd_dev->watch_cookie; 36988c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 36998c2ecf20Sopenharmony_ci return cid; 37008c2ecf20Sopenharmony_ci} 37018c2ecf20Sopenharmony_ci 37028c2ecf20Sopenharmony_ci/* 37038c2ecf20Sopenharmony_ci * lock_rwsem must be held for write 37048c2ecf20Sopenharmony_ci */ 37058c2ecf20Sopenharmony_cistatic void rbd_set_owner_cid(struct rbd_device *rbd_dev, 37068c2ecf20Sopenharmony_ci const struct rbd_client_id *cid) 37078c2ecf20Sopenharmony_ci{ 37088c2ecf20Sopenharmony_ci dout("%s rbd_dev %p %llu-%llu -> %llu-%llu\n", __func__, rbd_dev, 37098c2ecf20Sopenharmony_ci rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle, 37108c2ecf20Sopenharmony_ci cid->gid, cid->handle); 37118c2ecf20Sopenharmony_ci rbd_dev->owner_cid = *cid; /* struct */ 37128c2ecf20Sopenharmony_ci} 37138c2ecf20Sopenharmony_ci 37148c2ecf20Sopenharmony_cistatic void format_lock_cookie(struct rbd_device *rbd_dev, char *buf) 37158c2ecf20Sopenharmony_ci{ 37168c2ecf20Sopenharmony_ci mutex_lock(&rbd_dev->watch_mutex); 37178c2ecf20Sopenharmony_ci sprintf(buf, "%s %llu", RBD_LOCK_COOKIE_PREFIX, rbd_dev->watch_cookie); 37188c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 37198c2ecf20Sopenharmony_ci} 37208c2ecf20Sopenharmony_ci 37218c2ecf20Sopenharmony_cistatic void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie) 37228c2ecf20Sopenharmony_ci{ 37238c2ecf20Sopenharmony_ci struct rbd_client_id cid = rbd_get_cid(rbd_dev); 37248c2ecf20Sopenharmony_ci 37258c2ecf20Sopenharmony_ci rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED; 37268c2ecf20Sopenharmony_ci strcpy(rbd_dev->lock_cookie, cookie); 37278c2ecf20Sopenharmony_ci rbd_set_owner_cid(rbd_dev, &cid); 37288c2ecf20Sopenharmony_ci queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work); 37298c2ecf20Sopenharmony_ci} 
37308c2ecf20Sopenharmony_ci 37318c2ecf20Sopenharmony_ci/* 37328c2ecf20Sopenharmony_ci * lock_rwsem must be held for write 37338c2ecf20Sopenharmony_ci */ 37348c2ecf20Sopenharmony_cistatic int rbd_lock(struct rbd_device *rbd_dev) 37358c2ecf20Sopenharmony_ci{ 37368c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 37378c2ecf20Sopenharmony_ci char cookie[32]; 37388c2ecf20Sopenharmony_ci int ret; 37398c2ecf20Sopenharmony_ci 37408c2ecf20Sopenharmony_ci WARN_ON(__rbd_is_lock_owner(rbd_dev) || 37418c2ecf20Sopenharmony_ci rbd_dev->lock_cookie[0] != '\0'); 37428c2ecf20Sopenharmony_ci 37438c2ecf20Sopenharmony_ci format_lock_cookie(rbd_dev, cookie); 37448c2ecf20Sopenharmony_ci ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, 37458c2ecf20Sopenharmony_ci RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie, 37468c2ecf20Sopenharmony_ci RBD_LOCK_TAG, "", 0); 37478c2ecf20Sopenharmony_ci if (ret && ret != -EEXIST) 37488c2ecf20Sopenharmony_ci return ret; 37498c2ecf20Sopenharmony_ci 37508c2ecf20Sopenharmony_ci __rbd_lock(rbd_dev, cookie); 37518c2ecf20Sopenharmony_ci return 0; 37528c2ecf20Sopenharmony_ci} 37538c2ecf20Sopenharmony_ci 37548c2ecf20Sopenharmony_ci/* 37558c2ecf20Sopenharmony_ci * lock_rwsem must be held for write 37568c2ecf20Sopenharmony_ci */ 37578c2ecf20Sopenharmony_cistatic void rbd_unlock(struct rbd_device *rbd_dev) 37588c2ecf20Sopenharmony_ci{ 37598c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 37608c2ecf20Sopenharmony_ci int ret; 37618c2ecf20Sopenharmony_ci 37628c2ecf20Sopenharmony_ci WARN_ON(!__rbd_is_lock_owner(rbd_dev) || 37638c2ecf20Sopenharmony_ci rbd_dev->lock_cookie[0] == '\0'); 37648c2ecf20Sopenharmony_ci 37658c2ecf20Sopenharmony_ci ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, 37668c2ecf20Sopenharmony_ci RBD_LOCK_NAME, rbd_dev->lock_cookie); 37678c2ecf20Sopenharmony_ci if (ret && ret != -ENOENT) 37688c2ecf20Sopenharmony_ci 
rbd_warn(rbd_dev, "failed to unlock header: %d", ret); 37698c2ecf20Sopenharmony_ci 37708c2ecf20Sopenharmony_ci /* treat errors as the image is unlocked */ 37718c2ecf20Sopenharmony_ci rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; 37728c2ecf20Sopenharmony_ci rbd_dev->lock_cookie[0] = '\0'; 37738c2ecf20Sopenharmony_ci rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); 37748c2ecf20Sopenharmony_ci queue_work(rbd_dev->task_wq, &rbd_dev->released_lock_work); 37758c2ecf20Sopenharmony_ci} 37768c2ecf20Sopenharmony_ci 37778c2ecf20Sopenharmony_cistatic int __rbd_notify_op_lock(struct rbd_device *rbd_dev, 37788c2ecf20Sopenharmony_ci enum rbd_notify_op notify_op, 37798c2ecf20Sopenharmony_ci struct page ***preply_pages, 37808c2ecf20Sopenharmony_ci size_t *preply_len) 37818c2ecf20Sopenharmony_ci{ 37828c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 37838c2ecf20Sopenharmony_ci struct rbd_client_id cid = rbd_get_cid(rbd_dev); 37848c2ecf20Sopenharmony_ci char buf[4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN]; 37858c2ecf20Sopenharmony_ci int buf_size = sizeof(buf); 37868c2ecf20Sopenharmony_ci void *p = buf; 37878c2ecf20Sopenharmony_ci 37888c2ecf20Sopenharmony_ci dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op); 37898c2ecf20Sopenharmony_ci 37908c2ecf20Sopenharmony_ci /* encode *LockPayload NotifyMessage (op + ClientId) */ 37918c2ecf20Sopenharmony_ci ceph_start_encoding(&p, 2, 1, buf_size - CEPH_ENCODING_START_BLK_LEN); 37928c2ecf20Sopenharmony_ci ceph_encode_32(&p, notify_op); 37938c2ecf20Sopenharmony_ci ceph_encode_64(&p, cid.gid); 37948c2ecf20Sopenharmony_ci ceph_encode_64(&p, cid.handle); 37958c2ecf20Sopenharmony_ci 37968c2ecf20Sopenharmony_ci return ceph_osdc_notify(osdc, &rbd_dev->header_oid, 37978c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, buf, buf_size, 37988c2ecf20Sopenharmony_ci RBD_NOTIFY_TIMEOUT, preply_pages, preply_len); 37998c2ecf20Sopenharmony_ci} 38008c2ecf20Sopenharmony_ci 38018c2ecf20Sopenharmony_cistatic void 
rbd_notify_op_lock(struct rbd_device *rbd_dev, 38028c2ecf20Sopenharmony_ci enum rbd_notify_op notify_op) 38038c2ecf20Sopenharmony_ci{ 38048c2ecf20Sopenharmony_ci __rbd_notify_op_lock(rbd_dev, notify_op, NULL, NULL); 38058c2ecf20Sopenharmony_ci} 38068c2ecf20Sopenharmony_ci 38078c2ecf20Sopenharmony_cistatic void rbd_notify_acquired_lock(struct work_struct *work) 38088c2ecf20Sopenharmony_ci{ 38098c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = container_of(work, struct rbd_device, 38108c2ecf20Sopenharmony_ci acquired_lock_work); 38118c2ecf20Sopenharmony_ci 38128c2ecf20Sopenharmony_ci rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_ACQUIRED_LOCK); 38138c2ecf20Sopenharmony_ci} 38148c2ecf20Sopenharmony_ci 38158c2ecf20Sopenharmony_cistatic void rbd_notify_released_lock(struct work_struct *work) 38168c2ecf20Sopenharmony_ci{ 38178c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = container_of(work, struct rbd_device, 38188c2ecf20Sopenharmony_ci released_lock_work); 38198c2ecf20Sopenharmony_ci 38208c2ecf20Sopenharmony_ci rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_RELEASED_LOCK); 38218c2ecf20Sopenharmony_ci} 38228c2ecf20Sopenharmony_ci 38238c2ecf20Sopenharmony_cistatic int rbd_request_lock(struct rbd_device *rbd_dev) 38248c2ecf20Sopenharmony_ci{ 38258c2ecf20Sopenharmony_ci struct page **reply_pages; 38268c2ecf20Sopenharmony_ci size_t reply_len; 38278c2ecf20Sopenharmony_ci bool lock_owner_responded = false; 38288c2ecf20Sopenharmony_ci int ret; 38298c2ecf20Sopenharmony_ci 38308c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 38318c2ecf20Sopenharmony_ci 38328c2ecf20Sopenharmony_ci ret = __rbd_notify_op_lock(rbd_dev, RBD_NOTIFY_OP_REQUEST_LOCK, 38338c2ecf20Sopenharmony_ci &reply_pages, &reply_len); 38348c2ecf20Sopenharmony_ci if (ret && ret != -ETIMEDOUT) { 38358c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to request lock: %d", ret); 38368c2ecf20Sopenharmony_ci goto out; 38378c2ecf20Sopenharmony_ci } 38388c2ecf20Sopenharmony_ci 38398c2ecf20Sopenharmony_ci if 
(reply_len > 0 && reply_len <= PAGE_SIZE) { 38408c2ecf20Sopenharmony_ci void *p = page_address(reply_pages[0]); 38418c2ecf20Sopenharmony_ci void *const end = p + reply_len; 38428c2ecf20Sopenharmony_ci u32 n; 38438c2ecf20Sopenharmony_ci 38448c2ecf20Sopenharmony_ci ceph_decode_32_safe(&p, end, n, e_inval); /* num_acks */ 38458c2ecf20Sopenharmony_ci while (n--) { 38468c2ecf20Sopenharmony_ci u8 struct_v; 38478c2ecf20Sopenharmony_ci u32 len; 38488c2ecf20Sopenharmony_ci 38498c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, 8 + 8, e_inval); 38508c2ecf20Sopenharmony_ci p += 8 + 8; /* skip gid and cookie */ 38518c2ecf20Sopenharmony_ci 38528c2ecf20Sopenharmony_ci ceph_decode_32_safe(&p, end, len, e_inval); 38538c2ecf20Sopenharmony_ci if (!len) 38548c2ecf20Sopenharmony_ci continue; 38558c2ecf20Sopenharmony_ci 38568c2ecf20Sopenharmony_ci if (lock_owner_responded) { 38578c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, 38588c2ecf20Sopenharmony_ci "duplicate lock owners detected"); 38598c2ecf20Sopenharmony_ci ret = -EIO; 38608c2ecf20Sopenharmony_ci goto out; 38618c2ecf20Sopenharmony_ci } 38628c2ecf20Sopenharmony_ci 38638c2ecf20Sopenharmony_ci lock_owner_responded = true; 38648c2ecf20Sopenharmony_ci ret = ceph_start_decoding(&p, end, 1, "ResponseMessage", 38658c2ecf20Sopenharmony_ci &struct_v, &len); 38668c2ecf20Sopenharmony_ci if (ret) { 38678c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, 38688c2ecf20Sopenharmony_ci "failed to decode ResponseMessage: %d", 38698c2ecf20Sopenharmony_ci ret); 38708c2ecf20Sopenharmony_ci goto e_inval; 38718c2ecf20Sopenharmony_ci } 38728c2ecf20Sopenharmony_ci 38738c2ecf20Sopenharmony_ci ret = ceph_decode_32(&p); 38748c2ecf20Sopenharmony_ci } 38758c2ecf20Sopenharmony_ci } 38768c2ecf20Sopenharmony_ci 38778c2ecf20Sopenharmony_ci if (!lock_owner_responded) { 38788c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "no lock owners detected"); 38798c2ecf20Sopenharmony_ci ret = -ETIMEDOUT; 38808c2ecf20Sopenharmony_ci } 38818c2ecf20Sopenharmony_ci 38828c2ecf20Sopenharmony_ciout: 
38838c2ecf20Sopenharmony_ci ceph_release_page_vector(reply_pages, calc_pages_for(0, reply_len)); 38848c2ecf20Sopenharmony_ci return ret; 38858c2ecf20Sopenharmony_ci 38868c2ecf20Sopenharmony_cie_inval: 38878c2ecf20Sopenharmony_ci ret = -EINVAL; 38888c2ecf20Sopenharmony_ci goto out; 38898c2ecf20Sopenharmony_ci} 38908c2ecf20Sopenharmony_ci 38918c2ecf20Sopenharmony_ci/* 38928c2ecf20Sopenharmony_ci * Either image request state machine(s) or rbd_add_acquire_lock() 38938c2ecf20Sopenharmony_ci * (i.e. "rbd map"). 38948c2ecf20Sopenharmony_ci */ 38958c2ecf20Sopenharmony_cistatic void wake_lock_waiters(struct rbd_device *rbd_dev, int result) 38968c2ecf20Sopenharmony_ci{ 38978c2ecf20Sopenharmony_ci struct rbd_img_request *img_req; 38988c2ecf20Sopenharmony_ci 38998c2ecf20Sopenharmony_ci dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result); 39008c2ecf20Sopenharmony_ci lockdep_assert_held_write(&rbd_dev->lock_rwsem); 39018c2ecf20Sopenharmony_ci 39028c2ecf20Sopenharmony_ci cancel_delayed_work(&rbd_dev->lock_dwork); 39038c2ecf20Sopenharmony_ci if (!completion_done(&rbd_dev->acquire_wait)) { 39048c2ecf20Sopenharmony_ci rbd_assert(list_empty(&rbd_dev->acquiring_list) && 39058c2ecf20Sopenharmony_ci list_empty(&rbd_dev->running_list)); 39068c2ecf20Sopenharmony_ci rbd_dev->acquire_err = result; 39078c2ecf20Sopenharmony_ci complete_all(&rbd_dev->acquire_wait); 39088c2ecf20Sopenharmony_ci return; 39098c2ecf20Sopenharmony_ci } 39108c2ecf20Sopenharmony_ci 39118c2ecf20Sopenharmony_ci while (!list_empty(&rbd_dev->acquiring_list)) { 39128c2ecf20Sopenharmony_ci img_req = list_first_entry(&rbd_dev->acquiring_list, 39138c2ecf20Sopenharmony_ci struct rbd_img_request, lock_item); 39148c2ecf20Sopenharmony_ci mutex_lock(&img_req->state_mutex); 39158c2ecf20Sopenharmony_ci rbd_assert(img_req->state == RBD_IMG_EXCLUSIVE_LOCK); 39168c2ecf20Sopenharmony_ci if (!result) 39178c2ecf20Sopenharmony_ci list_move_tail(&img_req->lock_item, 39188c2ecf20Sopenharmony_ci &rbd_dev->running_list); 
39198c2ecf20Sopenharmony_ci else 39208c2ecf20Sopenharmony_ci list_del_init(&img_req->lock_item); 39218c2ecf20Sopenharmony_ci rbd_img_schedule(img_req, result); 39228c2ecf20Sopenharmony_ci mutex_unlock(&img_req->state_mutex); 39238c2ecf20Sopenharmony_ci } 39248c2ecf20Sopenharmony_ci} 39258c2ecf20Sopenharmony_ci 39268c2ecf20Sopenharmony_cistatic bool locker_equal(const struct ceph_locker *lhs, 39278c2ecf20Sopenharmony_ci const struct ceph_locker *rhs) 39288c2ecf20Sopenharmony_ci{ 39298c2ecf20Sopenharmony_ci return lhs->id.name.type == rhs->id.name.type && 39308c2ecf20Sopenharmony_ci lhs->id.name.num == rhs->id.name.num && 39318c2ecf20Sopenharmony_ci !strcmp(lhs->id.cookie, rhs->id.cookie) && 39328c2ecf20Sopenharmony_ci ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr); 39338c2ecf20Sopenharmony_ci} 39348c2ecf20Sopenharmony_ci 39358c2ecf20Sopenharmony_cistatic void free_locker(struct ceph_locker *locker) 39368c2ecf20Sopenharmony_ci{ 39378c2ecf20Sopenharmony_ci if (locker) 39388c2ecf20Sopenharmony_ci ceph_free_lockers(locker, 1); 39398c2ecf20Sopenharmony_ci} 39408c2ecf20Sopenharmony_ci 39418c2ecf20Sopenharmony_cistatic struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev) 39428c2ecf20Sopenharmony_ci{ 39438c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 39448c2ecf20Sopenharmony_ci struct ceph_locker *lockers; 39458c2ecf20Sopenharmony_ci u32 num_lockers; 39468c2ecf20Sopenharmony_ci u8 lock_type; 39478c2ecf20Sopenharmony_ci char *lock_tag; 39488c2ecf20Sopenharmony_ci int ret; 39498c2ecf20Sopenharmony_ci 39508c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 39518c2ecf20Sopenharmony_ci 39528c2ecf20Sopenharmony_ci ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid, 39538c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, RBD_LOCK_NAME, 39548c2ecf20Sopenharmony_ci &lock_type, &lock_tag, &lockers, &num_lockers); 39558c2ecf20Sopenharmony_ci if (ret) { 39568c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, 
"failed to get header lockers: %d", ret); 39578c2ecf20Sopenharmony_ci return ERR_PTR(ret); 39588c2ecf20Sopenharmony_ci } 39598c2ecf20Sopenharmony_ci 39608c2ecf20Sopenharmony_ci if (num_lockers == 0) { 39618c2ecf20Sopenharmony_ci dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev); 39628c2ecf20Sopenharmony_ci lockers = NULL; 39638c2ecf20Sopenharmony_ci goto out; 39648c2ecf20Sopenharmony_ci } 39658c2ecf20Sopenharmony_ci 39668c2ecf20Sopenharmony_ci if (strcmp(lock_tag, RBD_LOCK_TAG)) { 39678c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "locked by external mechanism, tag %s", 39688c2ecf20Sopenharmony_ci lock_tag); 39698c2ecf20Sopenharmony_ci goto err_busy; 39708c2ecf20Sopenharmony_ci } 39718c2ecf20Sopenharmony_ci 39728c2ecf20Sopenharmony_ci if (lock_type == CEPH_CLS_LOCK_SHARED) { 39738c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "shared lock type detected"); 39748c2ecf20Sopenharmony_ci goto err_busy; 39758c2ecf20Sopenharmony_ci } 39768c2ecf20Sopenharmony_ci 39778c2ecf20Sopenharmony_ci WARN_ON(num_lockers != 1); 39788c2ecf20Sopenharmony_ci if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX, 39798c2ecf20Sopenharmony_ci strlen(RBD_LOCK_COOKIE_PREFIX))) { 39808c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "locked by external mechanism, cookie %s", 39818c2ecf20Sopenharmony_ci lockers[0].id.cookie); 39828c2ecf20Sopenharmony_ci goto err_busy; 39838c2ecf20Sopenharmony_ci } 39848c2ecf20Sopenharmony_ci 39858c2ecf20Sopenharmony_ciout: 39868c2ecf20Sopenharmony_ci kfree(lock_tag); 39878c2ecf20Sopenharmony_ci return lockers; 39888c2ecf20Sopenharmony_ci 39898c2ecf20Sopenharmony_cierr_busy: 39908c2ecf20Sopenharmony_ci kfree(lock_tag); 39918c2ecf20Sopenharmony_ci ceph_free_lockers(lockers, num_lockers); 39928c2ecf20Sopenharmony_ci return ERR_PTR(-EBUSY); 39938c2ecf20Sopenharmony_ci} 39948c2ecf20Sopenharmony_ci 39958c2ecf20Sopenharmony_cistatic int find_watcher(struct rbd_device *rbd_dev, 39968c2ecf20Sopenharmony_ci const struct ceph_locker *locker) 39978c2ecf20Sopenharmony_ci{ 
39988c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 39998c2ecf20Sopenharmony_ci struct ceph_watch_item *watchers; 40008c2ecf20Sopenharmony_ci u32 num_watchers; 40018c2ecf20Sopenharmony_ci u64 cookie; 40028c2ecf20Sopenharmony_ci int i; 40038c2ecf20Sopenharmony_ci int ret; 40048c2ecf20Sopenharmony_ci 40058c2ecf20Sopenharmony_ci ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid, 40068c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, &watchers, 40078c2ecf20Sopenharmony_ci &num_watchers); 40088c2ecf20Sopenharmony_ci if (ret) { 40098c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to get watchers: %d", ret); 40108c2ecf20Sopenharmony_ci return ret; 40118c2ecf20Sopenharmony_ci } 40128c2ecf20Sopenharmony_ci 40138c2ecf20Sopenharmony_ci sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); 40148c2ecf20Sopenharmony_ci for (i = 0; i < num_watchers; i++) { 40158c2ecf20Sopenharmony_ci /* 40168c2ecf20Sopenharmony_ci * Ignore addr->type while comparing. This mimics 40178c2ecf20Sopenharmony_ci * entity_addr_t::get_legacy_str() + strcmp(). 
40188c2ecf20Sopenharmony_ci */ 40198c2ecf20Sopenharmony_ci if (ceph_addr_equal_no_type(&watchers[i].addr, 40208c2ecf20Sopenharmony_ci &locker->info.addr) && 40218c2ecf20Sopenharmony_ci watchers[i].cookie == cookie) { 40228c2ecf20Sopenharmony_ci struct rbd_client_id cid = { 40238c2ecf20Sopenharmony_ci .gid = le64_to_cpu(watchers[i].name.num), 40248c2ecf20Sopenharmony_ci .handle = cookie, 40258c2ecf20Sopenharmony_ci }; 40268c2ecf20Sopenharmony_ci 40278c2ecf20Sopenharmony_ci dout("%s rbd_dev %p found cid %llu-%llu\n", __func__, 40288c2ecf20Sopenharmony_ci rbd_dev, cid.gid, cid.handle); 40298c2ecf20Sopenharmony_ci rbd_set_owner_cid(rbd_dev, &cid); 40308c2ecf20Sopenharmony_ci ret = 1; 40318c2ecf20Sopenharmony_ci goto out; 40328c2ecf20Sopenharmony_ci } 40338c2ecf20Sopenharmony_ci } 40348c2ecf20Sopenharmony_ci 40358c2ecf20Sopenharmony_ci dout("%s rbd_dev %p no watchers\n", __func__, rbd_dev); 40368c2ecf20Sopenharmony_ci ret = 0; 40378c2ecf20Sopenharmony_ciout: 40388c2ecf20Sopenharmony_ci kfree(watchers); 40398c2ecf20Sopenharmony_ci return ret; 40408c2ecf20Sopenharmony_ci} 40418c2ecf20Sopenharmony_ci 40428c2ecf20Sopenharmony_ci/* 40438c2ecf20Sopenharmony_ci * lock_rwsem must be held for write 40448c2ecf20Sopenharmony_ci */ 40458c2ecf20Sopenharmony_cistatic int rbd_try_lock(struct rbd_device *rbd_dev) 40468c2ecf20Sopenharmony_ci{ 40478c2ecf20Sopenharmony_ci struct ceph_client *client = rbd_dev->rbd_client->client; 40488c2ecf20Sopenharmony_ci struct ceph_locker *locker, *refreshed_locker; 40498c2ecf20Sopenharmony_ci int ret; 40508c2ecf20Sopenharmony_ci 40518c2ecf20Sopenharmony_ci for (;;) { 40528c2ecf20Sopenharmony_ci locker = refreshed_locker = NULL; 40538c2ecf20Sopenharmony_ci 40548c2ecf20Sopenharmony_ci ret = rbd_lock(rbd_dev); 40558c2ecf20Sopenharmony_ci if (!ret) 40568c2ecf20Sopenharmony_ci goto out; 40578c2ecf20Sopenharmony_ci if (ret != -EBUSY) { 40588c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to lock header: %d", ret); 40598c2ecf20Sopenharmony_ci goto out; 
40608c2ecf20Sopenharmony_ci } 40618c2ecf20Sopenharmony_ci 40628c2ecf20Sopenharmony_ci /* determine if the current lock holder is still alive */ 40638c2ecf20Sopenharmony_ci locker = get_lock_owner_info(rbd_dev); 40648c2ecf20Sopenharmony_ci if (IS_ERR(locker)) { 40658c2ecf20Sopenharmony_ci ret = PTR_ERR(locker); 40668c2ecf20Sopenharmony_ci locker = NULL; 40678c2ecf20Sopenharmony_ci goto out; 40688c2ecf20Sopenharmony_ci } 40698c2ecf20Sopenharmony_ci if (!locker) 40708c2ecf20Sopenharmony_ci goto again; 40718c2ecf20Sopenharmony_ci 40728c2ecf20Sopenharmony_ci ret = find_watcher(rbd_dev, locker); 40738c2ecf20Sopenharmony_ci if (ret) 40748c2ecf20Sopenharmony_ci goto out; /* request lock or error */ 40758c2ecf20Sopenharmony_ci 40768c2ecf20Sopenharmony_ci refreshed_locker = get_lock_owner_info(rbd_dev); 40778c2ecf20Sopenharmony_ci if (IS_ERR(refreshed_locker)) { 40788c2ecf20Sopenharmony_ci ret = PTR_ERR(refreshed_locker); 40798c2ecf20Sopenharmony_ci refreshed_locker = NULL; 40808c2ecf20Sopenharmony_ci goto out; 40818c2ecf20Sopenharmony_ci } 40828c2ecf20Sopenharmony_ci if (!refreshed_locker || 40838c2ecf20Sopenharmony_ci !locker_equal(locker, refreshed_locker)) 40848c2ecf20Sopenharmony_ci goto again; 40858c2ecf20Sopenharmony_ci 40868c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "breaking header lock owned by %s%llu", 40878c2ecf20Sopenharmony_ci ENTITY_NAME(locker->id.name)); 40888c2ecf20Sopenharmony_ci 40898c2ecf20Sopenharmony_ci ret = ceph_monc_blocklist_add(&client->monc, 40908c2ecf20Sopenharmony_ci &locker->info.addr); 40918c2ecf20Sopenharmony_ci if (ret) { 40928c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d", 40938c2ecf20Sopenharmony_ci ENTITY_NAME(locker->id.name), ret); 40948c2ecf20Sopenharmony_ci goto out; 40958c2ecf20Sopenharmony_ci } 40968c2ecf20Sopenharmony_ci 40978c2ecf20Sopenharmony_ci ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid, 40988c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, RBD_LOCK_NAME, 40998c2ecf20Sopenharmony_ci 
locker->id.cookie, &locker->id.name); 41008c2ecf20Sopenharmony_ci if (ret && ret != -ENOENT) { 41018c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to break header lock: %d", 41028c2ecf20Sopenharmony_ci ret); 41038c2ecf20Sopenharmony_ci goto out; 41048c2ecf20Sopenharmony_ci } 41058c2ecf20Sopenharmony_ci 41068c2ecf20Sopenharmony_ciagain: 41078c2ecf20Sopenharmony_ci free_locker(refreshed_locker); 41088c2ecf20Sopenharmony_ci free_locker(locker); 41098c2ecf20Sopenharmony_ci } 41108c2ecf20Sopenharmony_ci 41118c2ecf20Sopenharmony_ciout: 41128c2ecf20Sopenharmony_ci free_locker(refreshed_locker); 41138c2ecf20Sopenharmony_ci free_locker(locker); 41148c2ecf20Sopenharmony_ci return ret; 41158c2ecf20Sopenharmony_ci} 41168c2ecf20Sopenharmony_ci 41178c2ecf20Sopenharmony_cistatic int rbd_post_acquire_action(struct rbd_device *rbd_dev) 41188c2ecf20Sopenharmony_ci{ 41198c2ecf20Sopenharmony_ci int ret; 41208c2ecf20Sopenharmony_ci 41218c2ecf20Sopenharmony_ci ret = rbd_dev_refresh(rbd_dev); 41228c2ecf20Sopenharmony_ci if (ret) 41238c2ecf20Sopenharmony_ci return ret; 41248c2ecf20Sopenharmony_ci 41258c2ecf20Sopenharmony_ci if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) { 41268c2ecf20Sopenharmony_ci ret = rbd_object_map_open(rbd_dev); 41278c2ecf20Sopenharmony_ci if (ret) 41288c2ecf20Sopenharmony_ci return ret; 41298c2ecf20Sopenharmony_ci } 41308c2ecf20Sopenharmony_ci 41318c2ecf20Sopenharmony_ci return 0; 41328c2ecf20Sopenharmony_ci} 41338c2ecf20Sopenharmony_ci 41348c2ecf20Sopenharmony_ci/* 41358c2ecf20Sopenharmony_ci * Return: 41368c2ecf20Sopenharmony_ci * 0 - lock acquired 41378c2ecf20Sopenharmony_ci * 1 - caller should call rbd_request_lock() 41388c2ecf20Sopenharmony_ci * <0 - error 41398c2ecf20Sopenharmony_ci */ 41408c2ecf20Sopenharmony_cistatic int rbd_try_acquire_lock(struct rbd_device *rbd_dev) 41418c2ecf20Sopenharmony_ci{ 41428c2ecf20Sopenharmony_ci int ret; 41438c2ecf20Sopenharmony_ci 41448c2ecf20Sopenharmony_ci down_read(&rbd_dev->lock_rwsem); 
41458c2ecf20Sopenharmony_ci dout("%s rbd_dev %p read lock_state %d\n", __func__, rbd_dev, 41468c2ecf20Sopenharmony_ci rbd_dev->lock_state); 41478c2ecf20Sopenharmony_ci if (__rbd_is_lock_owner(rbd_dev)) { 41488c2ecf20Sopenharmony_ci up_read(&rbd_dev->lock_rwsem); 41498c2ecf20Sopenharmony_ci return 0; 41508c2ecf20Sopenharmony_ci } 41518c2ecf20Sopenharmony_ci 41528c2ecf20Sopenharmony_ci up_read(&rbd_dev->lock_rwsem); 41538c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 41548c2ecf20Sopenharmony_ci dout("%s rbd_dev %p write lock_state %d\n", __func__, rbd_dev, 41558c2ecf20Sopenharmony_ci rbd_dev->lock_state); 41568c2ecf20Sopenharmony_ci if (__rbd_is_lock_owner(rbd_dev)) { 41578c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 41588c2ecf20Sopenharmony_ci return 0; 41598c2ecf20Sopenharmony_ci } 41608c2ecf20Sopenharmony_ci 41618c2ecf20Sopenharmony_ci ret = rbd_try_lock(rbd_dev); 41628c2ecf20Sopenharmony_ci if (ret < 0) { 41638c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to acquire lock: %d", ret); 41648c2ecf20Sopenharmony_ci goto out; 41658c2ecf20Sopenharmony_ci } 41668c2ecf20Sopenharmony_ci if (ret > 0) { 41678c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 41688c2ecf20Sopenharmony_ci return ret; 41698c2ecf20Sopenharmony_ci } 41708c2ecf20Sopenharmony_ci 41718c2ecf20Sopenharmony_ci rbd_assert(rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED); 41728c2ecf20Sopenharmony_ci rbd_assert(list_empty(&rbd_dev->running_list)); 41738c2ecf20Sopenharmony_ci 41748c2ecf20Sopenharmony_ci ret = rbd_post_acquire_action(rbd_dev); 41758c2ecf20Sopenharmony_ci if (ret) { 41768c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "post-acquire action failed: %d", ret); 41778c2ecf20Sopenharmony_ci /* 41788c2ecf20Sopenharmony_ci * Can't stay in RBD_LOCK_STATE_LOCKED because 41798c2ecf20Sopenharmony_ci * rbd_lock_add_request() would let the request through, 41808c2ecf20Sopenharmony_ci * assuming that e.g. object map is locked and loaded. 
41818c2ecf20Sopenharmony_ci */ 41828c2ecf20Sopenharmony_ci rbd_unlock(rbd_dev); 41838c2ecf20Sopenharmony_ci } 41848c2ecf20Sopenharmony_ci 41858c2ecf20Sopenharmony_ciout: 41868c2ecf20Sopenharmony_ci wake_lock_waiters(rbd_dev, ret); 41878c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 41888c2ecf20Sopenharmony_ci return ret; 41898c2ecf20Sopenharmony_ci} 41908c2ecf20Sopenharmony_ci 41918c2ecf20Sopenharmony_cistatic void rbd_acquire_lock(struct work_struct *work) 41928c2ecf20Sopenharmony_ci{ 41938c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = container_of(to_delayed_work(work), 41948c2ecf20Sopenharmony_ci struct rbd_device, lock_dwork); 41958c2ecf20Sopenharmony_ci int ret; 41968c2ecf20Sopenharmony_ci 41978c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 41988c2ecf20Sopenharmony_ciagain: 41998c2ecf20Sopenharmony_ci ret = rbd_try_acquire_lock(rbd_dev); 42008c2ecf20Sopenharmony_ci if (ret <= 0) { 42018c2ecf20Sopenharmony_ci dout("%s rbd_dev %p ret %d - done\n", __func__, rbd_dev, ret); 42028c2ecf20Sopenharmony_ci return; 42038c2ecf20Sopenharmony_ci } 42048c2ecf20Sopenharmony_ci 42058c2ecf20Sopenharmony_ci ret = rbd_request_lock(rbd_dev); 42068c2ecf20Sopenharmony_ci if (ret == -ETIMEDOUT) { 42078c2ecf20Sopenharmony_ci goto again; /* treat this as a dead client */ 42088c2ecf20Sopenharmony_ci } else if (ret == -EROFS) { 42098c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "peer will not release lock"); 42108c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 42118c2ecf20Sopenharmony_ci wake_lock_waiters(rbd_dev, ret); 42128c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 42138c2ecf20Sopenharmony_ci } else if (ret < 0) { 42148c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "error requesting lock: %d", ret); 42158c2ecf20Sopenharmony_ci mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 42168c2ecf20Sopenharmony_ci RBD_RETRY_DELAY); 42178c2ecf20Sopenharmony_ci } else { 42188c2ecf20Sopenharmony_ci /* 42198c2ecf20Sopenharmony_ci * lock owner acked, 
but resend if we don't see them 42208c2ecf20Sopenharmony_ci * release the lock 42218c2ecf20Sopenharmony_ci */ 42228c2ecf20Sopenharmony_ci dout("%s rbd_dev %p requeuing lock_dwork\n", __func__, 42238c2ecf20Sopenharmony_ci rbd_dev); 42248c2ecf20Sopenharmony_ci mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 42258c2ecf20Sopenharmony_ci msecs_to_jiffies(2 * RBD_NOTIFY_TIMEOUT * MSEC_PER_SEC)); 42268c2ecf20Sopenharmony_ci } 42278c2ecf20Sopenharmony_ci} 42288c2ecf20Sopenharmony_ci 42298c2ecf20Sopenharmony_cistatic bool rbd_quiesce_lock(struct rbd_device *rbd_dev) 42308c2ecf20Sopenharmony_ci{ 42318c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 42328c2ecf20Sopenharmony_ci lockdep_assert_held_write(&rbd_dev->lock_rwsem); 42338c2ecf20Sopenharmony_ci 42348c2ecf20Sopenharmony_ci if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED) 42358c2ecf20Sopenharmony_ci return false; 42368c2ecf20Sopenharmony_ci 42378c2ecf20Sopenharmony_ci /* 42388c2ecf20Sopenharmony_ci * Ensure that all in-flight IO is flushed. 
42398c2ecf20Sopenharmony_ci */ 42408c2ecf20Sopenharmony_ci rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; 42418c2ecf20Sopenharmony_ci rbd_assert(!completion_done(&rbd_dev->releasing_wait)); 42428c2ecf20Sopenharmony_ci if (list_empty(&rbd_dev->running_list)) 42438c2ecf20Sopenharmony_ci return true; 42448c2ecf20Sopenharmony_ci 42458c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 42468c2ecf20Sopenharmony_ci wait_for_completion(&rbd_dev->releasing_wait); 42478c2ecf20Sopenharmony_ci 42488c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 42498c2ecf20Sopenharmony_ci if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) 42508c2ecf20Sopenharmony_ci return false; 42518c2ecf20Sopenharmony_ci 42528c2ecf20Sopenharmony_ci rbd_assert(list_empty(&rbd_dev->running_list)); 42538c2ecf20Sopenharmony_ci return true; 42548c2ecf20Sopenharmony_ci} 42558c2ecf20Sopenharmony_ci 42568c2ecf20Sopenharmony_cistatic void rbd_pre_release_action(struct rbd_device *rbd_dev) 42578c2ecf20Sopenharmony_ci{ 42588c2ecf20Sopenharmony_ci if (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP) 42598c2ecf20Sopenharmony_ci rbd_object_map_close(rbd_dev); 42608c2ecf20Sopenharmony_ci} 42618c2ecf20Sopenharmony_ci 42628c2ecf20Sopenharmony_cistatic void __rbd_release_lock(struct rbd_device *rbd_dev) 42638c2ecf20Sopenharmony_ci{ 42648c2ecf20Sopenharmony_ci rbd_assert(list_empty(&rbd_dev->running_list)); 42658c2ecf20Sopenharmony_ci 42668c2ecf20Sopenharmony_ci rbd_pre_release_action(rbd_dev); 42678c2ecf20Sopenharmony_ci rbd_unlock(rbd_dev); 42688c2ecf20Sopenharmony_ci} 42698c2ecf20Sopenharmony_ci 42708c2ecf20Sopenharmony_ci/* 42718c2ecf20Sopenharmony_ci * lock_rwsem must be held for write 42728c2ecf20Sopenharmony_ci */ 42738c2ecf20Sopenharmony_cistatic void rbd_release_lock(struct rbd_device *rbd_dev) 42748c2ecf20Sopenharmony_ci{ 42758c2ecf20Sopenharmony_ci if (!rbd_quiesce_lock(rbd_dev)) 42768c2ecf20Sopenharmony_ci return; 42778c2ecf20Sopenharmony_ci 42788c2ecf20Sopenharmony_ci 
__rbd_release_lock(rbd_dev); 42798c2ecf20Sopenharmony_ci 42808c2ecf20Sopenharmony_ci /* 42818c2ecf20Sopenharmony_ci * Give others a chance to grab the lock - we would re-acquire 42828c2ecf20Sopenharmony_ci * almost immediately if we got new IO while draining the running 42838c2ecf20Sopenharmony_ci * list otherwise. We need to ack our own notifications, so this 42848c2ecf20Sopenharmony_ci * lock_dwork will be requeued from rbd_handle_released_lock() by 42858c2ecf20Sopenharmony_ci * way of maybe_kick_acquire(). 42868c2ecf20Sopenharmony_ci */ 42878c2ecf20Sopenharmony_ci cancel_delayed_work(&rbd_dev->lock_dwork); 42888c2ecf20Sopenharmony_ci} 42898c2ecf20Sopenharmony_ci 42908c2ecf20Sopenharmony_cistatic void rbd_release_lock_work(struct work_struct *work) 42918c2ecf20Sopenharmony_ci{ 42928c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = container_of(work, struct rbd_device, 42938c2ecf20Sopenharmony_ci unlock_work); 42948c2ecf20Sopenharmony_ci 42958c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 42968c2ecf20Sopenharmony_ci rbd_release_lock(rbd_dev); 42978c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 42988c2ecf20Sopenharmony_ci} 42998c2ecf20Sopenharmony_ci 43008c2ecf20Sopenharmony_cistatic void maybe_kick_acquire(struct rbd_device *rbd_dev) 43018c2ecf20Sopenharmony_ci{ 43028c2ecf20Sopenharmony_ci bool have_requests; 43038c2ecf20Sopenharmony_ci 43048c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 43058c2ecf20Sopenharmony_ci if (__rbd_is_lock_owner(rbd_dev)) 43068c2ecf20Sopenharmony_ci return; 43078c2ecf20Sopenharmony_ci 43088c2ecf20Sopenharmony_ci spin_lock(&rbd_dev->lock_lists_lock); 43098c2ecf20Sopenharmony_ci have_requests = !list_empty(&rbd_dev->acquiring_list); 43108c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev->lock_lists_lock); 43118c2ecf20Sopenharmony_ci if (have_requests || delayed_work_pending(&rbd_dev->lock_dwork)) { 43128c2ecf20Sopenharmony_ci dout("%s rbd_dev %p kicking lock_dwork\n", __func__, rbd_dev); 
43138c2ecf20Sopenharmony_ci mod_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); 43148c2ecf20Sopenharmony_ci } 43158c2ecf20Sopenharmony_ci} 43168c2ecf20Sopenharmony_ci 43178c2ecf20Sopenharmony_cistatic void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v, 43188c2ecf20Sopenharmony_ci void **p) 43198c2ecf20Sopenharmony_ci{ 43208c2ecf20Sopenharmony_ci struct rbd_client_id cid = { 0 }; 43218c2ecf20Sopenharmony_ci 43228c2ecf20Sopenharmony_ci if (struct_v >= 2) { 43238c2ecf20Sopenharmony_ci cid.gid = ceph_decode_64(p); 43248c2ecf20Sopenharmony_ci cid.handle = ceph_decode_64(p); 43258c2ecf20Sopenharmony_ci } 43268c2ecf20Sopenharmony_ci 43278c2ecf20Sopenharmony_ci dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, 43288c2ecf20Sopenharmony_ci cid.handle); 43298c2ecf20Sopenharmony_ci if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { 43308c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 43318c2ecf20Sopenharmony_ci if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { 43328c2ecf20Sopenharmony_ci dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", 43338c2ecf20Sopenharmony_ci __func__, rbd_dev, cid.gid, cid.handle); 43348c2ecf20Sopenharmony_ci } else { 43358c2ecf20Sopenharmony_ci rbd_set_owner_cid(rbd_dev, &cid); 43368c2ecf20Sopenharmony_ci } 43378c2ecf20Sopenharmony_ci downgrade_write(&rbd_dev->lock_rwsem); 43388c2ecf20Sopenharmony_ci } else { 43398c2ecf20Sopenharmony_ci down_read(&rbd_dev->lock_rwsem); 43408c2ecf20Sopenharmony_ci } 43418c2ecf20Sopenharmony_ci 43428c2ecf20Sopenharmony_ci maybe_kick_acquire(rbd_dev); 43438c2ecf20Sopenharmony_ci up_read(&rbd_dev->lock_rwsem); 43448c2ecf20Sopenharmony_ci} 43458c2ecf20Sopenharmony_ci 43468c2ecf20Sopenharmony_cistatic void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v, 43478c2ecf20Sopenharmony_ci void **p) 43488c2ecf20Sopenharmony_ci{ 43498c2ecf20Sopenharmony_ci struct rbd_client_id cid = { 0 }; 43508c2ecf20Sopenharmony_ci 43518c2ecf20Sopenharmony_ci if (struct_v >= 2) { 
43528c2ecf20Sopenharmony_ci cid.gid = ceph_decode_64(p); 43538c2ecf20Sopenharmony_ci cid.handle = ceph_decode_64(p); 43548c2ecf20Sopenharmony_ci } 43558c2ecf20Sopenharmony_ci 43568c2ecf20Sopenharmony_ci dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, 43578c2ecf20Sopenharmony_ci cid.handle); 43588c2ecf20Sopenharmony_ci if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { 43598c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 43608c2ecf20Sopenharmony_ci if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { 43618c2ecf20Sopenharmony_ci dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", 43628c2ecf20Sopenharmony_ci __func__, rbd_dev, cid.gid, cid.handle, 43638c2ecf20Sopenharmony_ci rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); 43648c2ecf20Sopenharmony_ci } else { 43658c2ecf20Sopenharmony_ci rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); 43668c2ecf20Sopenharmony_ci } 43678c2ecf20Sopenharmony_ci downgrade_write(&rbd_dev->lock_rwsem); 43688c2ecf20Sopenharmony_ci } else { 43698c2ecf20Sopenharmony_ci down_read(&rbd_dev->lock_rwsem); 43708c2ecf20Sopenharmony_ci } 43718c2ecf20Sopenharmony_ci 43728c2ecf20Sopenharmony_ci maybe_kick_acquire(rbd_dev); 43738c2ecf20Sopenharmony_ci up_read(&rbd_dev->lock_rwsem); 43748c2ecf20Sopenharmony_ci} 43758c2ecf20Sopenharmony_ci 43768c2ecf20Sopenharmony_ci/* 43778c2ecf20Sopenharmony_ci * Returns result for ResponseMessage to be encoded (<= 0), or 1 if no 43788c2ecf20Sopenharmony_ci * ResponseMessage is needed. 
43798c2ecf20Sopenharmony_ci */ 43808c2ecf20Sopenharmony_cistatic int rbd_handle_request_lock(struct rbd_device *rbd_dev, u8 struct_v, 43818c2ecf20Sopenharmony_ci void **p) 43828c2ecf20Sopenharmony_ci{ 43838c2ecf20Sopenharmony_ci struct rbd_client_id my_cid = rbd_get_cid(rbd_dev); 43848c2ecf20Sopenharmony_ci struct rbd_client_id cid = { 0 }; 43858c2ecf20Sopenharmony_ci int result = 1; 43868c2ecf20Sopenharmony_ci 43878c2ecf20Sopenharmony_ci if (struct_v >= 2) { 43888c2ecf20Sopenharmony_ci cid.gid = ceph_decode_64(p); 43898c2ecf20Sopenharmony_ci cid.handle = ceph_decode_64(p); 43908c2ecf20Sopenharmony_ci } 43918c2ecf20Sopenharmony_ci 43928c2ecf20Sopenharmony_ci dout("%s rbd_dev %p cid %llu-%llu\n", __func__, rbd_dev, cid.gid, 43938c2ecf20Sopenharmony_ci cid.handle); 43948c2ecf20Sopenharmony_ci if (rbd_cid_equal(&cid, &my_cid)) 43958c2ecf20Sopenharmony_ci return result; 43968c2ecf20Sopenharmony_ci 43978c2ecf20Sopenharmony_ci down_read(&rbd_dev->lock_rwsem); 43988c2ecf20Sopenharmony_ci if (__rbd_is_lock_owner(rbd_dev)) { 43998c2ecf20Sopenharmony_ci if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED && 44008c2ecf20Sopenharmony_ci rbd_cid_equal(&rbd_dev->owner_cid, &rbd_empty_cid)) 44018c2ecf20Sopenharmony_ci goto out_unlock; 44028c2ecf20Sopenharmony_ci 44038c2ecf20Sopenharmony_ci /* 44048c2ecf20Sopenharmony_ci * encode ResponseMessage(0) so the peer can detect 44058c2ecf20Sopenharmony_ci * a missing owner 44068c2ecf20Sopenharmony_ci */ 44078c2ecf20Sopenharmony_ci result = 0; 44088c2ecf20Sopenharmony_ci 44098c2ecf20Sopenharmony_ci if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) { 44108c2ecf20Sopenharmony_ci if (!rbd_dev->opts->exclusive) { 44118c2ecf20Sopenharmony_ci dout("%s rbd_dev %p queueing unlock_work\n", 44128c2ecf20Sopenharmony_ci __func__, rbd_dev); 44138c2ecf20Sopenharmony_ci queue_work(rbd_dev->task_wq, 44148c2ecf20Sopenharmony_ci &rbd_dev->unlock_work); 44158c2ecf20Sopenharmony_ci } else { 44168c2ecf20Sopenharmony_ci /* refuse to release the lock */ 
44178c2ecf20Sopenharmony_ci result = -EROFS; 44188c2ecf20Sopenharmony_ci } 44198c2ecf20Sopenharmony_ci } 44208c2ecf20Sopenharmony_ci } 44218c2ecf20Sopenharmony_ci 44228c2ecf20Sopenharmony_ciout_unlock: 44238c2ecf20Sopenharmony_ci up_read(&rbd_dev->lock_rwsem); 44248c2ecf20Sopenharmony_ci return result; 44258c2ecf20Sopenharmony_ci} 44268c2ecf20Sopenharmony_ci 44278c2ecf20Sopenharmony_cistatic void __rbd_acknowledge_notify(struct rbd_device *rbd_dev, 44288c2ecf20Sopenharmony_ci u64 notify_id, u64 cookie, s32 *result) 44298c2ecf20Sopenharmony_ci{ 44308c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 44318c2ecf20Sopenharmony_ci char buf[4 + CEPH_ENCODING_START_BLK_LEN]; 44328c2ecf20Sopenharmony_ci int buf_size = sizeof(buf); 44338c2ecf20Sopenharmony_ci int ret; 44348c2ecf20Sopenharmony_ci 44358c2ecf20Sopenharmony_ci if (result) { 44368c2ecf20Sopenharmony_ci void *p = buf; 44378c2ecf20Sopenharmony_ci 44388c2ecf20Sopenharmony_ci /* encode ResponseMessage */ 44398c2ecf20Sopenharmony_ci ceph_start_encoding(&p, 1, 1, 44408c2ecf20Sopenharmony_ci buf_size - CEPH_ENCODING_START_BLK_LEN); 44418c2ecf20Sopenharmony_ci ceph_encode_32(&p, *result); 44428c2ecf20Sopenharmony_ci } else { 44438c2ecf20Sopenharmony_ci buf_size = 0; 44448c2ecf20Sopenharmony_ci } 44458c2ecf20Sopenharmony_ci 44468c2ecf20Sopenharmony_ci ret = ceph_osdc_notify_ack(osdc, &rbd_dev->header_oid, 44478c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, notify_id, cookie, 44488c2ecf20Sopenharmony_ci buf, buf_size); 44498c2ecf20Sopenharmony_ci if (ret) 44508c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "acknowledge_notify failed: %d", ret); 44518c2ecf20Sopenharmony_ci} 44528c2ecf20Sopenharmony_ci 44538c2ecf20Sopenharmony_cistatic void rbd_acknowledge_notify(struct rbd_device *rbd_dev, u64 notify_id, 44548c2ecf20Sopenharmony_ci u64 cookie) 44558c2ecf20Sopenharmony_ci{ 44568c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 44578c2ecf20Sopenharmony_ci 
__rbd_acknowledge_notify(rbd_dev, notify_id, cookie, NULL); 44588c2ecf20Sopenharmony_ci} 44598c2ecf20Sopenharmony_ci 44608c2ecf20Sopenharmony_cistatic void rbd_acknowledge_notify_result(struct rbd_device *rbd_dev, 44618c2ecf20Sopenharmony_ci u64 notify_id, u64 cookie, s32 result) 44628c2ecf20Sopenharmony_ci{ 44638c2ecf20Sopenharmony_ci dout("%s rbd_dev %p result %d\n", __func__, rbd_dev, result); 44648c2ecf20Sopenharmony_ci __rbd_acknowledge_notify(rbd_dev, notify_id, cookie, &result); 44658c2ecf20Sopenharmony_ci} 44668c2ecf20Sopenharmony_ci 44678c2ecf20Sopenharmony_cistatic void rbd_watch_cb(void *arg, u64 notify_id, u64 cookie, 44688c2ecf20Sopenharmony_ci u64 notifier_id, void *data, size_t data_len) 44698c2ecf20Sopenharmony_ci{ 44708c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = arg; 44718c2ecf20Sopenharmony_ci void *p = data; 44728c2ecf20Sopenharmony_ci void *const end = p + data_len; 44738c2ecf20Sopenharmony_ci u8 struct_v = 0; 44748c2ecf20Sopenharmony_ci u32 len; 44758c2ecf20Sopenharmony_ci u32 notify_op; 44768c2ecf20Sopenharmony_ci int ret; 44778c2ecf20Sopenharmony_ci 44788c2ecf20Sopenharmony_ci dout("%s rbd_dev %p cookie %llu notify_id %llu data_len %zu\n", 44798c2ecf20Sopenharmony_ci __func__, rbd_dev, cookie, notify_id, data_len); 44808c2ecf20Sopenharmony_ci if (data_len) { 44818c2ecf20Sopenharmony_ci ret = ceph_start_decoding(&p, end, 1, "NotifyMessage", 44828c2ecf20Sopenharmony_ci &struct_v, &len); 44838c2ecf20Sopenharmony_ci if (ret) { 44848c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to decode NotifyMessage: %d", 44858c2ecf20Sopenharmony_ci ret); 44868c2ecf20Sopenharmony_ci return; 44878c2ecf20Sopenharmony_ci } 44888c2ecf20Sopenharmony_ci 44898c2ecf20Sopenharmony_ci notify_op = ceph_decode_32(&p); 44908c2ecf20Sopenharmony_ci } else { 44918c2ecf20Sopenharmony_ci /* legacy notification for header updates */ 44928c2ecf20Sopenharmony_ci notify_op = RBD_NOTIFY_OP_HEADER_UPDATE; 44938c2ecf20Sopenharmony_ci len = 0; 44948c2ecf20Sopenharmony_ci } 
44958c2ecf20Sopenharmony_ci 44968c2ecf20Sopenharmony_ci dout("%s rbd_dev %p notify_op %u\n", __func__, rbd_dev, notify_op); 44978c2ecf20Sopenharmony_ci switch (notify_op) { 44988c2ecf20Sopenharmony_ci case RBD_NOTIFY_OP_ACQUIRED_LOCK: 44998c2ecf20Sopenharmony_ci rbd_handle_acquired_lock(rbd_dev, struct_v, &p); 45008c2ecf20Sopenharmony_ci rbd_acknowledge_notify(rbd_dev, notify_id, cookie); 45018c2ecf20Sopenharmony_ci break; 45028c2ecf20Sopenharmony_ci case RBD_NOTIFY_OP_RELEASED_LOCK: 45038c2ecf20Sopenharmony_ci rbd_handle_released_lock(rbd_dev, struct_v, &p); 45048c2ecf20Sopenharmony_ci rbd_acknowledge_notify(rbd_dev, notify_id, cookie); 45058c2ecf20Sopenharmony_ci break; 45068c2ecf20Sopenharmony_ci case RBD_NOTIFY_OP_REQUEST_LOCK: 45078c2ecf20Sopenharmony_ci ret = rbd_handle_request_lock(rbd_dev, struct_v, &p); 45088c2ecf20Sopenharmony_ci if (ret <= 0) 45098c2ecf20Sopenharmony_ci rbd_acknowledge_notify_result(rbd_dev, notify_id, 45108c2ecf20Sopenharmony_ci cookie, ret); 45118c2ecf20Sopenharmony_ci else 45128c2ecf20Sopenharmony_ci rbd_acknowledge_notify(rbd_dev, notify_id, cookie); 45138c2ecf20Sopenharmony_ci break; 45148c2ecf20Sopenharmony_ci case RBD_NOTIFY_OP_HEADER_UPDATE: 45158c2ecf20Sopenharmony_ci ret = rbd_dev_refresh(rbd_dev); 45168c2ecf20Sopenharmony_ci if (ret) 45178c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "refresh failed: %d", ret); 45188c2ecf20Sopenharmony_ci 45198c2ecf20Sopenharmony_ci rbd_acknowledge_notify(rbd_dev, notify_id, cookie); 45208c2ecf20Sopenharmony_ci break; 45218c2ecf20Sopenharmony_ci default: 45228c2ecf20Sopenharmony_ci if (rbd_is_lock_owner(rbd_dev)) 45238c2ecf20Sopenharmony_ci rbd_acknowledge_notify_result(rbd_dev, notify_id, 45248c2ecf20Sopenharmony_ci cookie, -EOPNOTSUPP); 45258c2ecf20Sopenharmony_ci else 45268c2ecf20Sopenharmony_ci rbd_acknowledge_notify(rbd_dev, notify_id, cookie); 45278c2ecf20Sopenharmony_ci break; 45288c2ecf20Sopenharmony_ci } 45298c2ecf20Sopenharmony_ci} 45308c2ecf20Sopenharmony_ci 
45318c2ecf20Sopenharmony_cistatic void __rbd_unregister_watch(struct rbd_device *rbd_dev); 45328c2ecf20Sopenharmony_ci 45338c2ecf20Sopenharmony_cistatic void rbd_watch_errcb(void *arg, u64 cookie, int err) 45348c2ecf20Sopenharmony_ci{ 45358c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = arg; 45368c2ecf20Sopenharmony_ci 45378c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "encountered watch error: %d", err); 45388c2ecf20Sopenharmony_ci 45398c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 45408c2ecf20Sopenharmony_ci rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); 45418c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 45428c2ecf20Sopenharmony_ci 45438c2ecf20Sopenharmony_ci mutex_lock(&rbd_dev->watch_mutex); 45448c2ecf20Sopenharmony_ci if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) { 45458c2ecf20Sopenharmony_ci __rbd_unregister_watch(rbd_dev); 45468c2ecf20Sopenharmony_ci rbd_dev->watch_state = RBD_WATCH_STATE_ERROR; 45478c2ecf20Sopenharmony_ci 45488c2ecf20Sopenharmony_ci queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, 0); 45498c2ecf20Sopenharmony_ci } 45508c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 45518c2ecf20Sopenharmony_ci} 45528c2ecf20Sopenharmony_ci 45538c2ecf20Sopenharmony_ci/* 45548c2ecf20Sopenharmony_ci * watch_mutex must be locked 45558c2ecf20Sopenharmony_ci */ 45568c2ecf20Sopenharmony_cistatic int __rbd_register_watch(struct rbd_device *rbd_dev) 45578c2ecf20Sopenharmony_ci{ 45588c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 45598c2ecf20Sopenharmony_ci struct ceph_osd_linger_request *handle; 45608c2ecf20Sopenharmony_ci 45618c2ecf20Sopenharmony_ci rbd_assert(!rbd_dev->watch_handle); 45628c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 45638c2ecf20Sopenharmony_ci 45648c2ecf20Sopenharmony_ci handle = ceph_osdc_watch(osdc, &rbd_dev->header_oid, 45658c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, rbd_watch_cb, 45668c2ecf20Sopenharmony_ci rbd_watch_errcb, 
rbd_dev); 45678c2ecf20Sopenharmony_ci if (IS_ERR(handle)) 45688c2ecf20Sopenharmony_ci return PTR_ERR(handle); 45698c2ecf20Sopenharmony_ci 45708c2ecf20Sopenharmony_ci rbd_dev->watch_handle = handle; 45718c2ecf20Sopenharmony_ci return 0; 45728c2ecf20Sopenharmony_ci} 45738c2ecf20Sopenharmony_ci 45748c2ecf20Sopenharmony_ci/* 45758c2ecf20Sopenharmony_ci * watch_mutex must be locked 45768c2ecf20Sopenharmony_ci */ 45778c2ecf20Sopenharmony_cistatic void __rbd_unregister_watch(struct rbd_device *rbd_dev) 45788c2ecf20Sopenharmony_ci{ 45798c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 45808c2ecf20Sopenharmony_ci int ret; 45818c2ecf20Sopenharmony_ci 45828c2ecf20Sopenharmony_ci rbd_assert(rbd_dev->watch_handle); 45838c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 45848c2ecf20Sopenharmony_ci 45858c2ecf20Sopenharmony_ci ret = ceph_osdc_unwatch(osdc, rbd_dev->watch_handle); 45868c2ecf20Sopenharmony_ci if (ret) 45878c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to unwatch: %d", ret); 45888c2ecf20Sopenharmony_ci 45898c2ecf20Sopenharmony_ci rbd_dev->watch_handle = NULL; 45908c2ecf20Sopenharmony_ci} 45918c2ecf20Sopenharmony_ci 45928c2ecf20Sopenharmony_cistatic int rbd_register_watch(struct rbd_device *rbd_dev) 45938c2ecf20Sopenharmony_ci{ 45948c2ecf20Sopenharmony_ci int ret; 45958c2ecf20Sopenharmony_ci 45968c2ecf20Sopenharmony_ci mutex_lock(&rbd_dev->watch_mutex); 45978c2ecf20Sopenharmony_ci rbd_assert(rbd_dev->watch_state == RBD_WATCH_STATE_UNREGISTERED); 45988c2ecf20Sopenharmony_ci ret = __rbd_register_watch(rbd_dev); 45998c2ecf20Sopenharmony_ci if (ret) 46008c2ecf20Sopenharmony_ci goto out; 46018c2ecf20Sopenharmony_ci 46028c2ecf20Sopenharmony_ci rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; 46038c2ecf20Sopenharmony_ci rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; 46048c2ecf20Sopenharmony_ci 46058c2ecf20Sopenharmony_ciout: 46068c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 
46078c2ecf20Sopenharmony_ci return ret; 46088c2ecf20Sopenharmony_ci} 46098c2ecf20Sopenharmony_ci 46108c2ecf20Sopenharmony_cistatic void cancel_tasks_sync(struct rbd_device *rbd_dev) 46118c2ecf20Sopenharmony_ci{ 46128c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 46138c2ecf20Sopenharmony_ci 46148c2ecf20Sopenharmony_ci cancel_work_sync(&rbd_dev->acquired_lock_work); 46158c2ecf20Sopenharmony_ci cancel_work_sync(&rbd_dev->released_lock_work); 46168c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&rbd_dev->lock_dwork); 46178c2ecf20Sopenharmony_ci cancel_work_sync(&rbd_dev->unlock_work); 46188c2ecf20Sopenharmony_ci} 46198c2ecf20Sopenharmony_ci 46208c2ecf20Sopenharmony_ci/* 46218c2ecf20Sopenharmony_ci * header_rwsem must not be held to avoid a deadlock with 46228c2ecf20Sopenharmony_ci * rbd_dev_refresh() when flushing notifies. 46238c2ecf20Sopenharmony_ci */ 46248c2ecf20Sopenharmony_cistatic void rbd_unregister_watch(struct rbd_device *rbd_dev) 46258c2ecf20Sopenharmony_ci{ 46268c2ecf20Sopenharmony_ci cancel_tasks_sync(rbd_dev); 46278c2ecf20Sopenharmony_ci 46288c2ecf20Sopenharmony_ci mutex_lock(&rbd_dev->watch_mutex); 46298c2ecf20Sopenharmony_ci if (rbd_dev->watch_state == RBD_WATCH_STATE_REGISTERED) 46308c2ecf20Sopenharmony_ci __rbd_unregister_watch(rbd_dev); 46318c2ecf20Sopenharmony_ci rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; 46328c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 46338c2ecf20Sopenharmony_ci 46348c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&rbd_dev->watch_dwork); 46358c2ecf20Sopenharmony_ci ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc); 46368c2ecf20Sopenharmony_ci} 46378c2ecf20Sopenharmony_ci 46388c2ecf20Sopenharmony_ci/* 46398c2ecf20Sopenharmony_ci * lock_rwsem must be held for write 46408c2ecf20Sopenharmony_ci */ 46418c2ecf20Sopenharmony_cistatic void rbd_reacquire_lock(struct rbd_device *rbd_dev) 46428c2ecf20Sopenharmony_ci{ 46438c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = 
&rbd_dev->rbd_client->client->osdc; 46448c2ecf20Sopenharmony_ci char cookie[32]; 46458c2ecf20Sopenharmony_ci int ret; 46468c2ecf20Sopenharmony_ci 46478c2ecf20Sopenharmony_ci if (!rbd_quiesce_lock(rbd_dev)) 46488c2ecf20Sopenharmony_ci return; 46498c2ecf20Sopenharmony_ci 46508c2ecf20Sopenharmony_ci format_lock_cookie(rbd_dev, cookie); 46518c2ecf20Sopenharmony_ci ret = ceph_cls_set_cookie(osdc, &rbd_dev->header_oid, 46528c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, RBD_LOCK_NAME, 46538c2ecf20Sopenharmony_ci CEPH_CLS_LOCK_EXCLUSIVE, rbd_dev->lock_cookie, 46548c2ecf20Sopenharmony_ci RBD_LOCK_TAG, cookie); 46558c2ecf20Sopenharmony_ci if (ret) { 46568c2ecf20Sopenharmony_ci if (ret != -EOPNOTSUPP) 46578c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to update lock cookie: %d", 46588c2ecf20Sopenharmony_ci ret); 46598c2ecf20Sopenharmony_ci 46608c2ecf20Sopenharmony_ci /* 46618c2ecf20Sopenharmony_ci * Lock cookie cannot be updated on older OSDs, so do 46628c2ecf20Sopenharmony_ci * a manual release and queue an acquire. 
46638c2ecf20Sopenharmony_ci */ 46648c2ecf20Sopenharmony_ci __rbd_release_lock(rbd_dev); 46658c2ecf20Sopenharmony_ci queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); 46668c2ecf20Sopenharmony_ci } else { 46678c2ecf20Sopenharmony_ci __rbd_lock(rbd_dev, cookie); 46688c2ecf20Sopenharmony_ci wake_lock_waiters(rbd_dev, 0); 46698c2ecf20Sopenharmony_ci } 46708c2ecf20Sopenharmony_ci} 46718c2ecf20Sopenharmony_ci 46728c2ecf20Sopenharmony_cistatic void rbd_reregister_watch(struct work_struct *work) 46738c2ecf20Sopenharmony_ci{ 46748c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = container_of(to_delayed_work(work), 46758c2ecf20Sopenharmony_ci struct rbd_device, watch_dwork); 46768c2ecf20Sopenharmony_ci int ret; 46778c2ecf20Sopenharmony_ci 46788c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 46798c2ecf20Sopenharmony_ci 46808c2ecf20Sopenharmony_ci mutex_lock(&rbd_dev->watch_mutex); 46818c2ecf20Sopenharmony_ci if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) { 46828c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 46838c2ecf20Sopenharmony_ci return; 46848c2ecf20Sopenharmony_ci } 46858c2ecf20Sopenharmony_ci 46868c2ecf20Sopenharmony_ci ret = __rbd_register_watch(rbd_dev); 46878c2ecf20Sopenharmony_ci if (ret) { 46888c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); 46898c2ecf20Sopenharmony_ci if (ret != -EBLOCKLISTED && ret != -ENOENT) { 46908c2ecf20Sopenharmony_ci queue_delayed_work(rbd_dev->task_wq, 46918c2ecf20Sopenharmony_ci &rbd_dev->watch_dwork, 46928c2ecf20Sopenharmony_ci RBD_RETRY_DELAY); 46938c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 46948c2ecf20Sopenharmony_ci return; 46958c2ecf20Sopenharmony_ci } 46968c2ecf20Sopenharmony_ci 46978c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 46988c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 46998c2ecf20Sopenharmony_ci wake_lock_waiters(rbd_dev, ret); 47008c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 
47018c2ecf20Sopenharmony_ci return; 47028c2ecf20Sopenharmony_ci } 47038c2ecf20Sopenharmony_ci 47048c2ecf20Sopenharmony_ci rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; 47058c2ecf20Sopenharmony_ci rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; 47068c2ecf20Sopenharmony_ci mutex_unlock(&rbd_dev->watch_mutex); 47078c2ecf20Sopenharmony_ci 47088c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 47098c2ecf20Sopenharmony_ci if (rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) 47108c2ecf20Sopenharmony_ci rbd_reacquire_lock(rbd_dev); 47118c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 47128c2ecf20Sopenharmony_ci 47138c2ecf20Sopenharmony_ci ret = rbd_dev_refresh(rbd_dev); 47148c2ecf20Sopenharmony_ci if (ret) 47158c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "reregistration refresh failed: %d", ret); 47168c2ecf20Sopenharmony_ci} 47178c2ecf20Sopenharmony_ci 47188c2ecf20Sopenharmony_ci/* 47198c2ecf20Sopenharmony_ci * Synchronous osd object method call. Returns the number of bytes 47208c2ecf20Sopenharmony_ci * returned in the outbound buffer, or a negative error code. 47218c2ecf20Sopenharmony_ci */ 47228c2ecf20Sopenharmony_cistatic int rbd_obj_method_sync(struct rbd_device *rbd_dev, 47238c2ecf20Sopenharmony_ci struct ceph_object_id *oid, 47248c2ecf20Sopenharmony_ci struct ceph_object_locator *oloc, 47258c2ecf20Sopenharmony_ci const char *method_name, 47268c2ecf20Sopenharmony_ci const void *outbound, 47278c2ecf20Sopenharmony_ci size_t outbound_size, 47288c2ecf20Sopenharmony_ci void *inbound, 47298c2ecf20Sopenharmony_ci size_t inbound_size) 47308c2ecf20Sopenharmony_ci{ 47318c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 47328c2ecf20Sopenharmony_ci struct page *req_page = NULL; 47338c2ecf20Sopenharmony_ci struct page *reply_page; 47348c2ecf20Sopenharmony_ci int ret; 47358c2ecf20Sopenharmony_ci 47368c2ecf20Sopenharmony_ci /* 47378c2ecf20Sopenharmony_ci * Method calls are ultimately read operations. 
The result 47388c2ecf20Sopenharmony_ci * should placed into the inbound buffer provided. They 47398c2ecf20Sopenharmony_ci * also supply outbound data--parameters for the object 47408c2ecf20Sopenharmony_ci * method. Currently if this is present it will be a 47418c2ecf20Sopenharmony_ci * snapshot id. 47428c2ecf20Sopenharmony_ci */ 47438c2ecf20Sopenharmony_ci if (outbound) { 47448c2ecf20Sopenharmony_ci if (outbound_size > PAGE_SIZE) 47458c2ecf20Sopenharmony_ci return -E2BIG; 47468c2ecf20Sopenharmony_ci 47478c2ecf20Sopenharmony_ci req_page = alloc_page(GFP_KERNEL); 47488c2ecf20Sopenharmony_ci if (!req_page) 47498c2ecf20Sopenharmony_ci return -ENOMEM; 47508c2ecf20Sopenharmony_ci 47518c2ecf20Sopenharmony_ci memcpy(page_address(req_page), outbound, outbound_size); 47528c2ecf20Sopenharmony_ci } 47538c2ecf20Sopenharmony_ci 47548c2ecf20Sopenharmony_ci reply_page = alloc_page(GFP_KERNEL); 47558c2ecf20Sopenharmony_ci if (!reply_page) { 47568c2ecf20Sopenharmony_ci if (req_page) 47578c2ecf20Sopenharmony_ci __free_page(req_page); 47588c2ecf20Sopenharmony_ci return -ENOMEM; 47598c2ecf20Sopenharmony_ci } 47608c2ecf20Sopenharmony_ci 47618c2ecf20Sopenharmony_ci ret = ceph_osdc_call(osdc, oid, oloc, RBD_DRV_NAME, method_name, 47628c2ecf20Sopenharmony_ci CEPH_OSD_FLAG_READ, req_page, outbound_size, 47638c2ecf20Sopenharmony_ci &reply_page, &inbound_size); 47648c2ecf20Sopenharmony_ci if (!ret) { 47658c2ecf20Sopenharmony_ci memcpy(inbound, page_address(reply_page), inbound_size); 47668c2ecf20Sopenharmony_ci ret = inbound_size; 47678c2ecf20Sopenharmony_ci } 47688c2ecf20Sopenharmony_ci 47698c2ecf20Sopenharmony_ci if (req_page) 47708c2ecf20Sopenharmony_ci __free_page(req_page); 47718c2ecf20Sopenharmony_ci __free_page(reply_page); 47728c2ecf20Sopenharmony_ci return ret; 47738c2ecf20Sopenharmony_ci} 47748c2ecf20Sopenharmony_ci 47758c2ecf20Sopenharmony_cistatic void rbd_queue_workfn(struct work_struct *work) 47768c2ecf20Sopenharmony_ci{ 47778c2ecf20Sopenharmony_ci struct rbd_img_request 
*img_request = 47788c2ecf20Sopenharmony_ci container_of(work, struct rbd_img_request, work); 47798c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = img_request->rbd_dev; 47808c2ecf20Sopenharmony_ci enum obj_operation_type op_type = img_request->op_type; 47818c2ecf20Sopenharmony_ci struct request *rq = blk_mq_rq_from_pdu(img_request); 47828c2ecf20Sopenharmony_ci u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; 47838c2ecf20Sopenharmony_ci u64 length = blk_rq_bytes(rq); 47848c2ecf20Sopenharmony_ci u64 mapping_size; 47858c2ecf20Sopenharmony_ci int result; 47868c2ecf20Sopenharmony_ci 47878c2ecf20Sopenharmony_ci /* Ignore/skip any zero-length requests */ 47888c2ecf20Sopenharmony_ci if (!length) { 47898c2ecf20Sopenharmony_ci dout("%s: zero-length request\n", __func__); 47908c2ecf20Sopenharmony_ci result = 0; 47918c2ecf20Sopenharmony_ci goto err_img_request; 47928c2ecf20Sopenharmony_ci } 47938c2ecf20Sopenharmony_ci 47948c2ecf20Sopenharmony_ci blk_mq_start_request(rq); 47958c2ecf20Sopenharmony_ci 47968c2ecf20Sopenharmony_ci down_read(&rbd_dev->header_rwsem); 47978c2ecf20Sopenharmony_ci mapping_size = rbd_dev->mapping.size; 47988c2ecf20Sopenharmony_ci rbd_img_capture_header(img_request); 47998c2ecf20Sopenharmony_ci up_read(&rbd_dev->header_rwsem); 48008c2ecf20Sopenharmony_ci 48018c2ecf20Sopenharmony_ci if (offset + length > mapping_size) { 48028c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset, 48038c2ecf20Sopenharmony_ci length, mapping_size); 48048c2ecf20Sopenharmony_ci result = -EIO; 48058c2ecf20Sopenharmony_ci goto err_img_request; 48068c2ecf20Sopenharmony_ci } 48078c2ecf20Sopenharmony_ci 48088c2ecf20Sopenharmony_ci dout("%s rbd_dev %p img_req %p %s %llu~%llu\n", __func__, rbd_dev, 48098c2ecf20Sopenharmony_ci img_request, obj_op_name(op_type), offset, length); 48108c2ecf20Sopenharmony_ci 48118c2ecf20Sopenharmony_ci if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_ZEROOUT) 48128c2ecf20Sopenharmony_ci result = 
rbd_img_fill_nodata(img_request, offset, length); 48138c2ecf20Sopenharmony_ci else 48148c2ecf20Sopenharmony_ci result = rbd_img_fill_from_bio(img_request, offset, length, 48158c2ecf20Sopenharmony_ci rq->bio); 48168c2ecf20Sopenharmony_ci if (result) 48178c2ecf20Sopenharmony_ci goto err_img_request; 48188c2ecf20Sopenharmony_ci 48198c2ecf20Sopenharmony_ci rbd_img_handle_request(img_request, 0); 48208c2ecf20Sopenharmony_ci return; 48218c2ecf20Sopenharmony_ci 48228c2ecf20Sopenharmony_cierr_img_request: 48238c2ecf20Sopenharmony_ci rbd_img_request_destroy(img_request); 48248c2ecf20Sopenharmony_ci if (result) 48258c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "%s %llx at %llx result %d", 48268c2ecf20Sopenharmony_ci obj_op_name(op_type), length, offset, result); 48278c2ecf20Sopenharmony_ci blk_mq_end_request(rq, errno_to_blk_status(result)); 48288c2ecf20Sopenharmony_ci} 48298c2ecf20Sopenharmony_ci 48308c2ecf20Sopenharmony_cistatic blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx, 48318c2ecf20Sopenharmony_ci const struct blk_mq_queue_data *bd) 48328c2ecf20Sopenharmony_ci{ 48338c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = hctx->queue->queuedata; 48348c2ecf20Sopenharmony_ci struct rbd_img_request *img_req = blk_mq_rq_to_pdu(bd->rq); 48358c2ecf20Sopenharmony_ci enum obj_operation_type op_type; 48368c2ecf20Sopenharmony_ci 48378c2ecf20Sopenharmony_ci switch (req_op(bd->rq)) { 48388c2ecf20Sopenharmony_ci case REQ_OP_DISCARD: 48398c2ecf20Sopenharmony_ci op_type = OBJ_OP_DISCARD; 48408c2ecf20Sopenharmony_ci break; 48418c2ecf20Sopenharmony_ci case REQ_OP_WRITE_ZEROES: 48428c2ecf20Sopenharmony_ci op_type = OBJ_OP_ZEROOUT; 48438c2ecf20Sopenharmony_ci break; 48448c2ecf20Sopenharmony_ci case REQ_OP_WRITE: 48458c2ecf20Sopenharmony_ci op_type = OBJ_OP_WRITE; 48468c2ecf20Sopenharmony_ci break; 48478c2ecf20Sopenharmony_ci case REQ_OP_READ: 48488c2ecf20Sopenharmony_ci op_type = OBJ_OP_READ; 48498c2ecf20Sopenharmony_ci break; 48508c2ecf20Sopenharmony_ci default: 
48518c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "unknown req_op %d", req_op(bd->rq)); 48528c2ecf20Sopenharmony_ci return BLK_STS_IOERR; 48538c2ecf20Sopenharmony_ci } 48548c2ecf20Sopenharmony_ci 48558c2ecf20Sopenharmony_ci rbd_img_request_init(img_req, rbd_dev, op_type); 48568c2ecf20Sopenharmony_ci 48578c2ecf20Sopenharmony_ci if (rbd_img_is_write(img_req)) { 48588c2ecf20Sopenharmony_ci if (rbd_is_ro(rbd_dev)) { 48598c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "%s on read-only mapping", 48608c2ecf20Sopenharmony_ci obj_op_name(img_req->op_type)); 48618c2ecf20Sopenharmony_ci return BLK_STS_IOERR; 48628c2ecf20Sopenharmony_ci } 48638c2ecf20Sopenharmony_ci rbd_assert(!rbd_is_snap(rbd_dev)); 48648c2ecf20Sopenharmony_ci } 48658c2ecf20Sopenharmony_ci 48668c2ecf20Sopenharmony_ci INIT_WORK(&img_req->work, rbd_queue_workfn); 48678c2ecf20Sopenharmony_ci queue_work(rbd_wq, &img_req->work); 48688c2ecf20Sopenharmony_ci return BLK_STS_OK; 48698c2ecf20Sopenharmony_ci} 48708c2ecf20Sopenharmony_ci 48718c2ecf20Sopenharmony_cistatic void rbd_free_disk(struct rbd_device *rbd_dev) 48728c2ecf20Sopenharmony_ci{ 48738c2ecf20Sopenharmony_ci blk_cleanup_queue(rbd_dev->disk->queue); 48748c2ecf20Sopenharmony_ci blk_mq_free_tag_set(&rbd_dev->tag_set); 48758c2ecf20Sopenharmony_ci put_disk(rbd_dev->disk); 48768c2ecf20Sopenharmony_ci rbd_dev->disk = NULL; 48778c2ecf20Sopenharmony_ci} 48788c2ecf20Sopenharmony_ci 48798c2ecf20Sopenharmony_cistatic int rbd_obj_read_sync(struct rbd_device *rbd_dev, 48808c2ecf20Sopenharmony_ci struct ceph_object_id *oid, 48818c2ecf20Sopenharmony_ci struct ceph_object_locator *oloc, 48828c2ecf20Sopenharmony_ci void *buf, int buf_len) 48838c2ecf20Sopenharmony_ci 48848c2ecf20Sopenharmony_ci{ 48858c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 48868c2ecf20Sopenharmony_ci struct ceph_osd_request *req; 48878c2ecf20Sopenharmony_ci struct page **pages; 48888c2ecf20Sopenharmony_ci int num_pages = calc_pages_for(0, buf_len); 
48898c2ecf20Sopenharmony_ci int ret; 48908c2ecf20Sopenharmony_ci 48918c2ecf20Sopenharmony_ci req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); 48928c2ecf20Sopenharmony_ci if (!req) 48938c2ecf20Sopenharmony_ci return -ENOMEM; 48948c2ecf20Sopenharmony_ci 48958c2ecf20Sopenharmony_ci ceph_oid_copy(&req->r_base_oid, oid); 48968c2ecf20Sopenharmony_ci ceph_oloc_copy(&req->r_base_oloc, oloc); 48978c2ecf20Sopenharmony_ci req->r_flags = CEPH_OSD_FLAG_READ; 48988c2ecf20Sopenharmony_ci 48998c2ecf20Sopenharmony_ci pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); 49008c2ecf20Sopenharmony_ci if (IS_ERR(pages)) { 49018c2ecf20Sopenharmony_ci ret = PTR_ERR(pages); 49028c2ecf20Sopenharmony_ci goto out_req; 49038c2ecf20Sopenharmony_ci } 49048c2ecf20Sopenharmony_ci 49058c2ecf20Sopenharmony_ci osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ, 0, buf_len, 0, 0); 49068c2ecf20Sopenharmony_ci osd_req_op_extent_osd_data_pages(req, 0, pages, buf_len, 0, false, 49078c2ecf20Sopenharmony_ci true); 49088c2ecf20Sopenharmony_ci 49098c2ecf20Sopenharmony_ci ret = ceph_osdc_alloc_messages(req, GFP_KERNEL); 49108c2ecf20Sopenharmony_ci if (ret) 49118c2ecf20Sopenharmony_ci goto out_req; 49128c2ecf20Sopenharmony_ci 49138c2ecf20Sopenharmony_ci ceph_osdc_start_request(osdc, req, false); 49148c2ecf20Sopenharmony_ci ret = ceph_osdc_wait_request(osdc, req); 49158c2ecf20Sopenharmony_ci if (ret >= 0) 49168c2ecf20Sopenharmony_ci ceph_copy_from_page_vector(pages, buf, 0, ret); 49178c2ecf20Sopenharmony_ci 49188c2ecf20Sopenharmony_ciout_req: 49198c2ecf20Sopenharmony_ci ceph_osdc_put_request(req); 49208c2ecf20Sopenharmony_ci return ret; 49218c2ecf20Sopenharmony_ci} 49228c2ecf20Sopenharmony_ci 49238c2ecf20Sopenharmony_ci/* 49248c2ecf20Sopenharmony_ci * Read the complete header for the given rbd device. On successful 49258c2ecf20Sopenharmony_ci * return, the rbd_dev->header field will contain up-to-date 49268c2ecf20Sopenharmony_ci * information about the image. 
49278c2ecf20Sopenharmony_ci */ 49288c2ecf20Sopenharmony_cistatic int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, 49298c2ecf20Sopenharmony_ci struct rbd_image_header *header, 49308c2ecf20Sopenharmony_ci bool first_time) 49318c2ecf20Sopenharmony_ci{ 49328c2ecf20Sopenharmony_ci struct rbd_image_header_ondisk *ondisk = NULL; 49338c2ecf20Sopenharmony_ci u32 snap_count = 0; 49348c2ecf20Sopenharmony_ci u64 names_size = 0; 49358c2ecf20Sopenharmony_ci u32 want_count; 49368c2ecf20Sopenharmony_ci int ret; 49378c2ecf20Sopenharmony_ci 49388c2ecf20Sopenharmony_ci /* 49398c2ecf20Sopenharmony_ci * The complete header will include an array of its 64-bit 49408c2ecf20Sopenharmony_ci * snapshot ids, followed by the names of those snapshots as 49418c2ecf20Sopenharmony_ci * a contiguous block of NUL-terminated strings. Note that 49428c2ecf20Sopenharmony_ci * the number of snapshots could change by the time we read 49438c2ecf20Sopenharmony_ci * it in, in which case we re-read it. 49448c2ecf20Sopenharmony_ci */ 49458c2ecf20Sopenharmony_ci do { 49468c2ecf20Sopenharmony_ci size_t size; 49478c2ecf20Sopenharmony_ci 49488c2ecf20Sopenharmony_ci kfree(ondisk); 49498c2ecf20Sopenharmony_ci 49508c2ecf20Sopenharmony_ci size = sizeof (*ondisk); 49518c2ecf20Sopenharmony_ci size += snap_count * sizeof (struct rbd_image_snap_ondisk); 49528c2ecf20Sopenharmony_ci size += names_size; 49538c2ecf20Sopenharmony_ci ondisk = kmalloc(size, GFP_KERNEL); 49548c2ecf20Sopenharmony_ci if (!ondisk) 49558c2ecf20Sopenharmony_ci return -ENOMEM; 49568c2ecf20Sopenharmony_ci 49578c2ecf20Sopenharmony_ci ret = rbd_obj_read_sync(rbd_dev, &rbd_dev->header_oid, 49588c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, ondisk, size); 49598c2ecf20Sopenharmony_ci if (ret < 0) 49608c2ecf20Sopenharmony_ci goto out; 49618c2ecf20Sopenharmony_ci if ((size_t)ret < size) { 49628c2ecf20Sopenharmony_ci ret = -ENXIO; 49638c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "short header read (want %zd got %d)", 49648c2ecf20Sopenharmony_ci size, ret); 
49658c2ecf20Sopenharmony_ci goto out; 49668c2ecf20Sopenharmony_ci } 49678c2ecf20Sopenharmony_ci if (!rbd_dev_ondisk_valid(ondisk)) { 49688c2ecf20Sopenharmony_ci ret = -ENXIO; 49698c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "invalid header"); 49708c2ecf20Sopenharmony_ci goto out; 49718c2ecf20Sopenharmony_ci } 49728c2ecf20Sopenharmony_ci 49738c2ecf20Sopenharmony_ci names_size = le64_to_cpu(ondisk->snap_names_len); 49748c2ecf20Sopenharmony_ci want_count = snap_count; 49758c2ecf20Sopenharmony_ci snap_count = le32_to_cpu(ondisk->snap_count); 49768c2ecf20Sopenharmony_ci } while (snap_count != want_count); 49778c2ecf20Sopenharmony_ci 49788c2ecf20Sopenharmony_ci ret = rbd_header_from_disk(header, ondisk, first_time); 49798c2ecf20Sopenharmony_ciout: 49808c2ecf20Sopenharmony_ci kfree(ondisk); 49818c2ecf20Sopenharmony_ci 49828c2ecf20Sopenharmony_ci return ret; 49838c2ecf20Sopenharmony_ci} 49848c2ecf20Sopenharmony_ci 49858c2ecf20Sopenharmony_cistatic void rbd_dev_update_size(struct rbd_device *rbd_dev) 49868c2ecf20Sopenharmony_ci{ 49878c2ecf20Sopenharmony_ci sector_t size; 49888c2ecf20Sopenharmony_ci 49898c2ecf20Sopenharmony_ci /* 49908c2ecf20Sopenharmony_ci * If EXISTS is not set, rbd_dev->disk may be NULL, so don't 49918c2ecf20Sopenharmony_ci * try to update its size. If REMOVING is set, updating size 49928c2ecf20Sopenharmony_ci * is just useless work since the device can't be opened. 
49938c2ecf20Sopenharmony_ci */ 49948c2ecf20Sopenharmony_ci if (test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags) && 49958c2ecf20Sopenharmony_ci !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { 49968c2ecf20Sopenharmony_ci size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; 49978c2ecf20Sopenharmony_ci dout("setting size to %llu sectors", (unsigned long long)size); 49988c2ecf20Sopenharmony_ci set_capacity(rbd_dev->disk, size); 49998c2ecf20Sopenharmony_ci revalidate_disk_size(rbd_dev->disk, true); 50008c2ecf20Sopenharmony_ci } 50018c2ecf20Sopenharmony_ci} 50028c2ecf20Sopenharmony_ci 50038c2ecf20Sopenharmony_cistatic const struct blk_mq_ops rbd_mq_ops = { 50048c2ecf20Sopenharmony_ci .queue_rq = rbd_queue_rq, 50058c2ecf20Sopenharmony_ci}; 50068c2ecf20Sopenharmony_ci 50078c2ecf20Sopenharmony_cistatic int rbd_init_disk(struct rbd_device *rbd_dev) 50088c2ecf20Sopenharmony_ci{ 50098c2ecf20Sopenharmony_ci struct gendisk *disk; 50108c2ecf20Sopenharmony_ci struct request_queue *q; 50118c2ecf20Sopenharmony_ci unsigned int objset_bytes = 50128c2ecf20Sopenharmony_ci rbd_dev->layout.object_size * rbd_dev->layout.stripe_count; 50138c2ecf20Sopenharmony_ci int err; 50148c2ecf20Sopenharmony_ci 50158c2ecf20Sopenharmony_ci /* create gendisk info */ 50168c2ecf20Sopenharmony_ci disk = alloc_disk(single_major ? 
50178c2ecf20Sopenharmony_ci (1 << RBD_SINGLE_MAJOR_PART_SHIFT) : 50188c2ecf20Sopenharmony_ci RBD_MINORS_PER_MAJOR); 50198c2ecf20Sopenharmony_ci if (!disk) 50208c2ecf20Sopenharmony_ci return -ENOMEM; 50218c2ecf20Sopenharmony_ci 50228c2ecf20Sopenharmony_ci snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d", 50238c2ecf20Sopenharmony_ci rbd_dev->dev_id); 50248c2ecf20Sopenharmony_ci disk->major = rbd_dev->major; 50258c2ecf20Sopenharmony_ci disk->first_minor = rbd_dev->minor; 50268c2ecf20Sopenharmony_ci if (single_major) 50278c2ecf20Sopenharmony_ci disk->flags |= GENHD_FL_EXT_DEVT; 50288c2ecf20Sopenharmony_ci disk->fops = &rbd_bd_ops; 50298c2ecf20Sopenharmony_ci disk->private_data = rbd_dev; 50308c2ecf20Sopenharmony_ci 50318c2ecf20Sopenharmony_ci memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); 50328c2ecf20Sopenharmony_ci rbd_dev->tag_set.ops = &rbd_mq_ops; 50338c2ecf20Sopenharmony_ci rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth; 50348c2ecf20Sopenharmony_ci rbd_dev->tag_set.numa_node = NUMA_NO_NODE; 50358c2ecf20Sopenharmony_ci rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 50368c2ecf20Sopenharmony_ci rbd_dev->tag_set.nr_hw_queues = num_present_cpus(); 50378c2ecf20Sopenharmony_ci rbd_dev->tag_set.cmd_size = sizeof(struct rbd_img_request); 50388c2ecf20Sopenharmony_ci 50398c2ecf20Sopenharmony_ci err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); 50408c2ecf20Sopenharmony_ci if (err) 50418c2ecf20Sopenharmony_ci goto out_disk; 50428c2ecf20Sopenharmony_ci 50438c2ecf20Sopenharmony_ci q = blk_mq_init_queue(&rbd_dev->tag_set); 50448c2ecf20Sopenharmony_ci if (IS_ERR(q)) { 50458c2ecf20Sopenharmony_ci err = PTR_ERR(q); 50468c2ecf20Sopenharmony_ci goto out_tag_set; 50478c2ecf20Sopenharmony_ci } 50488c2ecf20Sopenharmony_ci 50498c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_NONROT, q); 50508c2ecf20Sopenharmony_ci /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ 50518c2ecf20Sopenharmony_ci 50528c2ecf20Sopenharmony_ci 
blk_queue_max_hw_sectors(q, objset_bytes >> SECTOR_SHIFT); 50538c2ecf20Sopenharmony_ci q->limits.max_sectors = queue_max_hw_sectors(q); 50548c2ecf20Sopenharmony_ci blk_queue_max_segments(q, USHRT_MAX); 50558c2ecf20Sopenharmony_ci blk_queue_max_segment_size(q, UINT_MAX); 50568c2ecf20Sopenharmony_ci blk_queue_io_min(q, rbd_dev->opts->alloc_size); 50578c2ecf20Sopenharmony_ci blk_queue_io_opt(q, rbd_dev->opts->alloc_size); 50588c2ecf20Sopenharmony_ci 50598c2ecf20Sopenharmony_ci if (rbd_dev->opts->trim) { 50608c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 50618c2ecf20Sopenharmony_ci q->limits.discard_granularity = rbd_dev->opts->alloc_size; 50628c2ecf20Sopenharmony_ci blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); 50638c2ecf20Sopenharmony_ci blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); 50648c2ecf20Sopenharmony_ci } 50658c2ecf20Sopenharmony_ci 50668c2ecf20Sopenharmony_ci if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) 50678c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); 50688c2ecf20Sopenharmony_ci 50698c2ecf20Sopenharmony_ci /* 50708c2ecf20Sopenharmony_ci * disk_release() expects a queue ref from add_disk() and will 50718c2ecf20Sopenharmony_ci * put it. Hold an extra ref until add_disk() is called. 
50728c2ecf20Sopenharmony_ci */ 50738c2ecf20Sopenharmony_ci WARN_ON(!blk_get_queue(q)); 50748c2ecf20Sopenharmony_ci disk->queue = q; 50758c2ecf20Sopenharmony_ci q->queuedata = rbd_dev; 50768c2ecf20Sopenharmony_ci 50778c2ecf20Sopenharmony_ci rbd_dev->disk = disk; 50788c2ecf20Sopenharmony_ci 50798c2ecf20Sopenharmony_ci return 0; 50808c2ecf20Sopenharmony_ciout_tag_set: 50818c2ecf20Sopenharmony_ci blk_mq_free_tag_set(&rbd_dev->tag_set); 50828c2ecf20Sopenharmony_ciout_disk: 50838c2ecf20Sopenharmony_ci put_disk(disk); 50848c2ecf20Sopenharmony_ci return err; 50858c2ecf20Sopenharmony_ci} 50868c2ecf20Sopenharmony_ci 50878c2ecf20Sopenharmony_ci/* 50888c2ecf20Sopenharmony_ci sysfs 50898c2ecf20Sopenharmony_ci*/ 50908c2ecf20Sopenharmony_ci 50918c2ecf20Sopenharmony_cistatic struct rbd_device *dev_to_rbd_dev(struct device *dev) 50928c2ecf20Sopenharmony_ci{ 50938c2ecf20Sopenharmony_ci return container_of(dev, struct rbd_device, dev); 50948c2ecf20Sopenharmony_ci} 50958c2ecf20Sopenharmony_ci 50968c2ecf20Sopenharmony_cistatic ssize_t rbd_size_show(struct device *dev, 50978c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 50988c2ecf20Sopenharmony_ci{ 50998c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51008c2ecf20Sopenharmony_ci 51018c2ecf20Sopenharmony_ci return sprintf(buf, "%llu\n", 51028c2ecf20Sopenharmony_ci (unsigned long long)rbd_dev->mapping.size); 51038c2ecf20Sopenharmony_ci} 51048c2ecf20Sopenharmony_ci 51058c2ecf20Sopenharmony_cistatic ssize_t rbd_features_show(struct device *dev, 51068c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51078c2ecf20Sopenharmony_ci{ 51088c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51098c2ecf20Sopenharmony_ci 51108c2ecf20Sopenharmony_ci return sprintf(buf, "0x%016llx\n", rbd_dev->header.features); 51118c2ecf20Sopenharmony_ci} 51128c2ecf20Sopenharmony_ci 51138c2ecf20Sopenharmony_cistatic ssize_t rbd_major_show(struct device *dev, 51148c2ecf20Sopenharmony_ci struct 
device_attribute *attr, char *buf) 51158c2ecf20Sopenharmony_ci{ 51168c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51178c2ecf20Sopenharmony_ci 51188c2ecf20Sopenharmony_ci if (rbd_dev->major) 51198c2ecf20Sopenharmony_ci return sprintf(buf, "%d\n", rbd_dev->major); 51208c2ecf20Sopenharmony_ci 51218c2ecf20Sopenharmony_ci return sprintf(buf, "(none)\n"); 51228c2ecf20Sopenharmony_ci} 51238c2ecf20Sopenharmony_ci 51248c2ecf20Sopenharmony_cistatic ssize_t rbd_minor_show(struct device *dev, 51258c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51268c2ecf20Sopenharmony_ci{ 51278c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51288c2ecf20Sopenharmony_ci 51298c2ecf20Sopenharmony_ci return sprintf(buf, "%d\n", rbd_dev->minor); 51308c2ecf20Sopenharmony_ci} 51318c2ecf20Sopenharmony_ci 51328c2ecf20Sopenharmony_cistatic ssize_t rbd_client_addr_show(struct device *dev, 51338c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51348c2ecf20Sopenharmony_ci{ 51358c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51368c2ecf20Sopenharmony_ci struct ceph_entity_addr *client_addr = 51378c2ecf20Sopenharmony_ci ceph_client_addr(rbd_dev->rbd_client->client); 51388c2ecf20Sopenharmony_ci 51398c2ecf20Sopenharmony_ci return sprintf(buf, "%pISpc/%u\n", &client_addr->in_addr, 51408c2ecf20Sopenharmony_ci le32_to_cpu(client_addr->nonce)); 51418c2ecf20Sopenharmony_ci} 51428c2ecf20Sopenharmony_ci 51438c2ecf20Sopenharmony_cistatic ssize_t rbd_client_id_show(struct device *dev, 51448c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51458c2ecf20Sopenharmony_ci{ 51468c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51478c2ecf20Sopenharmony_ci 51488c2ecf20Sopenharmony_ci return sprintf(buf, "client%lld\n", 51498c2ecf20Sopenharmony_ci ceph_client_gid(rbd_dev->rbd_client->client)); 51508c2ecf20Sopenharmony_ci} 51518c2ecf20Sopenharmony_ci 51528c2ecf20Sopenharmony_cistatic 
ssize_t rbd_cluster_fsid_show(struct device *dev, 51538c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51548c2ecf20Sopenharmony_ci{ 51558c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51568c2ecf20Sopenharmony_ci 51578c2ecf20Sopenharmony_ci return sprintf(buf, "%pU\n", &rbd_dev->rbd_client->client->fsid); 51588c2ecf20Sopenharmony_ci} 51598c2ecf20Sopenharmony_ci 51608c2ecf20Sopenharmony_cistatic ssize_t rbd_config_info_show(struct device *dev, 51618c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51628c2ecf20Sopenharmony_ci{ 51638c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51648c2ecf20Sopenharmony_ci 51658c2ecf20Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 51668c2ecf20Sopenharmony_ci return -EPERM; 51678c2ecf20Sopenharmony_ci 51688c2ecf20Sopenharmony_ci return sprintf(buf, "%s\n", rbd_dev->config_info); 51698c2ecf20Sopenharmony_ci} 51708c2ecf20Sopenharmony_ci 51718c2ecf20Sopenharmony_cistatic ssize_t rbd_pool_show(struct device *dev, 51728c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51738c2ecf20Sopenharmony_ci{ 51748c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51758c2ecf20Sopenharmony_ci 51768c2ecf20Sopenharmony_ci return sprintf(buf, "%s\n", rbd_dev->spec->pool_name); 51778c2ecf20Sopenharmony_ci} 51788c2ecf20Sopenharmony_ci 51798c2ecf20Sopenharmony_cistatic ssize_t rbd_pool_id_show(struct device *dev, 51808c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51818c2ecf20Sopenharmony_ci{ 51828c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51838c2ecf20Sopenharmony_ci 51848c2ecf20Sopenharmony_ci return sprintf(buf, "%llu\n", 51858c2ecf20Sopenharmony_ci (unsigned long long) rbd_dev->spec->pool_id); 51868c2ecf20Sopenharmony_ci} 51878c2ecf20Sopenharmony_ci 51888c2ecf20Sopenharmony_cistatic ssize_t rbd_pool_ns_show(struct device *dev, 51898c2ecf20Sopenharmony_ci struct device_attribute *attr, char 
*buf) 51908c2ecf20Sopenharmony_ci{ 51918c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 51928c2ecf20Sopenharmony_ci 51938c2ecf20Sopenharmony_ci return sprintf(buf, "%s\n", rbd_dev->spec->pool_ns ?: ""); 51948c2ecf20Sopenharmony_ci} 51958c2ecf20Sopenharmony_ci 51968c2ecf20Sopenharmony_cistatic ssize_t rbd_name_show(struct device *dev, 51978c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 51988c2ecf20Sopenharmony_ci{ 51998c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 52008c2ecf20Sopenharmony_ci 52018c2ecf20Sopenharmony_ci if (rbd_dev->spec->image_name) 52028c2ecf20Sopenharmony_ci return sprintf(buf, "%s\n", rbd_dev->spec->image_name); 52038c2ecf20Sopenharmony_ci 52048c2ecf20Sopenharmony_ci return sprintf(buf, "(unknown)\n"); 52058c2ecf20Sopenharmony_ci} 52068c2ecf20Sopenharmony_ci 52078c2ecf20Sopenharmony_cistatic ssize_t rbd_image_id_show(struct device *dev, 52088c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 52098c2ecf20Sopenharmony_ci{ 52108c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 52118c2ecf20Sopenharmony_ci 52128c2ecf20Sopenharmony_ci return sprintf(buf, "%s\n", rbd_dev->spec->image_id); 52138c2ecf20Sopenharmony_ci} 52148c2ecf20Sopenharmony_ci 52158c2ecf20Sopenharmony_ci/* 52168c2ecf20Sopenharmony_ci * Shows the name of the currently-mapped snapshot (or 52178c2ecf20Sopenharmony_ci * RBD_SNAP_HEAD_NAME for the base image). 
52188c2ecf20Sopenharmony_ci */ 52198c2ecf20Sopenharmony_cistatic ssize_t rbd_snap_show(struct device *dev, 52208c2ecf20Sopenharmony_ci struct device_attribute *attr, 52218c2ecf20Sopenharmony_ci char *buf) 52228c2ecf20Sopenharmony_ci{ 52238c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 52248c2ecf20Sopenharmony_ci 52258c2ecf20Sopenharmony_ci return sprintf(buf, "%s\n", rbd_dev->spec->snap_name); 52268c2ecf20Sopenharmony_ci} 52278c2ecf20Sopenharmony_ci 52288c2ecf20Sopenharmony_cistatic ssize_t rbd_snap_id_show(struct device *dev, 52298c2ecf20Sopenharmony_ci struct device_attribute *attr, char *buf) 52308c2ecf20Sopenharmony_ci{ 52318c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 52328c2ecf20Sopenharmony_ci 52338c2ecf20Sopenharmony_ci return sprintf(buf, "%llu\n", rbd_dev->spec->snap_id); 52348c2ecf20Sopenharmony_ci} 52358c2ecf20Sopenharmony_ci 52368c2ecf20Sopenharmony_ci/* 52378c2ecf20Sopenharmony_ci * For a v2 image, shows the chain of parent images, separated by empty 52388c2ecf20Sopenharmony_ci * lines. For v1 images or if there is no parent, shows "(no parent 52398c2ecf20Sopenharmony_ci * image)". 
52408c2ecf20Sopenharmony_ci */ 52418c2ecf20Sopenharmony_cistatic ssize_t rbd_parent_show(struct device *dev, 52428c2ecf20Sopenharmony_ci struct device_attribute *attr, 52438c2ecf20Sopenharmony_ci char *buf) 52448c2ecf20Sopenharmony_ci{ 52458c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 52468c2ecf20Sopenharmony_ci ssize_t count = 0; 52478c2ecf20Sopenharmony_ci 52488c2ecf20Sopenharmony_ci if (!rbd_dev->parent) 52498c2ecf20Sopenharmony_ci return sprintf(buf, "(no parent image)\n"); 52508c2ecf20Sopenharmony_ci 52518c2ecf20Sopenharmony_ci for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) { 52528c2ecf20Sopenharmony_ci struct rbd_spec *spec = rbd_dev->parent_spec; 52538c2ecf20Sopenharmony_ci 52548c2ecf20Sopenharmony_ci count += sprintf(&buf[count], "%s" 52558c2ecf20Sopenharmony_ci "pool_id %llu\npool_name %s\n" 52568c2ecf20Sopenharmony_ci "pool_ns %s\n" 52578c2ecf20Sopenharmony_ci "image_id %s\nimage_name %s\n" 52588c2ecf20Sopenharmony_ci "snap_id %llu\nsnap_name %s\n" 52598c2ecf20Sopenharmony_ci "overlap %llu\n", 52608c2ecf20Sopenharmony_ci !count ? "" : "\n", /* first? 
*/ 52618c2ecf20Sopenharmony_ci spec->pool_id, spec->pool_name, 52628c2ecf20Sopenharmony_ci spec->pool_ns ?: "", 52638c2ecf20Sopenharmony_ci spec->image_id, spec->image_name ?: "(unknown)", 52648c2ecf20Sopenharmony_ci spec->snap_id, spec->snap_name, 52658c2ecf20Sopenharmony_ci rbd_dev->parent_overlap); 52668c2ecf20Sopenharmony_ci } 52678c2ecf20Sopenharmony_ci 52688c2ecf20Sopenharmony_ci return count; 52698c2ecf20Sopenharmony_ci} 52708c2ecf20Sopenharmony_ci 52718c2ecf20Sopenharmony_cistatic ssize_t rbd_image_refresh(struct device *dev, 52728c2ecf20Sopenharmony_ci struct device_attribute *attr, 52738c2ecf20Sopenharmony_ci const char *buf, 52748c2ecf20Sopenharmony_ci size_t size) 52758c2ecf20Sopenharmony_ci{ 52768c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 52778c2ecf20Sopenharmony_ci int ret; 52788c2ecf20Sopenharmony_ci 52798c2ecf20Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 52808c2ecf20Sopenharmony_ci return -EPERM; 52818c2ecf20Sopenharmony_ci 52828c2ecf20Sopenharmony_ci ret = rbd_dev_refresh(rbd_dev); 52838c2ecf20Sopenharmony_ci if (ret) 52848c2ecf20Sopenharmony_ci return ret; 52858c2ecf20Sopenharmony_ci 52868c2ecf20Sopenharmony_ci return size; 52878c2ecf20Sopenharmony_ci} 52888c2ecf20Sopenharmony_ci 52898c2ecf20Sopenharmony_cistatic DEVICE_ATTR(size, 0444, rbd_size_show, NULL); 52908c2ecf20Sopenharmony_cistatic DEVICE_ATTR(features, 0444, rbd_features_show, NULL); 52918c2ecf20Sopenharmony_cistatic DEVICE_ATTR(major, 0444, rbd_major_show, NULL); 52928c2ecf20Sopenharmony_cistatic DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL); 52938c2ecf20Sopenharmony_cistatic DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL); 52948c2ecf20Sopenharmony_cistatic DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL); 52958c2ecf20Sopenharmony_cistatic DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL); 52968c2ecf20Sopenharmony_cistatic DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL); 52978c2ecf20Sopenharmony_cistatic 
DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL); 52988c2ecf20Sopenharmony_cistatic DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL); 52998c2ecf20Sopenharmony_cistatic DEVICE_ATTR(pool_ns, 0444, rbd_pool_ns_show, NULL); 53008c2ecf20Sopenharmony_cistatic DEVICE_ATTR(name, 0444, rbd_name_show, NULL); 53018c2ecf20Sopenharmony_cistatic DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL); 53028c2ecf20Sopenharmony_cistatic DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh); 53038c2ecf20Sopenharmony_cistatic DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL); 53048c2ecf20Sopenharmony_cistatic DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL); 53058c2ecf20Sopenharmony_cistatic DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL); 53068c2ecf20Sopenharmony_ci 53078c2ecf20Sopenharmony_cistatic struct attribute *rbd_attrs[] = { 53088c2ecf20Sopenharmony_ci &dev_attr_size.attr, 53098c2ecf20Sopenharmony_ci &dev_attr_features.attr, 53108c2ecf20Sopenharmony_ci &dev_attr_major.attr, 53118c2ecf20Sopenharmony_ci &dev_attr_minor.attr, 53128c2ecf20Sopenharmony_ci &dev_attr_client_addr.attr, 53138c2ecf20Sopenharmony_ci &dev_attr_client_id.attr, 53148c2ecf20Sopenharmony_ci &dev_attr_cluster_fsid.attr, 53158c2ecf20Sopenharmony_ci &dev_attr_config_info.attr, 53168c2ecf20Sopenharmony_ci &dev_attr_pool.attr, 53178c2ecf20Sopenharmony_ci &dev_attr_pool_id.attr, 53188c2ecf20Sopenharmony_ci &dev_attr_pool_ns.attr, 53198c2ecf20Sopenharmony_ci &dev_attr_name.attr, 53208c2ecf20Sopenharmony_ci &dev_attr_image_id.attr, 53218c2ecf20Sopenharmony_ci &dev_attr_current_snap.attr, 53228c2ecf20Sopenharmony_ci &dev_attr_snap_id.attr, 53238c2ecf20Sopenharmony_ci &dev_attr_parent.attr, 53248c2ecf20Sopenharmony_ci &dev_attr_refresh.attr, 53258c2ecf20Sopenharmony_ci NULL 53268c2ecf20Sopenharmony_ci}; 53278c2ecf20Sopenharmony_ci 53288c2ecf20Sopenharmony_cistatic struct attribute_group rbd_attr_group = { 53298c2ecf20Sopenharmony_ci .attrs = rbd_attrs, 53308c2ecf20Sopenharmony_ci}; 53318c2ecf20Sopenharmony_ci 
53328c2ecf20Sopenharmony_cistatic const struct attribute_group *rbd_attr_groups[] = { 53338c2ecf20Sopenharmony_ci &rbd_attr_group, 53348c2ecf20Sopenharmony_ci NULL 53358c2ecf20Sopenharmony_ci}; 53368c2ecf20Sopenharmony_ci 53378c2ecf20Sopenharmony_cistatic void rbd_dev_release(struct device *dev); 53388c2ecf20Sopenharmony_ci 53398c2ecf20Sopenharmony_cistatic const struct device_type rbd_device_type = { 53408c2ecf20Sopenharmony_ci .name = "rbd", 53418c2ecf20Sopenharmony_ci .groups = rbd_attr_groups, 53428c2ecf20Sopenharmony_ci .release = rbd_dev_release, 53438c2ecf20Sopenharmony_ci}; 53448c2ecf20Sopenharmony_ci 53458c2ecf20Sopenharmony_cistatic struct rbd_spec *rbd_spec_get(struct rbd_spec *spec) 53468c2ecf20Sopenharmony_ci{ 53478c2ecf20Sopenharmony_ci kref_get(&spec->kref); 53488c2ecf20Sopenharmony_ci 53498c2ecf20Sopenharmony_ci return spec; 53508c2ecf20Sopenharmony_ci} 53518c2ecf20Sopenharmony_ci 53528c2ecf20Sopenharmony_cistatic void rbd_spec_free(struct kref *kref); 53538c2ecf20Sopenharmony_cistatic void rbd_spec_put(struct rbd_spec *spec) 53548c2ecf20Sopenharmony_ci{ 53558c2ecf20Sopenharmony_ci if (spec) 53568c2ecf20Sopenharmony_ci kref_put(&spec->kref, rbd_spec_free); 53578c2ecf20Sopenharmony_ci} 53588c2ecf20Sopenharmony_ci 53598c2ecf20Sopenharmony_cistatic struct rbd_spec *rbd_spec_alloc(void) 53608c2ecf20Sopenharmony_ci{ 53618c2ecf20Sopenharmony_ci struct rbd_spec *spec; 53628c2ecf20Sopenharmony_ci 53638c2ecf20Sopenharmony_ci spec = kzalloc(sizeof (*spec), GFP_KERNEL); 53648c2ecf20Sopenharmony_ci if (!spec) 53658c2ecf20Sopenharmony_ci return NULL; 53668c2ecf20Sopenharmony_ci 53678c2ecf20Sopenharmony_ci spec->pool_id = CEPH_NOPOOL; 53688c2ecf20Sopenharmony_ci spec->snap_id = CEPH_NOSNAP; 53698c2ecf20Sopenharmony_ci kref_init(&spec->kref); 53708c2ecf20Sopenharmony_ci 53718c2ecf20Sopenharmony_ci return spec; 53728c2ecf20Sopenharmony_ci} 53738c2ecf20Sopenharmony_ci 53748c2ecf20Sopenharmony_cistatic void rbd_spec_free(struct kref *kref) 
53758c2ecf20Sopenharmony_ci{ 53768c2ecf20Sopenharmony_ci struct rbd_spec *spec = container_of(kref, struct rbd_spec, kref); 53778c2ecf20Sopenharmony_ci 53788c2ecf20Sopenharmony_ci kfree(spec->pool_name); 53798c2ecf20Sopenharmony_ci kfree(spec->pool_ns); 53808c2ecf20Sopenharmony_ci kfree(spec->image_id); 53818c2ecf20Sopenharmony_ci kfree(spec->image_name); 53828c2ecf20Sopenharmony_ci kfree(spec->snap_name); 53838c2ecf20Sopenharmony_ci kfree(spec); 53848c2ecf20Sopenharmony_ci} 53858c2ecf20Sopenharmony_ci 53868c2ecf20Sopenharmony_cistatic void rbd_dev_free(struct rbd_device *rbd_dev) 53878c2ecf20Sopenharmony_ci{ 53888c2ecf20Sopenharmony_ci WARN_ON(rbd_dev->watch_state != RBD_WATCH_STATE_UNREGISTERED); 53898c2ecf20Sopenharmony_ci WARN_ON(rbd_dev->lock_state != RBD_LOCK_STATE_UNLOCKED); 53908c2ecf20Sopenharmony_ci 53918c2ecf20Sopenharmony_ci ceph_oid_destroy(&rbd_dev->header_oid); 53928c2ecf20Sopenharmony_ci ceph_oloc_destroy(&rbd_dev->header_oloc); 53938c2ecf20Sopenharmony_ci kfree(rbd_dev->config_info); 53948c2ecf20Sopenharmony_ci 53958c2ecf20Sopenharmony_ci rbd_put_client(rbd_dev->rbd_client); 53968c2ecf20Sopenharmony_ci rbd_spec_put(rbd_dev->spec); 53978c2ecf20Sopenharmony_ci kfree(rbd_dev->opts); 53988c2ecf20Sopenharmony_ci kfree(rbd_dev); 53998c2ecf20Sopenharmony_ci} 54008c2ecf20Sopenharmony_ci 54018c2ecf20Sopenharmony_cistatic void rbd_dev_release(struct device *dev) 54028c2ecf20Sopenharmony_ci{ 54038c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); 54048c2ecf20Sopenharmony_ci bool need_put = !!rbd_dev->opts; 54058c2ecf20Sopenharmony_ci 54068c2ecf20Sopenharmony_ci if (need_put) { 54078c2ecf20Sopenharmony_ci destroy_workqueue(rbd_dev->task_wq); 54088c2ecf20Sopenharmony_ci ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); 54098c2ecf20Sopenharmony_ci } 54108c2ecf20Sopenharmony_ci 54118c2ecf20Sopenharmony_ci rbd_dev_free(rbd_dev); 54128c2ecf20Sopenharmony_ci 54138c2ecf20Sopenharmony_ci /* 54148c2ecf20Sopenharmony_ci * This is racy, but way 
better than putting module outside of 54158c2ecf20Sopenharmony_ci * the release callback. The race window is pretty small, so 54168c2ecf20Sopenharmony_ci * doing something similar to dm (dm-builtin.c) is overkill. 54178c2ecf20Sopenharmony_ci */ 54188c2ecf20Sopenharmony_ci if (need_put) 54198c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 54208c2ecf20Sopenharmony_ci} 54218c2ecf20Sopenharmony_ci 54228c2ecf20Sopenharmony_cistatic struct rbd_device *__rbd_dev_create(struct rbd_spec *spec) 54238c2ecf20Sopenharmony_ci{ 54248c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev; 54258c2ecf20Sopenharmony_ci 54268c2ecf20Sopenharmony_ci rbd_dev = kzalloc(sizeof(*rbd_dev), GFP_KERNEL); 54278c2ecf20Sopenharmony_ci if (!rbd_dev) 54288c2ecf20Sopenharmony_ci return NULL; 54298c2ecf20Sopenharmony_ci 54308c2ecf20Sopenharmony_ci spin_lock_init(&rbd_dev->lock); 54318c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rbd_dev->node); 54328c2ecf20Sopenharmony_ci init_rwsem(&rbd_dev->header_rwsem); 54338c2ecf20Sopenharmony_ci 54348c2ecf20Sopenharmony_ci rbd_dev->header.data_pool_id = CEPH_NOPOOL; 54358c2ecf20Sopenharmony_ci ceph_oid_init(&rbd_dev->header_oid); 54368c2ecf20Sopenharmony_ci rbd_dev->header_oloc.pool = spec->pool_id; 54378c2ecf20Sopenharmony_ci if (spec->pool_ns) { 54388c2ecf20Sopenharmony_ci WARN_ON(!*spec->pool_ns); 54398c2ecf20Sopenharmony_ci rbd_dev->header_oloc.pool_ns = 54408c2ecf20Sopenharmony_ci ceph_find_or_create_string(spec->pool_ns, 54418c2ecf20Sopenharmony_ci strlen(spec->pool_ns)); 54428c2ecf20Sopenharmony_ci } 54438c2ecf20Sopenharmony_ci 54448c2ecf20Sopenharmony_ci mutex_init(&rbd_dev->watch_mutex); 54458c2ecf20Sopenharmony_ci rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED; 54468c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&rbd_dev->watch_dwork, rbd_reregister_watch); 54478c2ecf20Sopenharmony_ci 54488c2ecf20Sopenharmony_ci init_rwsem(&rbd_dev->lock_rwsem); 54498c2ecf20Sopenharmony_ci rbd_dev->lock_state = RBD_LOCK_STATE_UNLOCKED; 54508c2ecf20Sopenharmony_ci 
INIT_WORK(&rbd_dev->acquired_lock_work, rbd_notify_acquired_lock); 54518c2ecf20Sopenharmony_ci INIT_WORK(&rbd_dev->released_lock_work, rbd_notify_released_lock); 54528c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&rbd_dev->lock_dwork, rbd_acquire_lock); 54538c2ecf20Sopenharmony_ci INIT_WORK(&rbd_dev->unlock_work, rbd_release_lock_work); 54548c2ecf20Sopenharmony_ci spin_lock_init(&rbd_dev->lock_lists_lock); 54558c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rbd_dev->acquiring_list); 54568c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rbd_dev->running_list); 54578c2ecf20Sopenharmony_ci init_completion(&rbd_dev->acquire_wait); 54588c2ecf20Sopenharmony_ci init_completion(&rbd_dev->releasing_wait); 54598c2ecf20Sopenharmony_ci 54608c2ecf20Sopenharmony_ci spin_lock_init(&rbd_dev->object_map_lock); 54618c2ecf20Sopenharmony_ci 54628c2ecf20Sopenharmony_ci rbd_dev->dev.bus = &rbd_bus_type; 54638c2ecf20Sopenharmony_ci rbd_dev->dev.type = &rbd_device_type; 54648c2ecf20Sopenharmony_ci rbd_dev->dev.parent = &rbd_root_dev; 54658c2ecf20Sopenharmony_ci device_initialize(&rbd_dev->dev); 54668c2ecf20Sopenharmony_ci 54678c2ecf20Sopenharmony_ci return rbd_dev; 54688c2ecf20Sopenharmony_ci} 54698c2ecf20Sopenharmony_ci 54708c2ecf20Sopenharmony_ci/* 54718c2ecf20Sopenharmony_ci * Create a mapping rbd_dev. 
54728c2ecf20Sopenharmony_ci */ 54738c2ecf20Sopenharmony_cistatic struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, 54748c2ecf20Sopenharmony_ci struct rbd_spec *spec, 54758c2ecf20Sopenharmony_ci struct rbd_options *opts) 54768c2ecf20Sopenharmony_ci{ 54778c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev; 54788c2ecf20Sopenharmony_ci 54798c2ecf20Sopenharmony_ci rbd_dev = __rbd_dev_create(spec); 54808c2ecf20Sopenharmony_ci if (!rbd_dev) 54818c2ecf20Sopenharmony_ci return NULL; 54828c2ecf20Sopenharmony_ci 54838c2ecf20Sopenharmony_ci /* get an id and fill in device name */ 54848c2ecf20Sopenharmony_ci rbd_dev->dev_id = ida_simple_get(&rbd_dev_id_ida, 0, 54858c2ecf20Sopenharmony_ci minor_to_rbd_dev_id(1 << MINORBITS), 54868c2ecf20Sopenharmony_ci GFP_KERNEL); 54878c2ecf20Sopenharmony_ci if (rbd_dev->dev_id < 0) 54888c2ecf20Sopenharmony_ci goto fail_rbd_dev; 54898c2ecf20Sopenharmony_ci 54908c2ecf20Sopenharmony_ci sprintf(rbd_dev->name, RBD_DRV_NAME "%d", rbd_dev->dev_id); 54918c2ecf20Sopenharmony_ci rbd_dev->task_wq = alloc_ordered_workqueue("%s-tasks", WQ_MEM_RECLAIM, 54928c2ecf20Sopenharmony_ci rbd_dev->name); 54938c2ecf20Sopenharmony_ci if (!rbd_dev->task_wq) 54948c2ecf20Sopenharmony_ci goto fail_dev_id; 54958c2ecf20Sopenharmony_ci 54968c2ecf20Sopenharmony_ci /* we have a ref from do_rbd_add() */ 54978c2ecf20Sopenharmony_ci __module_get(THIS_MODULE); 54988c2ecf20Sopenharmony_ci 54998c2ecf20Sopenharmony_ci rbd_dev->rbd_client = rbdc; 55008c2ecf20Sopenharmony_ci rbd_dev->spec = spec; 55018c2ecf20Sopenharmony_ci rbd_dev->opts = opts; 55028c2ecf20Sopenharmony_ci 55038c2ecf20Sopenharmony_ci dout("%s rbd_dev %p dev_id %d\n", __func__, rbd_dev, rbd_dev->dev_id); 55048c2ecf20Sopenharmony_ci return rbd_dev; 55058c2ecf20Sopenharmony_ci 55068c2ecf20Sopenharmony_cifail_dev_id: 55078c2ecf20Sopenharmony_ci ida_simple_remove(&rbd_dev_id_ida, rbd_dev->dev_id); 55088c2ecf20Sopenharmony_cifail_rbd_dev: 55098c2ecf20Sopenharmony_ci rbd_dev_free(rbd_dev); 55108c2ecf20Sopenharmony_ci 
return NULL; 55118c2ecf20Sopenharmony_ci} 55128c2ecf20Sopenharmony_ci 55138c2ecf20Sopenharmony_cistatic void rbd_dev_destroy(struct rbd_device *rbd_dev) 55148c2ecf20Sopenharmony_ci{ 55158c2ecf20Sopenharmony_ci if (rbd_dev) 55168c2ecf20Sopenharmony_ci put_device(&rbd_dev->dev); 55178c2ecf20Sopenharmony_ci} 55188c2ecf20Sopenharmony_ci 55198c2ecf20Sopenharmony_ci/* 55208c2ecf20Sopenharmony_ci * Get the size and object order for an image snapshot, or if 55218c2ecf20Sopenharmony_ci * snap_id is CEPH_NOSNAP, gets this information for the base 55228c2ecf20Sopenharmony_ci * image. 55238c2ecf20Sopenharmony_ci */ 55248c2ecf20Sopenharmony_cistatic int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, 55258c2ecf20Sopenharmony_ci u8 *order, u64 *snap_size) 55268c2ecf20Sopenharmony_ci{ 55278c2ecf20Sopenharmony_ci __le64 snapid = cpu_to_le64(snap_id); 55288c2ecf20Sopenharmony_ci int ret; 55298c2ecf20Sopenharmony_ci struct { 55308c2ecf20Sopenharmony_ci u8 order; 55318c2ecf20Sopenharmony_ci __le64 size; 55328c2ecf20Sopenharmony_ci } __attribute__ ((packed)) size_buf = { 0 }; 55338c2ecf20Sopenharmony_ci 55348c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 55358c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_size", 55368c2ecf20Sopenharmony_ci &snapid, sizeof(snapid), 55378c2ecf20Sopenharmony_ci &size_buf, sizeof(size_buf)); 55388c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 55398c2ecf20Sopenharmony_ci if (ret < 0) 55408c2ecf20Sopenharmony_ci return ret; 55418c2ecf20Sopenharmony_ci if (ret < sizeof (size_buf)) 55428c2ecf20Sopenharmony_ci return -ERANGE; 55438c2ecf20Sopenharmony_ci 55448c2ecf20Sopenharmony_ci if (order) { 55458c2ecf20Sopenharmony_ci *order = size_buf.order; 55468c2ecf20Sopenharmony_ci dout(" order %u", (unsigned int)*order); 55478c2ecf20Sopenharmony_ci } 55488c2ecf20Sopenharmony_ci *snap_size = le64_to_cpu(size_buf.size); 55498c2ecf20Sopenharmony_ci 55508c2ecf20Sopenharmony_ci dout(" 
snap_id 0x%016llx snap_size = %llu\n", 55518c2ecf20Sopenharmony_ci (unsigned long long)snap_id, 55528c2ecf20Sopenharmony_ci (unsigned long long)*snap_size); 55538c2ecf20Sopenharmony_ci 55548c2ecf20Sopenharmony_ci return 0; 55558c2ecf20Sopenharmony_ci} 55568c2ecf20Sopenharmony_ci 55578c2ecf20Sopenharmony_cistatic int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev, 55588c2ecf20Sopenharmony_ci char **pobject_prefix) 55598c2ecf20Sopenharmony_ci{ 55608c2ecf20Sopenharmony_ci size_t size; 55618c2ecf20Sopenharmony_ci void *reply_buf; 55628c2ecf20Sopenharmony_ci char *object_prefix; 55638c2ecf20Sopenharmony_ci int ret; 55648c2ecf20Sopenharmony_ci void *p; 55658c2ecf20Sopenharmony_ci 55668c2ecf20Sopenharmony_ci /* Response will be an encoded string, which includes a length */ 55678c2ecf20Sopenharmony_ci size = sizeof(__le32) + RBD_OBJ_PREFIX_LEN_MAX; 55688c2ecf20Sopenharmony_ci reply_buf = kzalloc(size, GFP_KERNEL); 55698c2ecf20Sopenharmony_ci if (!reply_buf) 55708c2ecf20Sopenharmony_ci return -ENOMEM; 55718c2ecf20Sopenharmony_ci 55728c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 55738c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_object_prefix", 55748c2ecf20Sopenharmony_ci NULL, 0, reply_buf, size); 55758c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 55768c2ecf20Sopenharmony_ci if (ret < 0) 55778c2ecf20Sopenharmony_ci goto out; 55788c2ecf20Sopenharmony_ci 55798c2ecf20Sopenharmony_ci p = reply_buf; 55808c2ecf20Sopenharmony_ci object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL, 55818c2ecf20Sopenharmony_ci GFP_NOIO); 55828c2ecf20Sopenharmony_ci if (IS_ERR(object_prefix)) { 55838c2ecf20Sopenharmony_ci ret = PTR_ERR(object_prefix); 55848c2ecf20Sopenharmony_ci goto out; 55858c2ecf20Sopenharmony_ci } 55868c2ecf20Sopenharmony_ci ret = 0; 55878c2ecf20Sopenharmony_ci 55888c2ecf20Sopenharmony_ci *pobject_prefix = object_prefix; 55898c2ecf20Sopenharmony_ci dout(" object_prefix = %s\n", 
object_prefix); 55908c2ecf20Sopenharmony_ciout: 55918c2ecf20Sopenharmony_ci kfree(reply_buf); 55928c2ecf20Sopenharmony_ci 55938c2ecf20Sopenharmony_ci return ret; 55948c2ecf20Sopenharmony_ci} 55958c2ecf20Sopenharmony_ci 55968c2ecf20Sopenharmony_cistatic int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, 55978c2ecf20Sopenharmony_ci bool read_only, u64 *snap_features) 55988c2ecf20Sopenharmony_ci{ 55998c2ecf20Sopenharmony_ci struct { 56008c2ecf20Sopenharmony_ci __le64 snap_id; 56018c2ecf20Sopenharmony_ci u8 read_only; 56028c2ecf20Sopenharmony_ci } features_in; 56038c2ecf20Sopenharmony_ci struct { 56048c2ecf20Sopenharmony_ci __le64 features; 56058c2ecf20Sopenharmony_ci __le64 incompat; 56068c2ecf20Sopenharmony_ci } __attribute__ ((packed)) features_buf = { 0 }; 56078c2ecf20Sopenharmony_ci u64 unsup; 56088c2ecf20Sopenharmony_ci int ret; 56098c2ecf20Sopenharmony_ci 56108c2ecf20Sopenharmony_ci features_in.snap_id = cpu_to_le64(snap_id); 56118c2ecf20Sopenharmony_ci features_in.read_only = read_only; 56128c2ecf20Sopenharmony_ci 56138c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 56148c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_features", 56158c2ecf20Sopenharmony_ci &features_in, sizeof(features_in), 56168c2ecf20Sopenharmony_ci &features_buf, sizeof(features_buf)); 56178c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 56188c2ecf20Sopenharmony_ci if (ret < 0) 56198c2ecf20Sopenharmony_ci return ret; 56208c2ecf20Sopenharmony_ci if (ret < sizeof (features_buf)) 56218c2ecf20Sopenharmony_ci return -ERANGE; 56228c2ecf20Sopenharmony_ci 56238c2ecf20Sopenharmony_ci unsup = le64_to_cpu(features_buf.incompat) & ~RBD_FEATURES_SUPPORTED; 56248c2ecf20Sopenharmony_ci if (unsup) { 56258c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "image uses unsupported features: 0x%llx", 56268c2ecf20Sopenharmony_ci unsup); 56278c2ecf20Sopenharmony_ci return -ENXIO; 56288c2ecf20Sopenharmony_ci } 
56298c2ecf20Sopenharmony_ci 56308c2ecf20Sopenharmony_ci *snap_features = le64_to_cpu(features_buf.features); 56318c2ecf20Sopenharmony_ci 56328c2ecf20Sopenharmony_ci dout(" snap_id 0x%016llx features = 0x%016llx incompat = 0x%016llx\n", 56338c2ecf20Sopenharmony_ci (unsigned long long)snap_id, 56348c2ecf20Sopenharmony_ci (unsigned long long)*snap_features, 56358c2ecf20Sopenharmony_ci (unsigned long long)le64_to_cpu(features_buf.incompat)); 56368c2ecf20Sopenharmony_ci 56378c2ecf20Sopenharmony_ci return 0; 56388c2ecf20Sopenharmony_ci} 56398c2ecf20Sopenharmony_ci 56408c2ecf20Sopenharmony_ci/* 56418c2ecf20Sopenharmony_ci * These are generic image flags, but since they are used only for 56428c2ecf20Sopenharmony_ci * object map, store them in rbd_dev->object_map_flags. 56438c2ecf20Sopenharmony_ci * 56448c2ecf20Sopenharmony_ci * For the same reason, this function is called only on object map 56458c2ecf20Sopenharmony_ci * (re)load and not on header refresh. 56468c2ecf20Sopenharmony_ci */ 56478c2ecf20Sopenharmony_cistatic int rbd_dev_v2_get_flags(struct rbd_device *rbd_dev) 56488c2ecf20Sopenharmony_ci{ 56498c2ecf20Sopenharmony_ci __le64 snapid = cpu_to_le64(rbd_dev->spec->snap_id); 56508c2ecf20Sopenharmony_ci __le64 flags; 56518c2ecf20Sopenharmony_ci int ret; 56528c2ecf20Sopenharmony_ci 56538c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 56548c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_flags", 56558c2ecf20Sopenharmony_ci &snapid, sizeof(snapid), 56568c2ecf20Sopenharmony_ci &flags, sizeof(flags)); 56578c2ecf20Sopenharmony_ci if (ret < 0) 56588c2ecf20Sopenharmony_ci return ret; 56598c2ecf20Sopenharmony_ci if (ret < sizeof(flags)) 56608c2ecf20Sopenharmony_ci return -EBADMSG; 56618c2ecf20Sopenharmony_ci 56628c2ecf20Sopenharmony_ci rbd_dev->object_map_flags = le64_to_cpu(flags); 56638c2ecf20Sopenharmony_ci return 0; 56648c2ecf20Sopenharmony_ci} 56658c2ecf20Sopenharmony_ci 56668c2ecf20Sopenharmony_cistruct parent_image_info { 
56678c2ecf20Sopenharmony_ci u64 pool_id; 56688c2ecf20Sopenharmony_ci const char *pool_ns; 56698c2ecf20Sopenharmony_ci const char *image_id; 56708c2ecf20Sopenharmony_ci u64 snap_id; 56718c2ecf20Sopenharmony_ci 56728c2ecf20Sopenharmony_ci bool has_overlap; 56738c2ecf20Sopenharmony_ci u64 overlap; 56748c2ecf20Sopenharmony_ci}; 56758c2ecf20Sopenharmony_ci 56768c2ecf20Sopenharmony_cistatic void rbd_parent_info_cleanup(struct parent_image_info *pii) 56778c2ecf20Sopenharmony_ci{ 56788c2ecf20Sopenharmony_ci kfree(pii->pool_ns); 56798c2ecf20Sopenharmony_ci kfree(pii->image_id); 56808c2ecf20Sopenharmony_ci 56818c2ecf20Sopenharmony_ci memset(pii, 0, sizeof(*pii)); 56828c2ecf20Sopenharmony_ci} 56838c2ecf20Sopenharmony_ci 56848c2ecf20Sopenharmony_ci/* 56858c2ecf20Sopenharmony_ci * The caller is responsible for @pii. 56868c2ecf20Sopenharmony_ci */ 56878c2ecf20Sopenharmony_cistatic int decode_parent_image_spec(void **p, void *end, 56888c2ecf20Sopenharmony_ci struct parent_image_info *pii) 56898c2ecf20Sopenharmony_ci{ 56908c2ecf20Sopenharmony_ci u8 struct_v; 56918c2ecf20Sopenharmony_ci u32 struct_len; 56928c2ecf20Sopenharmony_ci int ret; 56938c2ecf20Sopenharmony_ci 56948c2ecf20Sopenharmony_ci ret = ceph_start_decoding(p, end, 1, "ParentImageSpec", 56958c2ecf20Sopenharmony_ci &struct_v, &struct_len); 56968c2ecf20Sopenharmony_ci if (ret) 56978c2ecf20Sopenharmony_ci return ret; 56988c2ecf20Sopenharmony_ci 56998c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, pii->pool_id, e_inval); 57008c2ecf20Sopenharmony_ci pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); 57018c2ecf20Sopenharmony_ci if (IS_ERR(pii->pool_ns)) { 57028c2ecf20Sopenharmony_ci ret = PTR_ERR(pii->pool_ns); 57038c2ecf20Sopenharmony_ci pii->pool_ns = NULL; 57048c2ecf20Sopenharmony_ci return ret; 57058c2ecf20Sopenharmony_ci } 57068c2ecf20Sopenharmony_ci pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL); 57078c2ecf20Sopenharmony_ci if (IS_ERR(pii->image_id)) { 
57088c2ecf20Sopenharmony_ci ret = PTR_ERR(pii->image_id); 57098c2ecf20Sopenharmony_ci pii->image_id = NULL; 57108c2ecf20Sopenharmony_ci return ret; 57118c2ecf20Sopenharmony_ci } 57128c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, pii->snap_id, e_inval); 57138c2ecf20Sopenharmony_ci return 0; 57148c2ecf20Sopenharmony_ci 57158c2ecf20Sopenharmony_cie_inval: 57168c2ecf20Sopenharmony_ci return -EINVAL; 57178c2ecf20Sopenharmony_ci} 57188c2ecf20Sopenharmony_ci 57198c2ecf20Sopenharmony_cistatic int __get_parent_info(struct rbd_device *rbd_dev, 57208c2ecf20Sopenharmony_ci struct page *req_page, 57218c2ecf20Sopenharmony_ci struct page *reply_page, 57228c2ecf20Sopenharmony_ci struct parent_image_info *pii) 57238c2ecf20Sopenharmony_ci{ 57248c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 57258c2ecf20Sopenharmony_ci size_t reply_len = PAGE_SIZE; 57268c2ecf20Sopenharmony_ci void *p, *end; 57278c2ecf20Sopenharmony_ci int ret; 57288c2ecf20Sopenharmony_ci 57298c2ecf20Sopenharmony_ci ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, 57308c2ecf20Sopenharmony_ci "rbd", "parent_get", CEPH_OSD_FLAG_READ, 57318c2ecf20Sopenharmony_ci req_page, sizeof(u64), &reply_page, &reply_len); 57328c2ecf20Sopenharmony_ci if (ret) 57338c2ecf20Sopenharmony_ci return ret == -EOPNOTSUPP ? 
1 : ret; 57348c2ecf20Sopenharmony_ci 57358c2ecf20Sopenharmony_ci p = page_address(reply_page); 57368c2ecf20Sopenharmony_ci end = p + reply_len; 57378c2ecf20Sopenharmony_ci ret = decode_parent_image_spec(&p, end, pii); 57388c2ecf20Sopenharmony_ci if (ret) 57398c2ecf20Sopenharmony_ci return ret; 57408c2ecf20Sopenharmony_ci 57418c2ecf20Sopenharmony_ci ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, 57428c2ecf20Sopenharmony_ci "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ, 57438c2ecf20Sopenharmony_ci req_page, sizeof(u64), &reply_page, &reply_len); 57448c2ecf20Sopenharmony_ci if (ret) 57458c2ecf20Sopenharmony_ci return ret; 57468c2ecf20Sopenharmony_ci 57478c2ecf20Sopenharmony_ci p = page_address(reply_page); 57488c2ecf20Sopenharmony_ci end = p + reply_len; 57498c2ecf20Sopenharmony_ci ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval); 57508c2ecf20Sopenharmony_ci if (pii->has_overlap) 57518c2ecf20Sopenharmony_ci ceph_decode_64_safe(&p, end, pii->overlap, e_inval); 57528c2ecf20Sopenharmony_ci 57538c2ecf20Sopenharmony_ci dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", 57548c2ecf20Sopenharmony_ci __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, 57558c2ecf20Sopenharmony_ci pii->has_overlap, pii->overlap); 57568c2ecf20Sopenharmony_ci return 0; 57578c2ecf20Sopenharmony_ci 57588c2ecf20Sopenharmony_cie_inval: 57598c2ecf20Sopenharmony_ci return -EINVAL; 57608c2ecf20Sopenharmony_ci} 57618c2ecf20Sopenharmony_ci 57628c2ecf20Sopenharmony_ci/* 57638c2ecf20Sopenharmony_ci * The caller is responsible for @pii. 
57648c2ecf20Sopenharmony_ci */ 57658c2ecf20Sopenharmony_cistatic int __get_parent_info_legacy(struct rbd_device *rbd_dev, 57668c2ecf20Sopenharmony_ci struct page *req_page, 57678c2ecf20Sopenharmony_ci struct page *reply_page, 57688c2ecf20Sopenharmony_ci struct parent_image_info *pii) 57698c2ecf20Sopenharmony_ci{ 57708c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 57718c2ecf20Sopenharmony_ci size_t reply_len = PAGE_SIZE; 57728c2ecf20Sopenharmony_ci void *p, *end; 57738c2ecf20Sopenharmony_ci int ret; 57748c2ecf20Sopenharmony_ci 57758c2ecf20Sopenharmony_ci ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, 57768c2ecf20Sopenharmony_ci "rbd", "get_parent", CEPH_OSD_FLAG_READ, 57778c2ecf20Sopenharmony_ci req_page, sizeof(u64), &reply_page, &reply_len); 57788c2ecf20Sopenharmony_ci if (ret) 57798c2ecf20Sopenharmony_ci return ret; 57808c2ecf20Sopenharmony_ci 57818c2ecf20Sopenharmony_ci p = page_address(reply_page); 57828c2ecf20Sopenharmony_ci end = p + reply_len; 57838c2ecf20Sopenharmony_ci ceph_decode_64_safe(&p, end, pii->pool_id, e_inval); 57848c2ecf20Sopenharmony_ci pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 57858c2ecf20Sopenharmony_ci if (IS_ERR(pii->image_id)) { 57868c2ecf20Sopenharmony_ci ret = PTR_ERR(pii->image_id); 57878c2ecf20Sopenharmony_ci pii->image_id = NULL; 57888c2ecf20Sopenharmony_ci return ret; 57898c2ecf20Sopenharmony_ci } 57908c2ecf20Sopenharmony_ci ceph_decode_64_safe(&p, end, pii->snap_id, e_inval); 57918c2ecf20Sopenharmony_ci pii->has_overlap = true; 57928c2ecf20Sopenharmony_ci ceph_decode_64_safe(&p, end, pii->overlap, e_inval); 57938c2ecf20Sopenharmony_ci 57948c2ecf20Sopenharmony_ci dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n", 57958c2ecf20Sopenharmony_ci __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id, 57968c2ecf20Sopenharmony_ci pii->has_overlap, pii->overlap); 57978c2ecf20Sopenharmony_ci 
return 0; 57988c2ecf20Sopenharmony_ci 57998c2ecf20Sopenharmony_cie_inval: 58008c2ecf20Sopenharmony_ci return -EINVAL; 58018c2ecf20Sopenharmony_ci} 58028c2ecf20Sopenharmony_ci 58038c2ecf20Sopenharmony_cistatic int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev, 58048c2ecf20Sopenharmony_ci struct parent_image_info *pii) 58058c2ecf20Sopenharmony_ci{ 58068c2ecf20Sopenharmony_ci struct page *req_page, *reply_page; 58078c2ecf20Sopenharmony_ci void *p; 58088c2ecf20Sopenharmony_ci int ret; 58098c2ecf20Sopenharmony_ci 58108c2ecf20Sopenharmony_ci req_page = alloc_page(GFP_KERNEL); 58118c2ecf20Sopenharmony_ci if (!req_page) 58128c2ecf20Sopenharmony_ci return -ENOMEM; 58138c2ecf20Sopenharmony_ci 58148c2ecf20Sopenharmony_ci reply_page = alloc_page(GFP_KERNEL); 58158c2ecf20Sopenharmony_ci if (!reply_page) { 58168c2ecf20Sopenharmony_ci __free_page(req_page); 58178c2ecf20Sopenharmony_ci return -ENOMEM; 58188c2ecf20Sopenharmony_ci } 58198c2ecf20Sopenharmony_ci 58208c2ecf20Sopenharmony_ci p = page_address(req_page); 58218c2ecf20Sopenharmony_ci ceph_encode_64(&p, rbd_dev->spec->snap_id); 58228c2ecf20Sopenharmony_ci ret = __get_parent_info(rbd_dev, req_page, reply_page, pii); 58238c2ecf20Sopenharmony_ci if (ret > 0) 58248c2ecf20Sopenharmony_ci ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page, 58258c2ecf20Sopenharmony_ci pii); 58268c2ecf20Sopenharmony_ci 58278c2ecf20Sopenharmony_ci __free_page(req_page); 58288c2ecf20Sopenharmony_ci __free_page(reply_page); 58298c2ecf20Sopenharmony_ci return ret; 58308c2ecf20Sopenharmony_ci} 58318c2ecf20Sopenharmony_ci 58328c2ecf20Sopenharmony_cistatic int rbd_dev_setup_parent(struct rbd_device *rbd_dev) 58338c2ecf20Sopenharmony_ci{ 58348c2ecf20Sopenharmony_ci struct rbd_spec *parent_spec; 58358c2ecf20Sopenharmony_ci struct parent_image_info pii = { 0 }; 58368c2ecf20Sopenharmony_ci int ret; 58378c2ecf20Sopenharmony_ci 58388c2ecf20Sopenharmony_ci parent_spec = rbd_spec_alloc(); 58398c2ecf20Sopenharmony_ci if (!parent_spec) 
58408c2ecf20Sopenharmony_ci return -ENOMEM; 58418c2ecf20Sopenharmony_ci 58428c2ecf20Sopenharmony_ci ret = rbd_dev_v2_parent_info(rbd_dev, &pii); 58438c2ecf20Sopenharmony_ci if (ret) 58448c2ecf20Sopenharmony_ci goto out_err; 58458c2ecf20Sopenharmony_ci 58468c2ecf20Sopenharmony_ci if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) 58478c2ecf20Sopenharmony_ci goto out; /* No parent? No problem. */ 58488c2ecf20Sopenharmony_ci 58498c2ecf20Sopenharmony_ci /* The ceph file layout needs to fit pool id in 32 bits */ 58508c2ecf20Sopenharmony_ci 58518c2ecf20Sopenharmony_ci ret = -EIO; 58528c2ecf20Sopenharmony_ci if (pii.pool_id > (u64)U32_MAX) { 58538c2ecf20Sopenharmony_ci rbd_warn(NULL, "parent pool id too large (%llu > %u)", 58548c2ecf20Sopenharmony_ci (unsigned long long)pii.pool_id, U32_MAX); 58558c2ecf20Sopenharmony_ci goto out_err; 58568c2ecf20Sopenharmony_ci } 58578c2ecf20Sopenharmony_ci 58588c2ecf20Sopenharmony_ci /* 58598c2ecf20Sopenharmony_ci * The parent won't change except when the clone is flattened, 58608c2ecf20Sopenharmony_ci * so we only need to record the parent image spec once. 58618c2ecf20Sopenharmony_ci */ 58628c2ecf20Sopenharmony_ci parent_spec->pool_id = pii.pool_id; 58638c2ecf20Sopenharmony_ci if (pii.pool_ns && *pii.pool_ns) { 58648c2ecf20Sopenharmony_ci parent_spec->pool_ns = pii.pool_ns; 58658c2ecf20Sopenharmony_ci pii.pool_ns = NULL; 58668c2ecf20Sopenharmony_ci } 58678c2ecf20Sopenharmony_ci parent_spec->image_id = pii.image_id; 58688c2ecf20Sopenharmony_ci pii.image_id = NULL; 58698c2ecf20Sopenharmony_ci parent_spec->snap_id = pii.snap_id; 58708c2ecf20Sopenharmony_ci 58718c2ecf20Sopenharmony_ci rbd_assert(!rbd_dev->parent_spec); 58728c2ecf20Sopenharmony_ci rbd_dev->parent_spec = parent_spec; 58738c2ecf20Sopenharmony_ci parent_spec = NULL; /* rbd_dev now owns this */ 58748c2ecf20Sopenharmony_ci 58758c2ecf20Sopenharmony_ci /* 58768c2ecf20Sopenharmony_ci * Record the parent overlap. 
If it's zero, issue a warning as 58778c2ecf20Sopenharmony_ci * we will proceed as if there is no parent. 58788c2ecf20Sopenharmony_ci */ 58798c2ecf20Sopenharmony_ci if (!pii.overlap) 58808c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); 58818c2ecf20Sopenharmony_ci rbd_dev->parent_overlap = pii.overlap; 58828c2ecf20Sopenharmony_ci 58838c2ecf20Sopenharmony_ciout: 58848c2ecf20Sopenharmony_ci ret = 0; 58858c2ecf20Sopenharmony_ciout_err: 58868c2ecf20Sopenharmony_ci rbd_parent_info_cleanup(&pii); 58878c2ecf20Sopenharmony_ci rbd_spec_put(parent_spec); 58888c2ecf20Sopenharmony_ci return ret; 58898c2ecf20Sopenharmony_ci} 58908c2ecf20Sopenharmony_ci 58918c2ecf20Sopenharmony_cistatic int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev, 58928c2ecf20Sopenharmony_ci u64 *stripe_unit, u64 *stripe_count) 58938c2ecf20Sopenharmony_ci{ 58948c2ecf20Sopenharmony_ci struct { 58958c2ecf20Sopenharmony_ci __le64 stripe_unit; 58968c2ecf20Sopenharmony_ci __le64 stripe_count; 58978c2ecf20Sopenharmony_ci } __attribute__ ((packed)) striping_info_buf = { 0 }; 58988c2ecf20Sopenharmony_ci size_t size = sizeof (striping_info_buf); 58998c2ecf20Sopenharmony_ci int ret; 59008c2ecf20Sopenharmony_ci 59018c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 59028c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_stripe_unit_count", 59038c2ecf20Sopenharmony_ci NULL, 0, &striping_info_buf, size); 59048c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 59058c2ecf20Sopenharmony_ci if (ret < 0) 59068c2ecf20Sopenharmony_ci return ret; 59078c2ecf20Sopenharmony_ci if (ret < size) 59088c2ecf20Sopenharmony_ci return -ERANGE; 59098c2ecf20Sopenharmony_ci 59108c2ecf20Sopenharmony_ci *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit); 59118c2ecf20Sopenharmony_ci *stripe_count = le64_to_cpu(striping_info_buf.stripe_count); 59128c2ecf20Sopenharmony_ci dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, 
59138c2ecf20Sopenharmony_ci *stripe_count); 59148c2ecf20Sopenharmony_ci 59158c2ecf20Sopenharmony_ci return 0; 59168c2ecf20Sopenharmony_ci} 59178c2ecf20Sopenharmony_ci 59188c2ecf20Sopenharmony_cistatic int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id) 59198c2ecf20Sopenharmony_ci{ 59208c2ecf20Sopenharmony_ci __le64 data_pool_buf; 59218c2ecf20Sopenharmony_ci int ret; 59228c2ecf20Sopenharmony_ci 59238c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 59248c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_data_pool", 59258c2ecf20Sopenharmony_ci NULL, 0, &data_pool_buf, 59268c2ecf20Sopenharmony_ci sizeof(data_pool_buf)); 59278c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 59288c2ecf20Sopenharmony_ci if (ret < 0) 59298c2ecf20Sopenharmony_ci return ret; 59308c2ecf20Sopenharmony_ci if (ret < sizeof(data_pool_buf)) 59318c2ecf20Sopenharmony_ci return -EBADMSG; 59328c2ecf20Sopenharmony_ci 59338c2ecf20Sopenharmony_ci *data_pool_id = le64_to_cpu(data_pool_buf); 59348c2ecf20Sopenharmony_ci dout(" data_pool_id = %lld\n", *data_pool_id); 59358c2ecf20Sopenharmony_ci WARN_ON(*data_pool_id == CEPH_NOPOOL); 59368c2ecf20Sopenharmony_ci 59378c2ecf20Sopenharmony_ci return 0; 59388c2ecf20Sopenharmony_ci} 59398c2ecf20Sopenharmony_ci 59408c2ecf20Sopenharmony_cistatic char *rbd_dev_image_name(struct rbd_device *rbd_dev) 59418c2ecf20Sopenharmony_ci{ 59428c2ecf20Sopenharmony_ci CEPH_DEFINE_OID_ONSTACK(oid); 59438c2ecf20Sopenharmony_ci size_t image_id_size; 59448c2ecf20Sopenharmony_ci char *image_id; 59458c2ecf20Sopenharmony_ci void *p; 59468c2ecf20Sopenharmony_ci void *end; 59478c2ecf20Sopenharmony_ci size_t size; 59488c2ecf20Sopenharmony_ci void *reply_buf = NULL; 59498c2ecf20Sopenharmony_ci size_t len = 0; 59508c2ecf20Sopenharmony_ci char *image_name = NULL; 59518c2ecf20Sopenharmony_ci int ret; 59528c2ecf20Sopenharmony_ci 59538c2ecf20Sopenharmony_ci rbd_assert(!rbd_dev->spec->image_name); 
59548c2ecf20Sopenharmony_ci 59558c2ecf20Sopenharmony_ci len = strlen(rbd_dev->spec->image_id); 59568c2ecf20Sopenharmony_ci image_id_size = sizeof (__le32) + len; 59578c2ecf20Sopenharmony_ci image_id = kmalloc(image_id_size, GFP_KERNEL); 59588c2ecf20Sopenharmony_ci if (!image_id) 59598c2ecf20Sopenharmony_ci return NULL; 59608c2ecf20Sopenharmony_ci 59618c2ecf20Sopenharmony_ci p = image_id; 59628c2ecf20Sopenharmony_ci end = image_id + image_id_size; 59638c2ecf20Sopenharmony_ci ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32)len); 59648c2ecf20Sopenharmony_ci 59658c2ecf20Sopenharmony_ci size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; 59668c2ecf20Sopenharmony_ci reply_buf = kmalloc(size, GFP_KERNEL); 59678c2ecf20Sopenharmony_ci if (!reply_buf) 59688c2ecf20Sopenharmony_ci goto out; 59698c2ecf20Sopenharmony_ci 59708c2ecf20Sopenharmony_ci ceph_oid_printf(&oid, "%s", RBD_DIRECTORY); 59718c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, 59728c2ecf20Sopenharmony_ci "dir_get_name", image_id, image_id_size, 59738c2ecf20Sopenharmony_ci reply_buf, size); 59748c2ecf20Sopenharmony_ci if (ret < 0) 59758c2ecf20Sopenharmony_ci goto out; 59768c2ecf20Sopenharmony_ci p = reply_buf; 59778c2ecf20Sopenharmony_ci end = reply_buf + ret; 59788c2ecf20Sopenharmony_ci 59798c2ecf20Sopenharmony_ci image_name = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); 59808c2ecf20Sopenharmony_ci if (IS_ERR(image_name)) 59818c2ecf20Sopenharmony_ci image_name = NULL; 59828c2ecf20Sopenharmony_ci else 59838c2ecf20Sopenharmony_ci dout("%s: name is %s len is %zd\n", __func__, image_name, len); 59848c2ecf20Sopenharmony_ciout: 59858c2ecf20Sopenharmony_ci kfree(reply_buf); 59868c2ecf20Sopenharmony_ci kfree(image_id); 59878c2ecf20Sopenharmony_ci 59888c2ecf20Sopenharmony_ci return image_name; 59898c2ecf20Sopenharmony_ci} 59908c2ecf20Sopenharmony_ci 59918c2ecf20Sopenharmony_cistatic u64 rbd_v1_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) 
59928c2ecf20Sopenharmony_ci{ 59938c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc = rbd_dev->header.snapc; 59948c2ecf20Sopenharmony_ci const char *snap_name; 59958c2ecf20Sopenharmony_ci u32 which = 0; 59968c2ecf20Sopenharmony_ci 59978c2ecf20Sopenharmony_ci /* Skip over names until we find the one we are looking for */ 59988c2ecf20Sopenharmony_ci 59998c2ecf20Sopenharmony_ci snap_name = rbd_dev->header.snap_names; 60008c2ecf20Sopenharmony_ci while (which < snapc->num_snaps) { 60018c2ecf20Sopenharmony_ci if (!strcmp(name, snap_name)) 60028c2ecf20Sopenharmony_ci return snapc->snaps[which]; 60038c2ecf20Sopenharmony_ci snap_name += strlen(snap_name) + 1; 60048c2ecf20Sopenharmony_ci which++; 60058c2ecf20Sopenharmony_ci } 60068c2ecf20Sopenharmony_ci return CEPH_NOSNAP; 60078c2ecf20Sopenharmony_ci} 60088c2ecf20Sopenharmony_ci 60098c2ecf20Sopenharmony_cistatic u64 rbd_v2_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) 60108c2ecf20Sopenharmony_ci{ 60118c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc = rbd_dev->header.snapc; 60128c2ecf20Sopenharmony_ci u32 which; 60138c2ecf20Sopenharmony_ci bool found = false; 60148c2ecf20Sopenharmony_ci u64 snap_id; 60158c2ecf20Sopenharmony_ci 60168c2ecf20Sopenharmony_ci for (which = 0; !found && which < snapc->num_snaps; which++) { 60178c2ecf20Sopenharmony_ci const char *snap_name; 60188c2ecf20Sopenharmony_ci 60198c2ecf20Sopenharmony_ci snap_id = snapc->snaps[which]; 60208c2ecf20Sopenharmony_ci snap_name = rbd_dev_v2_snap_name(rbd_dev, snap_id); 60218c2ecf20Sopenharmony_ci if (IS_ERR(snap_name)) { 60228c2ecf20Sopenharmony_ci /* ignore no-longer existing snapshots */ 60238c2ecf20Sopenharmony_ci if (PTR_ERR(snap_name) == -ENOENT) 60248c2ecf20Sopenharmony_ci continue; 60258c2ecf20Sopenharmony_ci else 60268c2ecf20Sopenharmony_ci break; 60278c2ecf20Sopenharmony_ci } 60288c2ecf20Sopenharmony_ci found = !strcmp(name, snap_name); 60298c2ecf20Sopenharmony_ci kfree(snap_name); 60308c2ecf20Sopenharmony_ci } 
60318c2ecf20Sopenharmony_ci return found ? snap_id : CEPH_NOSNAP; 60328c2ecf20Sopenharmony_ci} 60338c2ecf20Sopenharmony_ci 60348c2ecf20Sopenharmony_ci/* 60358c2ecf20Sopenharmony_ci * Assumes name is never RBD_SNAP_HEAD_NAME; returns CEPH_NOSNAP if 60368c2ecf20Sopenharmony_ci * no snapshot by that name is found, or if an error occurs. 60378c2ecf20Sopenharmony_ci */ 60388c2ecf20Sopenharmony_cistatic u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) 60398c2ecf20Sopenharmony_ci{ 60408c2ecf20Sopenharmony_ci if (rbd_dev->image_format == 1) 60418c2ecf20Sopenharmony_ci return rbd_v1_snap_id_by_name(rbd_dev, name); 60428c2ecf20Sopenharmony_ci 60438c2ecf20Sopenharmony_ci return rbd_v2_snap_id_by_name(rbd_dev, name); 60448c2ecf20Sopenharmony_ci} 60458c2ecf20Sopenharmony_ci 60468c2ecf20Sopenharmony_ci/* 60478c2ecf20Sopenharmony_ci * An image being mapped will have everything but the snap id. 60488c2ecf20Sopenharmony_ci */ 60498c2ecf20Sopenharmony_cistatic int rbd_spec_fill_snap_id(struct rbd_device *rbd_dev) 60508c2ecf20Sopenharmony_ci{ 60518c2ecf20Sopenharmony_ci struct rbd_spec *spec = rbd_dev->spec; 60528c2ecf20Sopenharmony_ci 60538c2ecf20Sopenharmony_ci rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name); 60548c2ecf20Sopenharmony_ci rbd_assert(spec->image_id && spec->image_name); 60558c2ecf20Sopenharmony_ci rbd_assert(spec->snap_name); 60568c2ecf20Sopenharmony_ci 60578c2ecf20Sopenharmony_ci if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) { 60588c2ecf20Sopenharmony_ci u64 snap_id; 60598c2ecf20Sopenharmony_ci 60608c2ecf20Sopenharmony_ci snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name); 60618c2ecf20Sopenharmony_ci if (snap_id == CEPH_NOSNAP) 60628c2ecf20Sopenharmony_ci return -ENOENT; 60638c2ecf20Sopenharmony_ci 60648c2ecf20Sopenharmony_ci spec->snap_id = snap_id; 60658c2ecf20Sopenharmony_ci } else { 60668c2ecf20Sopenharmony_ci spec->snap_id = CEPH_NOSNAP; 60678c2ecf20Sopenharmony_ci } 60688c2ecf20Sopenharmony_ci 60698c2ecf20Sopenharmony_ci 
return 0; 60708c2ecf20Sopenharmony_ci} 60718c2ecf20Sopenharmony_ci 60728c2ecf20Sopenharmony_ci/* 60738c2ecf20Sopenharmony_ci * A parent image will have all ids but none of the names. 60748c2ecf20Sopenharmony_ci * 60758c2ecf20Sopenharmony_ci * All names in an rbd spec are dynamically allocated. It's OK if we 60768c2ecf20Sopenharmony_ci * can't figure out the name for an image id. 60778c2ecf20Sopenharmony_ci */ 60788c2ecf20Sopenharmony_cistatic int rbd_spec_fill_names(struct rbd_device *rbd_dev) 60798c2ecf20Sopenharmony_ci{ 60808c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 60818c2ecf20Sopenharmony_ci struct rbd_spec *spec = rbd_dev->spec; 60828c2ecf20Sopenharmony_ci const char *pool_name; 60838c2ecf20Sopenharmony_ci const char *image_name; 60848c2ecf20Sopenharmony_ci const char *snap_name; 60858c2ecf20Sopenharmony_ci int ret; 60868c2ecf20Sopenharmony_ci 60878c2ecf20Sopenharmony_ci rbd_assert(spec->pool_id != CEPH_NOPOOL); 60888c2ecf20Sopenharmony_ci rbd_assert(spec->image_id); 60898c2ecf20Sopenharmony_ci rbd_assert(spec->snap_id != CEPH_NOSNAP); 60908c2ecf20Sopenharmony_ci 60918c2ecf20Sopenharmony_ci /* Get the pool name; we have to make our own copy of this */ 60928c2ecf20Sopenharmony_ci 60938c2ecf20Sopenharmony_ci pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, spec->pool_id); 60948c2ecf20Sopenharmony_ci if (!pool_name) { 60958c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "no pool with id %llu", spec->pool_id); 60968c2ecf20Sopenharmony_ci return -EIO; 60978c2ecf20Sopenharmony_ci } 60988c2ecf20Sopenharmony_ci pool_name = kstrdup(pool_name, GFP_KERNEL); 60998c2ecf20Sopenharmony_ci if (!pool_name) 61008c2ecf20Sopenharmony_ci return -ENOMEM; 61018c2ecf20Sopenharmony_ci 61028c2ecf20Sopenharmony_ci /* Fetch the image name; tolerate failure here */ 61038c2ecf20Sopenharmony_ci 61048c2ecf20Sopenharmony_ci image_name = rbd_dev_image_name(rbd_dev); 61058c2ecf20Sopenharmony_ci if (!image_name) 61068c2ecf20Sopenharmony_ci 
rbd_warn(rbd_dev, "unable to get image name"); 61078c2ecf20Sopenharmony_ci 61088c2ecf20Sopenharmony_ci /* Fetch the snapshot name */ 61098c2ecf20Sopenharmony_ci 61108c2ecf20Sopenharmony_ci snap_name = rbd_snap_name(rbd_dev, spec->snap_id); 61118c2ecf20Sopenharmony_ci if (IS_ERR(snap_name)) { 61128c2ecf20Sopenharmony_ci ret = PTR_ERR(snap_name); 61138c2ecf20Sopenharmony_ci goto out_err; 61148c2ecf20Sopenharmony_ci } 61158c2ecf20Sopenharmony_ci 61168c2ecf20Sopenharmony_ci spec->pool_name = pool_name; 61178c2ecf20Sopenharmony_ci spec->image_name = image_name; 61188c2ecf20Sopenharmony_ci spec->snap_name = snap_name; 61198c2ecf20Sopenharmony_ci 61208c2ecf20Sopenharmony_ci return 0; 61218c2ecf20Sopenharmony_ci 61228c2ecf20Sopenharmony_ciout_err: 61238c2ecf20Sopenharmony_ci kfree(image_name); 61248c2ecf20Sopenharmony_ci kfree(pool_name); 61258c2ecf20Sopenharmony_ci return ret; 61268c2ecf20Sopenharmony_ci} 61278c2ecf20Sopenharmony_ci 61288c2ecf20Sopenharmony_cistatic int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, 61298c2ecf20Sopenharmony_ci struct ceph_snap_context **psnapc) 61308c2ecf20Sopenharmony_ci{ 61318c2ecf20Sopenharmony_ci size_t size; 61328c2ecf20Sopenharmony_ci int ret; 61338c2ecf20Sopenharmony_ci void *reply_buf; 61348c2ecf20Sopenharmony_ci void *p; 61358c2ecf20Sopenharmony_ci void *end; 61368c2ecf20Sopenharmony_ci u64 seq; 61378c2ecf20Sopenharmony_ci u32 snap_count; 61388c2ecf20Sopenharmony_ci struct ceph_snap_context *snapc; 61398c2ecf20Sopenharmony_ci u32 i; 61408c2ecf20Sopenharmony_ci 61418c2ecf20Sopenharmony_ci /* 61428c2ecf20Sopenharmony_ci * We'll need room for the seq value (maximum snapshot id), 61438c2ecf20Sopenharmony_ci * snapshot count, and array of that many snapshot ids. 61448c2ecf20Sopenharmony_ci * For now we have a fixed upper limit on the number we're 61458c2ecf20Sopenharmony_ci * prepared to receive. 
61468c2ecf20Sopenharmony_ci */ 61478c2ecf20Sopenharmony_ci size = sizeof (__le64) + sizeof (__le32) + 61488c2ecf20Sopenharmony_ci RBD_MAX_SNAP_COUNT * sizeof (__le64); 61498c2ecf20Sopenharmony_ci reply_buf = kzalloc(size, GFP_KERNEL); 61508c2ecf20Sopenharmony_ci if (!reply_buf) 61518c2ecf20Sopenharmony_ci return -ENOMEM; 61528c2ecf20Sopenharmony_ci 61538c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 61548c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_snapcontext", 61558c2ecf20Sopenharmony_ci NULL, 0, reply_buf, size); 61568c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 61578c2ecf20Sopenharmony_ci if (ret < 0) 61588c2ecf20Sopenharmony_ci goto out; 61598c2ecf20Sopenharmony_ci 61608c2ecf20Sopenharmony_ci p = reply_buf; 61618c2ecf20Sopenharmony_ci end = reply_buf + ret; 61628c2ecf20Sopenharmony_ci ret = -ERANGE; 61638c2ecf20Sopenharmony_ci ceph_decode_64_safe(&p, end, seq, out); 61648c2ecf20Sopenharmony_ci ceph_decode_32_safe(&p, end, snap_count, out); 61658c2ecf20Sopenharmony_ci 61668c2ecf20Sopenharmony_ci /* 61678c2ecf20Sopenharmony_ci * Make sure the reported number of snapshot ids wouldn't go 61688c2ecf20Sopenharmony_ci * beyond the end of our buffer. But before checking that, 61698c2ecf20Sopenharmony_ci * make sure the computed size of the snapshot context we 61708c2ecf20Sopenharmony_ci * allocate is representable in a size_t. 
61718c2ecf20Sopenharmony_ci */ 61728c2ecf20Sopenharmony_ci if (snap_count > (SIZE_MAX - sizeof (struct ceph_snap_context)) 61738c2ecf20Sopenharmony_ci / sizeof (u64)) { 61748c2ecf20Sopenharmony_ci ret = -EINVAL; 61758c2ecf20Sopenharmony_ci goto out; 61768c2ecf20Sopenharmony_ci } 61778c2ecf20Sopenharmony_ci if (!ceph_has_room(&p, end, snap_count * sizeof (__le64))) 61788c2ecf20Sopenharmony_ci goto out; 61798c2ecf20Sopenharmony_ci ret = 0; 61808c2ecf20Sopenharmony_ci 61818c2ecf20Sopenharmony_ci snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); 61828c2ecf20Sopenharmony_ci if (!snapc) { 61838c2ecf20Sopenharmony_ci ret = -ENOMEM; 61848c2ecf20Sopenharmony_ci goto out; 61858c2ecf20Sopenharmony_ci } 61868c2ecf20Sopenharmony_ci snapc->seq = seq; 61878c2ecf20Sopenharmony_ci for (i = 0; i < snap_count; i++) 61888c2ecf20Sopenharmony_ci snapc->snaps[i] = ceph_decode_64(&p); 61898c2ecf20Sopenharmony_ci 61908c2ecf20Sopenharmony_ci *psnapc = snapc; 61918c2ecf20Sopenharmony_ci dout(" snap context seq = %llu, snap_count = %u\n", 61928c2ecf20Sopenharmony_ci (unsigned long long)seq, (unsigned int)snap_count); 61938c2ecf20Sopenharmony_ciout: 61948c2ecf20Sopenharmony_ci kfree(reply_buf); 61958c2ecf20Sopenharmony_ci 61968c2ecf20Sopenharmony_ci return ret; 61978c2ecf20Sopenharmony_ci} 61988c2ecf20Sopenharmony_ci 61998c2ecf20Sopenharmony_cistatic const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, 62008c2ecf20Sopenharmony_ci u64 snap_id) 62018c2ecf20Sopenharmony_ci{ 62028c2ecf20Sopenharmony_ci size_t size; 62038c2ecf20Sopenharmony_ci void *reply_buf; 62048c2ecf20Sopenharmony_ci __le64 snapid; 62058c2ecf20Sopenharmony_ci int ret; 62068c2ecf20Sopenharmony_ci void *p; 62078c2ecf20Sopenharmony_ci void *end; 62088c2ecf20Sopenharmony_ci char *snap_name; 62098c2ecf20Sopenharmony_ci 62108c2ecf20Sopenharmony_ci size = sizeof (__le32) + RBD_MAX_SNAP_NAME_LEN; 62118c2ecf20Sopenharmony_ci reply_buf = kmalloc(size, GFP_KERNEL); 62128c2ecf20Sopenharmony_ci if (!reply_buf) 
62138c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 62148c2ecf20Sopenharmony_ci 62158c2ecf20Sopenharmony_ci snapid = cpu_to_le64(snap_id); 62168c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, 62178c2ecf20Sopenharmony_ci &rbd_dev->header_oloc, "get_snapshot_name", 62188c2ecf20Sopenharmony_ci &snapid, sizeof(snapid), reply_buf, size); 62198c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 62208c2ecf20Sopenharmony_ci if (ret < 0) { 62218c2ecf20Sopenharmony_ci snap_name = ERR_PTR(ret); 62228c2ecf20Sopenharmony_ci goto out; 62238c2ecf20Sopenharmony_ci } 62248c2ecf20Sopenharmony_ci 62258c2ecf20Sopenharmony_ci p = reply_buf; 62268c2ecf20Sopenharmony_ci end = reply_buf + ret; 62278c2ecf20Sopenharmony_ci snap_name = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 62288c2ecf20Sopenharmony_ci if (IS_ERR(snap_name)) 62298c2ecf20Sopenharmony_ci goto out; 62308c2ecf20Sopenharmony_ci 62318c2ecf20Sopenharmony_ci dout(" snap_id 0x%016llx snap_name = %s\n", 62328c2ecf20Sopenharmony_ci (unsigned long long)snap_id, snap_name); 62338c2ecf20Sopenharmony_ciout: 62348c2ecf20Sopenharmony_ci kfree(reply_buf); 62358c2ecf20Sopenharmony_ci 62368c2ecf20Sopenharmony_ci return snap_name; 62378c2ecf20Sopenharmony_ci} 62388c2ecf20Sopenharmony_ci 62398c2ecf20Sopenharmony_cistatic int rbd_dev_v2_header_info(struct rbd_device *rbd_dev, 62408c2ecf20Sopenharmony_ci struct rbd_image_header *header, 62418c2ecf20Sopenharmony_ci bool first_time) 62428c2ecf20Sopenharmony_ci{ 62438c2ecf20Sopenharmony_ci int ret; 62448c2ecf20Sopenharmony_ci 62458c2ecf20Sopenharmony_ci ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, 62468c2ecf20Sopenharmony_ci first_time ? 
&header->obj_order : NULL, 62478c2ecf20Sopenharmony_ci &header->image_size); 62488c2ecf20Sopenharmony_ci if (ret) 62498c2ecf20Sopenharmony_ci return ret; 62508c2ecf20Sopenharmony_ci 62518c2ecf20Sopenharmony_ci if (first_time) { 62528c2ecf20Sopenharmony_ci ret = rbd_dev_v2_header_onetime(rbd_dev, header); 62538c2ecf20Sopenharmony_ci if (ret) 62548c2ecf20Sopenharmony_ci return ret; 62558c2ecf20Sopenharmony_ci } 62568c2ecf20Sopenharmony_ci 62578c2ecf20Sopenharmony_ci ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); 62588c2ecf20Sopenharmony_ci if (ret) 62598c2ecf20Sopenharmony_ci return ret; 62608c2ecf20Sopenharmony_ci 62618c2ecf20Sopenharmony_ci return 0; 62628c2ecf20Sopenharmony_ci} 62638c2ecf20Sopenharmony_ci 62648c2ecf20Sopenharmony_cistatic int rbd_dev_header_info(struct rbd_device *rbd_dev, 62658c2ecf20Sopenharmony_ci struct rbd_image_header *header, 62668c2ecf20Sopenharmony_ci bool first_time) 62678c2ecf20Sopenharmony_ci{ 62688c2ecf20Sopenharmony_ci rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 62698c2ecf20Sopenharmony_ci rbd_assert(!header->object_prefix && !header->snapc); 62708c2ecf20Sopenharmony_ci 62718c2ecf20Sopenharmony_ci if (rbd_dev->image_format == 1) 62728c2ecf20Sopenharmony_ci return rbd_dev_v1_header_info(rbd_dev, header, first_time); 62738c2ecf20Sopenharmony_ci 62748c2ecf20Sopenharmony_ci return rbd_dev_v2_header_info(rbd_dev, header, first_time); 62758c2ecf20Sopenharmony_ci} 62768c2ecf20Sopenharmony_ci 62778c2ecf20Sopenharmony_ci/* 62788c2ecf20Sopenharmony_ci * Skips over white space at *buf, and updates *buf to point to the 62798c2ecf20Sopenharmony_ci * first found non-space character (if any). Returns the length of 62808c2ecf20Sopenharmony_ci * the token (string of non-white space characters) found. Note 62818c2ecf20Sopenharmony_ci * that *buf must be terminated with '\0'. 
62828c2ecf20Sopenharmony_ci */ 62838c2ecf20Sopenharmony_cistatic inline size_t next_token(const char **buf) 62848c2ecf20Sopenharmony_ci{ 62858c2ecf20Sopenharmony_ci /* 62868c2ecf20Sopenharmony_ci * These are the characters that produce nonzero for 62878c2ecf20Sopenharmony_ci * isspace() in the "C" and "POSIX" locales. 62888c2ecf20Sopenharmony_ci */ 62898c2ecf20Sopenharmony_ci const char *spaces = " \f\n\r\t\v"; 62908c2ecf20Sopenharmony_ci 62918c2ecf20Sopenharmony_ci *buf += strspn(*buf, spaces); /* Find start of token */ 62928c2ecf20Sopenharmony_ci 62938c2ecf20Sopenharmony_ci return strcspn(*buf, spaces); /* Return token length */ 62948c2ecf20Sopenharmony_ci} 62958c2ecf20Sopenharmony_ci 62968c2ecf20Sopenharmony_ci/* 62978c2ecf20Sopenharmony_ci * Finds the next token in *buf, dynamically allocates a buffer big 62988c2ecf20Sopenharmony_ci * enough to hold a copy of it, and copies the token into the new 62998c2ecf20Sopenharmony_ci * buffer. The copy is guaranteed to be terminated with '\0'. Note 63008c2ecf20Sopenharmony_ci * that a duplicate buffer is created even for a zero-length token. 63018c2ecf20Sopenharmony_ci * 63028c2ecf20Sopenharmony_ci * Returns a pointer to the newly-allocated duplicate, or a null 63038c2ecf20Sopenharmony_ci * pointer if memory for the duplicate was not available. If 63048c2ecf20Sopenharmony_ci * the lenp argument is a non-null pointer, the length of the token 63058c2ecf20Sopenharmony_ci * (not including the '\0') is returned in *lenp. 63068c2ecf20Sopenharmony_ci * 63078c2ecf20Sopenharmony_ci * If successful, the *buf pointer will be updated to point beyond 63088c2ecf20Sopenharmony_ci * the end of the found token. 63098c2ecf20Sopenharmony_ci * 63108c2ecf20Sopenharmony_ci * Note: uses GFP_KERNEL for allocation. 
63118c2ecf20Sopenharmony_ci */ 63128c2ecf20Sopenharmony_cistatic inline char *dup_token(const char **buf, size_t *lenp) 63138c2ecf20Sopenharmony_ci{ 63148c2ecf20Sopenharmony_ci char *dup; 63158c2ecf20Sopenharmony_ci size_t len; 63168c2ecf20Sopenharmony_ci 63178c2ecf20Sopenharmony_ci len = next_token(buf); 63188c2ecf20Sopenharmony_ci dup = kmemdup(*buf, len + 1, GFP_KERNEL); 63198c2ecf20Sopenharmony_ci if (!dup) 63208c2ecf20Sopenharmony_ci return NULL; 63218c2ecf20Sopenharmony_ci *(dup + len) = '\0'; 63228c2ecf20Sopenharmony_ci *buf += len; 63238c2ecf20Sopenharmony_ci 63248c2ecf20Sopenharmony_ci if (lenp) 63258c2ecf20Sopenharmony_ci *lenp = len; 63268c2ecf20Sopenharmony_ci 63278c2ecf20Sopenharmony_ci return dup; 63288c2ecf20Sopenharmony_ci} 63298c2ecf20Sopenharmony_ci 63308c2ecf20Sopenharmony_cistatic int rbd_parse_param(struct fs_parameter *param, 63318c2ecf20Sopenharmony_ci struct rbd_parse_opts_ctx *pctx) 63328c2ecf20Sopenharmony_ci{ 63338c2ecf20Sopenharmony_ci struct rbd_options *opt = pctx->opts; 63348c2ecf20Sopenharmony_ci struct fs_parse_result result; 63358c2ecf20Sopenharmony_ci struct p_log log = {.prefix = "rbd"}; 63368c2ecf20Sopenharmony_ci int token, ret; 63378c2ecf20Sopenharmony_ci 63388c2ecf20Sopenharmony_ci ret = ceph_parse_param(param, pctx->copts, NULL); 63398c2ecf20Sopenharmony_ci if (ret != -ENOPARAM) 63408c2ecf20Sopenharmony_ci return ret; 63418c2ecf20Sopenharmony_ci 63428c2ecf20Sopenharmony_ci token = __fs_parse(&log, rbd_parameters, param, &result); 63438c2ecf20Sopenharmony_ci dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); 63448c2ecf20Sopenharmony_ci if (token < 0) { 63458c2ecf20Sopenharmony_ci if (token == -ENOPARAM) 63468c2ecf20Sopenharmony_ci return inval_plog(&log, "Unknown parameter '%s'", 63478c2ecf20Sopenharmony_ci param->key); 63488c2ecf20Sopenharmony_ci return token; 63498c2ecf20Sopenharmony_ci } 63508c2ecf20Sopenharmony_ci 63518c2ecf20Sopenharmony_ci switch (token) { 63528c2ecf20Sopenharmony_ci case Opt_queue_depth: 
63538c2ecf20Sopenharmony_ci if (result.uint_32 < 1) 63548c2ecf20Sopenharmony_ci goto out_of_range; 63558c2ecf20Sopenharmony_ci opt->queue_depth = result.uint_32; 63568c2ecf20Sopenharmony_ci break; 63578c2ecf20Sopenharmony_ci case Opt_alloc_size: 63588c2ecf20Sopenharmony_ci if (result.uint_32 < SECTOR_SIZE) 63598c2ecf20Sopenharmony_ci goto out_of_range; 63608c2ecf20Sopenharmony_ci if (!is_power_of_2(result.uint_32)) 63618c2ecf20Sopenharmony_ci return inval_plog(&log, "alloc_size must be a power of 2"); 63628c2ecf20Sopenharmony_ci opt->alloc_size = result.uint_32; 63638c2ecf20Sopenharmony_ci break; 63648c2ecf20Sopenharmony_ci case Opt_lock_timeout: 63658c2ecf20Sopenharmony_ci /* 0 is "wait forever" (i.e. infinite timeout) */ 63668c2ecf20Sopenharmony_ci if (result.uint_32 > INT_MAX / 1000) 63678c2ecf20Sopenharmony_ci goto out_of_range; 63688c2ecf20Sopenharmony_ci opt->lock_timeout = msecs_to_jiffies(result.uint_32 * 1000); 63698c2ecf20Sopenharmony_ci break; 63708c2ecf20Sopenharmony_ci case Opt_pool_ns: 63718c2ecf20Sopenharmony_ci kfree(pctx->spec->pool_ns); 63728c2ecf20Sopenharmony_ci pctx->spec->pool_ns = param->string; 63738c2ecf20Sopenharmony_ci param->string = NULL; 63748c2ecf20Sopenharmony_ci break; 63758c2ecf20Sopenharmony_ci case Opt_compression_hint: 63768c2ecf20Sopenharmony_ci switch (result.uint_32) { 63778c2ecf20Sopenharmony_ci case Opt_compression_hint_none: 63788c2ecf20Sopenharmony_ci opt->alloc_hint_flags &= 63798c2ecf20Sopenharmony_ci ~(CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE | 63808c2ecf20Sopenharmony_ci CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE); 63818c2ecf20Sopenharmony_ci break; 63828c2ecf20Sopenharmony_ci case Opt_compression_hint_compressible: 63838c2ecf20Sopenharmony_ci opt->alloc_hint_flags |= 63848c2ecf20Sopenharmony_ci CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE; 63858c2ecf20Sopenharmony_ci opt->alloc_hint_flags &= 63868c2ecf20Sopenharmony_ci ~CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE; 63878c2ecf20Sopenharmony_ci break; 63888c2ecf20Sopenharmony_ci case 
Opt_compression_hint_incompressible: 63898c2ecf20Sopenharmony_ci opt->alloc_hint_flags |= 63908c2ecf20Sopenharmony_ci CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE; 63918c2ecf20Sopenharmony_ci opt->alloc_hint_flags &= 63928c2ecf20Sopenharmony_ci ~CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE; 63938c2ecf20Sopenharmony_ci break; 63948c2ecf20Sopenharmony_ci default: 63958c2ecf20Sopenharmony_ci BUG(); 63968c2ecf20Sopenharmony_ci } 63978c2ecf20Sopenharmony_ci break; 63988c2ecf20Sopenharmony_ci case Opt_read_only: 63998c2ecf20Sopenharmony_ci opt->read_only = true; 64008c2ecf20Sopenharmony_ci break; 64018c2ecf20Sopenharmony_ci case Opt_read_write: 64028c2ecf20Sopenharmony_ci opt->read_only = false; 64038c2ecf20Sopenharmony_ci break; 64048c2ecf20Sopenharmony_ci case Opt_lock_on_read: 64058c2ecf20Sopenharmony_ci opt->lock_on_read = true; 64068c2ecf20Sopenharmony_ci break; 64078c2ecf20Sopenharmony_ci case Opt_exclusive: 64088c2ecf20Sopenharmony_ci opt->exclusive = true; 64098c2ecf20Sopenharmony_ci break; 64108c2ecf20Sopenharmony_ci case Opt_notrim: 64118c2ecf20Sopenharmony_ci opt->trim = false; 64128c2ecf20Sopenharmony_ci break; 64138c2ecf20Sopenharmony_ci default: 64148c2ecf20Sopenharmony_ci BUG(); 64158c2ecf20Sopenharmony_ci } 64168c2ecf20Sopenharmony_ci 64178c2ecf20Sopenharmony_ci return 0; 64188c2ecf20Sopenharmony_ci 64198c2ecf20Sopenharmony_ciout_of_range: 64208c2ecf20Sopenharmony_ci return inval_plog(&log, "%s out of range", param->key); 64218c2ecf20Sopenharmony_ci} 64228c2ecf20Sopenharmony_ci 64238c2ecf20Sopenharmony_ci/* 64248c2ecf20Sopenharmony_ci * This duplicates most of generic_parse_monolithic(), untying it from 64258c2ecf20Sopenharmony_ci * fs_context and skipping standard superblock and security options. 
64268c2ecf20Sopenharmony_ci */ 64278c2ecf20Sopenharmony_cistatic int rbd_parse_options(char *options, struct rbd_parse_opts_ctx *pctx) 64288c2ecf20Sopenharmony_ci{ 64298c2ecf20Sopenharmony_ci char *key; 64308c2ecf20Sopenharmony_ci int ret = 0; 64318c2ecf20Sopenharmony_ci 64328c2ecf20Sopenharmony_ci dout("%s '%s'\n", __func__, options); 64338c2ecf20Sopenharmony_ci while ((key = strsep(&options, ",")) != NULL) { 64348c2ecf20Sopenharmony_ci if (*key) { 64358c2ecf20Sopenharmony_ci struct fs_parameter param = { 64368c2ecf20Sopenharmony_ci .key = key, 64378c2ecf20Sopenharmony_ci .type = fs_value_is_flag, 64388c2ecf20Sopenharmony_ci }; 64398c2ecf20Sopenharmony_ci char *value = strchr(key, '='); 64408c2ecf20Sopenharmony_ci size_t v_len = 0; 64418c2ecf20Sopenharmony_ci 64428c2ecf20Sopenharmony_ci if (value) { 64438c2ecf20Sopenharmony_ci if (value == key) 64448c2ecf20Sopenharmony_ci continue; 64458c2ecf20Sopenharmony_ci *value++ = 0; 64468c2ecf20Sopenharmony_ci v_len = strlen(value); 64478c2ecf20Sopenharmony_ci param.string = kmemdup_nul(value, v_len, 64488c2ecf20Sopenharmony_ci GFP_KERNEL); 64498c2ecf20Sopenharmony_ci if (!param.string) 64508c2ecf20Sopenharmony_ci return -ENOMEM; 64518c2ecf20Sopenharmony_ci param.type = fs_value_is_string; 64528c2ecf20Sopenharmony_ci } 64538c2ecf20Sopenharmony_ci param.size = v_len; 64548c2ecf20Sopenharmony_ci 64558c2ecf20Sopenharmony_ci ret = rbd_parse_param(¶m, pctx); 64568c2ecf20Sopenharmony_ci kfree(param.string); 64578c2ecf20Sopenharmony_ci if (ret) 64588c2ecf20Sopenharmony_ci break; 64598c2ecf20Sopenharmony_ci } 64608c2ecf20Sopenharmony_ci } 64618c2ecf20Sopenharmony_ci 64628c2ecf20Sopenharmony_ci return ret; 64638c2ecf20Sopenharmony_ci} 64648c2ecf20Sopenharmony_ci 64658c2ecf20Sopenharmony_ci/* 64668c2ecf20Sopenharmony_ci * Parse the options provided for an "rbd add" (i.e., rbd image 64678c2ecf20Sopenharmony_ci * mapping) request. 
 * These arrive via a write to /sys/bus/rbd/add,
 * and the data written is passed here via a NUL-terminated buffer.
 * Returns 0 if successful or an error code otherwise.
 *
 * The information extracted from these options is recorded in
 * the other parameters which return dynamically-allocated
 * structures:
 *  ceph_opts
 *      The address of a pointer that will refer to a ceph options
 *      structure.  Caller must release the returned pointer using
 *      ceph_destroy_options() when it is no longer needed.
 *  opts
 *      Address of an rbd options pointer.  Fully initialized by
 *      this function; caller must release with kfree().
 *  rbd_spec
 *      Address of an rbd image specification pointer.  Fully
 *      initialized by this function based on parsed options.
 *      Caller must release with rbd_spec_put().
 *
 * The options passed take this form:
 *  <mon_addrs> <options> <pool_name> <image_name> [<snap_id>]
 * where:
 *  <mon_addrs>
 *      A comma-separated list of one or more monitor addresses.
 *      A monitor address is an ip address, optionally followed
 *      by a port number (separated by a colon).
 *        I.e.:  ip1[:port1][,ip2[:port2]...]
 *  <options>
 *      A comma-separated list of ceph and/or rbd options.
64968c2ecf20Sopenharmony_ci * <pool_name> 64978c2ecf20Sopenharmony_ci * The name of the rados pool containing the rbd image. 64988c2ecf20Sopenharmony_ci * <image_name> 64998c2ecf20Sopenharmony_ci * The name of the image in that pool to map. 65008c2ecf20Sopenharmony_ci * <snap_id> 65018c2ecf20Sopenharmony_ci * An optional snapshot id. If provided, the mapping will 65028c2ecf20Sopenharmony_ci * present data from the image at the time that snapshot was 65038c2ecf20Sopenharmony_ci * created. The image head is used if no snapshot id is 65048c2ecf20Sopenharmony_ci * provided. Snapshot mappings are always read-only. 65058c2ecf20Sopenharmony_ci */ 65068c2ecf20Sopenharmony_cistatic int rbd_add_parse_args(const char *buf, 65078c2ecf20Sopenharmony_ci struct ceph_options **ceph_opts, 65088c2ecf20Sopenharmony_ci struct rbd_options **opts, 65098c2ecf20Sopenharmony_ci struct rbd_spec **rbd_spec) 65108c2ecf20Sopenharmony_ci{ 65118c2ecf20Sopenharmony_ci size_t len; 65128c2ecf20Sopenharmony_ci char *options; 65138c2ecf20Sopenharmony_ci const char *mon_addrs; 65148c2ecf20Sopenharmony_ci char *snap_name; 65158c2ecf20Sopenharmony_ci size_t mon_addrs_size; 65168c2ecf20Sopenharmony_ci struct rbd_parse_opts_ctx pctx = { 0 }; 65178c2ecf20Sopenharmony_ci int ret; 65188c2ecf20Sopenharmony_ci 65198c2ecf20Sopenharmony_ci /* The first four tokens are required */ 65208c2ecf20Sopenharmony_ci 65218c2ecf20Sopenharmony_ci len = next_token(&buf); 65228c2ecf20Sopenharmony_ci if (!len) { 65238c2ecf20Sopenharmony_ci rbd_warn(NULL, "no monitor address(es) provided"); 65248c2ecf20Sopenharmony_ci return -EINVAL; 65258c2ecf20Sopenharmony_ci } 65268c2ecf20Sopenharmony_ci mon_addrs = buf; 65278c2ecf20Sopenharmony_ci mon_addrs_size = len; 65288c2ecf20Sopenharmony_ci buf += len; 65298c2ecf20Sopenharmony_ci 65308c2ecf20Sopenharmony_ci ret = -EINVAL; 65318c2ecf20Sopenharmony_ci options = dup_token(&buf, NULL); 65328c2ecf20Sopenharmony_ci if (!options) 65338c2ecf20Sopenharmony_ci return -ENOMEM; 
65348c2ecf20Sopenharmony_ci if (!*options) { 65358c2ecf20Sopenharmony_ci rbd_warn(NULL, "no options provided"); 65368c2ecf20Sopenharmony_ci goto out_err; 65378c2ecf20Sopenharmony_ci } 65388c2ecf20Sopenharmony_ci 65398c2ecf20Sopenharmony_ci pctx.spec = rbd_spec_alloc(); 65408c2ecf20Sopenharmony_ci if (!pctx.spec) 65418c2ecf20Sopenharmony_ci goto out_mem; 65428c2ecf20Sopenharmony_ci 65438c2ecf20Sopenharmony_ci pctx.spec->pool_name = dup_token(&buf, NULL); 65448c2ecf20Sopenharmony_ci if (!pctx.spec->pool_name) 65458c2ecf20Sopenharmony_ci goto out_mem; 65468c2ecf20Sopenharmony_ci if (!*pctx.spec->pool_name) { 65478c2ecf20Sopenharmony_ci rbd_warn(NULL, "no pool name provided"); 65488c2ecf20Sopenharmony_ci goto out_err; 65498c2ecf20Sopenharmony_ci } 65508c2ecf20Sopenharmony_ci 65518c2ecf20Sopenharmony_ci pctx.spec->image_name = dup_token(&buf, NULL); 65528c2ecf20Sopenharmony_ci if (!pctx.spec->image_name) 65538c2ecf20Sopenharmony_ci goto out_mem; 65548c2ecf20Sopenharmony_ci if (!*pctx.spec->image_name) { 65558c2ecf20Sopenharmony_ci rbd_warn(NULL, "no image name provided"); 65568c2ecf20Sopenharmony_ci goto out_err; 65578c2ecf20Sopenharmony_ci } 65588c2ecf20Sopenharmony_ci 65598c2ecf20Sopenharmony_ci /* 65608c2ecf20Sopenharmony_ci * Snapshot name is optional; default is to use "-" 65618c2ecf20Sopenharmony_ci * (indicating the head/no snapshot). 
65628c2ecf20Sopenharmony_ci */ 65638c2ecf20Sopenharmony_ci len = next_token(&buf); 65648c2ecf20Sopenharmony_ci if (!len) { 65658c2ecf20Sopenharmony_ci buf = RBD_SNAP_HEAD_NAME; /* No snapshot supplied */ 65668c2ecf20Sopenharmony_ci len = sizeof (RBD_SNAP_HEAD_NAME) - 1; 65678c2ecf20Sopenharmony_ci } else if (len > RBD_MAX_SNAP_NAME_LEN) { 65688c2ecf20Sopenharmony_ci ret = -ENAMETOOLONG; 65698c2ecf20Sopenharmony_ci goto out_err; 65708c2ecf20Sopenharmony_ci } 65718c2ecf20Sopenharmony_ci snap_name = kmemdup(buf, len + 1, GFP_KERNEL); 65728c2ecf20Sopenharmony_ci if (!snap_name) 65738c2ecf20Sopenharmony_ci goto out_mem; 65748c2ecf20Sopenharmony_ci *(snap_name + len) = '\0'; 65758c2ecf20Sopenharmony_ci pctx.spec->snap_name = snap_name; 65768c2ecf20Sopenharmony_ci 65778c2ecf20Sopenharmony_ci pctx.copts = ceph_alloc_options(); 65788c2ecf20Sopenharmony_ci if (!pctx.copts) 65798c2ecf20Sopenharmony_ci goto out_mem; 65808c2ecf20Sopenharmony_ci 65818c2ecf20Sopenharmony_ci /* Initialize all rbd options to the defaults */ 65828c2ecf20Sopenharmony_ci 65838c2ecf20Sopenharmony_ci pctx.opts = kzalloc(sizeof(*pctx.opts), GFP_KERNEL); 65848c2ecf20Sopenharmony_ci if (!pctx.opts) 65858c2ecf20Sopenharmony_ci goto out_mem; 65868c2ecf20Sopenharmony_ci 65878c2ecf20Sopenharmony_ci pctx.opts->read_only = RBD_READ_ONLY_DEFAULT; 65888c2ecf20Sopenharmony_ci pctx.opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT; 65898c2ecf20Sopenharmony_ci pctx.opts->alloc_size = RBD_ALLOC_SIZE_DEFAULT; 65908c2ecf20Sopenharmony_ci pctx.opts->lock_timeout = RBD_LOCK_TIMEOUT_DEFAULT; 65918c2ecf20Sopenharmony_ci pctx.opts->lock_on_read = RBD_LOCK_ON_READ_DEFAULT; 65928c2ecf20Sopenharmony_ci pctx.opts->exclusive = RBD_EXCLUSIVE_DEFAULT; 65938c2ecf20Sopenharmony_ci pctx.opts->trim = RBD_TRIM_DEFAULT; 65948c2ecf20Sopenharmony_ci 65958c2ecf20Sopenharmony_ci ret = ceph_parse_mon_ips(mon_addrs, mon_addrs_size, pctx.copts, NULL); 65968c2ecf20Sopenharmony_ci if (ret) 65978c2ecf20Sopenharmony_ci goto out_err; 
65988c2ecf20Sopenharmony_ci 65998c2ecf20Sopenharmony_ci ret = rbd_parse_options(options, &pctx); 66008c2ecf20Sopenharmony_ci if (ret) 66018c2ecf20Sopenharmony_ci goto out_err; 66028c2ecf20Sopenharmony_ci 66038c2ecf20Sopenharmony_ci *ceph_opts = pctx.copts; 66048c2ecf20Sopenharmony_ci *opts = pctx.opts; 66058c2ecf20Sopenharmony_ci *rbd_spec = pctx.spec; 66068c2ecf20Sopenharmony_ci kfree(options); 66078c2ecf20Sopenharmony_ci return 0; 66088c2ecf20Sopenharmony_ci 66098c2ecf20Sopenharmony_ciout_mem: 66108c2ecf20Sopenharmony_ci ret = -ENOMEM; 66118c2ecf20Sopenharmony_ciout_err: 66128c2ecf20Sopenharmony_ci kfree(pctx.opts); 66138c2ecf20Sopenharmony_ci ceph_destroy_options(pctx.copts); 66148c2ecf20Sopenharmony_ci rbd_spec_put(pctx.spec); 66158c2ecf20Sopenharmony_ci kfree(options); 66168c2ecf20Sopenharmony_ci return ret; 66178c2ecf20Sopenharmony_ci} 66188c2ecf20Sopenharmony_ci 66198c2ecf20Sopenharmony_cistatic void rbd_dev_image_unlock(struct rbd_device *rbd_dev) 66208c2ecf20Sopenharmony_ci{ 66218c2ecf20Sopenharmony_ci down_write(&rbd_dev->lock_rwsem); 66228c2ecf20Sopenharmony_ci if (__rbd_is_lock_owner(rbd_dev)) 66238c2ecf20Sopenharmony_ci __rbd_release_lock(rbd_dev); 66248c2ecf20Sopenharmony_ci up_write(&rbd_dev->lock_rwsem); 66258c2ecf20Sopenharmony_ci} 66268c2ecf20Sopenharmony_ci 66278c2ecf20Sopenharmony_ci/* 66288c2ecf20Sopenharmony_ci * If the wait is interrupted, an error is returned even if the lock 66298c2ecf20Sopenharmony_ci * was successfully acquired. rbd_dev_image_unlock() will release it 66308c2ecf20Sopenharmony_ci * if needed. 
66318c2ecf20Sopenharmony_ci */ 66328c2ecf20Sopenharmony_cistatic int rbd_add_acquire_lock(struct rbd_device *rbd_dev) 66338c2ecf20Sopenharmony_ci{ 66348c2ecf20Sopenharmony_ci long ret; 66358c2ecf20Sopenharmony_ci 66368c2ecf20Sopenharmony_ci if (!(rbd_dev->header.features & RBD_FEATURE_EXCLUSIVE_LOCK)) { 66378c2ecf20Sopenharmony_ci if (!rbd_dev->opts->exclusive && !rbd_dev->opts->lock_on_read) 66388c2ecf20Sopenharmony_ci return 0; 66398c2ecf20Sopenharmony_ci 66408c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "exclusive-lock feature is not enabled"); 66418c2ecf20Sopenharmony_ci return -EINVAL; 66428c2ecf20Sopenharmony_ci } 66438c2ecf20Sopenharmony_ci 66448c2ecf20Sopenharmony_ci if (rbd_is_ro(rbd_dev)) 66458c2ecf20Sopenharmony_ci return 0; 66468c2ecf20Sopenharmony_ci 66478c2ecf20Sopenharmony_ci rbd_assert(!rbd_is_lock_owner(rbd_dev)); 66488c2ecf20Sopenharmony_ci queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0); 66498c2ecf20Sopenharmony_ci ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait, 66508c2ecf20Sopenharmony_ci ceph_timeout_jiffies(rbd_dev->opts->lock_timeout)); 66518c2ecf20Sopenharmony_ci if (ret > 0) { 66528c2ecf20Sopenharmony_ci ret = rbd_dev->acquire_err; 66538c2ecf20Sopenharmony_ci } else { 66548c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&rbd_dev->lock_dwork); 66558c2ecf20Sopenharmony_ci if (!ret) 66568c2ecf20Sopenharmony_ci ret = -ETIMEDOUT; 66578c2ecf20Sopenharmony_ci 66588c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret); 66598c2ecf20Sopenharmony_ci } 66608c2ecf20Sopenharmony_ci if (ret) 66618c2ecf20Sopenharmony_ci return ret; 66628c2ecf20Sopenharmony_ci 66638c2ecf20Sopenharmony_ci /* 66648c2ecf20Sopenharmony_ci * The lock may have been released by now, unless automatic lock 66658c2ecf20Sopenharmony_ci * transitions are disabled. 
66668c2ecf20Sopenharmony_ci */ 66678c2ecf20Sopenharmony_ci rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev)); 66688c2ecf20Sopenharmony_ci return 0; 66698c2ecf20Sopenharmony_ci} 66708c2ecf20Sopenharmony_ci 66718c2ecf20Sopenharmony_ci/* 66728c2ecf20Sopenharmony_ci * An rbd format 2 image has a unique identifier, distinct from the 66738c2ecf20Sopenharmony_ci * name given to it by the user. Internally, that identifier is 66748c2ecf20Sopenharmony_ci * what's used to specify the names of objects related to the image. 66758c2ecf20Sopenharmony_ci * 66768c2ecf20Sopenharmony_ci * A special "rbd id" object is used to map an rbd image name to its 66778c2ecf20Sopenharmony_ci * id. If that object doesn't exist, then there is no v2 rbd image 66788c2ecf20Sopenharmony_ci * with the supplied name. 66798c2ecf20Sopenharmony_ci * 66808c2ecf20Sopenharmony_ci * This function will record the given rbd_dev's image_id field if 66818c2ecf20Sopenharmony_ci * it can be determined, and in that case will return 0. If any 66828c2ecf20Sopenharmony_ci * errors occur a negative errno will be returned and the rbd_dev's 66838c2ecf20Sopenharmony_ci * image_id field will be unchanged (and should be NULL). 66848c2ecf20Sopenharmony_ci */ 66858c2ecf20Sopenharmony_cistatic int rbd_dev_image_id(struct rbd_device *rbd_dev) 66868c2ecf20Sopenharmony_ci{ 66878c2ecf20Sopenharmony_ci int ret; 66888c2ecf20Sopenharmony_ci size_t size; 66898c2ecf20Sopenharmony_ci CEPH_DEFINE_OID_ONSTACK(oid); 66908c2ecf20Sopenharmony_ci void *response; 66918c2ecf20Sopenharmony_ci char *image_id; 66928c2ecf20Sopenharmony_ci 66938c2ecf20Sopenharmony_ci /* 66948c2ecf20Sopenharmony_ci * When probing a parent image, the image id is already 66958c2ecf20Sopenharmony_ci * known (and the image name likely is not). There's no 66968c2ecf20Sopenharmony_ci * need to fetch the image id again in this case. We 66978c2ecf20Sopenharmony_ci * do still need to set the image format though. 
66988c2ecf20Sopenharmony_ci */ 66998c2ecf20Sopenharmony_ci if (rbd_dev->spec->image_id) { 67008c2ecf20Sopenharmony_ci rbd_dev->image_format = *rbd_dev->spec->image_id ? 2 : 1; 67018c2ecf20Sopenharmony_ci 67028c2ecf20Sopenharmony_ci return 0; 67038c2ecf20Sopenharmony_ci } 67048c2ecf20Sopenharmony_ci 67058c2ecf20Sopenharmony_ci /* 67068c2ecf20Sopenharmony_ci * First, see if the format 2 image id file exists, and if 67078c2ecf20Sopenharmony_ci * so, get the image's persistent id from it. 67088c2ecf20Sopenharmony_ci */ 67098c2ecf20Sopenharmony_ci ret = ceph_oid_aprintf(&oid, GFP_KERNEL, "%s%s", RBD_ID_PREFIX, 67108c2ecf20Sopenharmony_ci rbd_dev->spec->image_name); 67118c2ecf20Sopenharmony_ci if (ret) 67128c2ecf20Sopenharmony_ci return ret; 67138c2ecf20Sopenharmony_ci 67148c2ecf20Sopenharmony_ci dout("rbd id object name is %s\n", oid.name); 67158c2ecf20Sopenharmony_ci 67168c2ecf20Sopenharmony_ci /* Response will be an encoded string, which includes a length */ 67178c2ecf20Sopenharmony_ci size = sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX; 67188c2ecf20Sopenharmony_ci response = kzalloc(size, GFP_NOIO); 67198c2ecf20Sopenharmony_ci if (!response) { 67208c2ecf20Sopenharmony_ci ret = -ENOMEM; 67218c2ecf20Sopenharmony_ci goto out; 67228c2ecf20Sopenharmony_ci } 67238c2ecf20Sopenharmony_ci 67248c2ecf20Sopenharmony_ci /* If it doesn't exist we'll assume it's a format 1 image */ 67258c2ecf20Sopenharmony_ci 67268c2ecf20Sopenharmony_ci ret = rbd_obj_method_sync(rbd_dev, &oid, &rbd_dev->header_oloc, 67278c2ecf20Sopenharmony_ci "get_id", NULL, 0, 67288c2ecf20Sopenharmony_ci response, size); 67298c2ecf20Sopenharmony_ci dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); 67308c2ecf20Sopenharmony_ci if (ret == -ENOENT) { 67318c2ecf20Sopenharmony_ci image_id = kstrdup("", GFP_KERNEL); 67328c2ecf20Sopenharmony_ci ret = image_id ? 
0 : -ENOMEM; 67338c2ecf20Sopenharmony_ci if (!ret) 67348c2ecf20Sopenharmony_ci rbd_dev->image_format = 1; 67358c2ecf20Sopenharmony_ci } else if (ret >= 0) { 67368c2ecf20Sopenharmony_ci void *p = response; 67378c2ecf20Sopenharmony_ci 67388c2ecf20Sopenharmony_ci image_id = ceph_extract_encoded_string(&p, p + ret, 67398c2ecf20Sopenharmony_ci NULL, GFP_NOIO); 67408c2ecf20Sopenharmony_ci ret = PTR_ERR_OR_ZERO(image_id); 67418c2ecf20Sopenharmony_ci if (!ret) 67428c2ecf20Sopenharmony_ci rbd_dev->image_format = 2; 67438c2ecf20Sopenharmony_ci } 67448c2ecf20Sopenharmony_ci 67458c2ecf20Sopenharmony_ci if (!ret) { 67468c2ecf20Sopenharmony_ci rbd_dev->spec->image_id = image_id; 67478c2ecf20Sopenharmony_ci dout("image_id is %s\n", image_id); 67488c2ecf20Sopenharmony_ci } 67498c2ecf20Sopenharmony_ciout: 67508c2ecf20Sopenharmony_ci kfree(response); 67518c2ecf20Sopenharmony_ci ceph_oid_destroy(&oid); 67528c2ecf20Sopenharmony_ci return ret; 67538c2ecf20Sopenharmony_ci} 67548c2ecf20Sopenharmony_ci 67558c2ecf20Sopenharmony_ci/* 67568c2ecf20Sopenharmony_ci * Undo whatever state changes are made by v1 or v2 header info 67578c2ecf20Sopenharmony_ci * call. 
67588c2ecf20Sopenharmony_ci */ 67598c2ecf20Sopenharmony_cistatic void rbd_dev_unprobe(struct rbd_device *rbd_dev) 67608c2ecf20Sopenharmony_ci{ 67618c2ecf20Sopenharmony_ci rbd_dev_parent_put(rbd_dev); 67628c2ecf20Sopenharmony_ci rbd_object_map_free(rbd_dev); 67638c2ecf20Sopenharmony_ci rbd_dev_mapping_clear(rbd_dev); 67648c2ecf20Sopenharmony_ci 67658c2ecf20Sopenharmony_ci /* Free dynamic fields from the header, then zero it out */ 67668c2ecf20Sopenharmony_ci 67678c2ecf20Sopenharmony_ci rbd_image_header_cleanup(&rbd_dev->header); 67688c2ecf20Sopenharmony_ci} 67698c2ecf20Sopenharmony_ci 67708c2ecf20Sopenharmony_cistatic int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, 67718c2ecf20Sopenharmony_ci struct rbd_image_header *header) 67728c2ecf20Sopenharmony_ci{ 67738c2ecf20Sopenharmony_ci int ret; 67748c2ecf20Sopenharmony_ci 67758c2ecf20Sopenharmony_ci ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); 67768c2ecf20Sopenharmony_ci if (ret) 67778c2ecf20Sopenharmony_ci return ret; 67788c2ecf20Sopenharmony_ci 67798c2ecf20Sopenharmony_ci /* 67808c2ecf20Sopenharmony_ci * Get the and check features for the image. Currently the 67818c2ecf20Sopenharmony_ci * features are assumed to never change. 
67828c2ecf20Sopenharmony_ci */ 67838c2ecf20Sopenharmony_ci ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, 67848c2ecf20Sopenharmony_ci rbd_is_ro(rbd_dev), &header->features); 67858c2ecf20Sopenharmony_ci if (ret) 67868c2ecf20Sopenharmony_ci return ret; 67878c2ecf20Sopenharmony_ci 67888c2ecf20Sopenharmony_ci /* If the image supports fancy striping, get its parameters */ 67898c2ecf20Sopenharmony_ci 67908c2ecf20Sopenharmony_ci if (header->features & RBD_FEATURE_STRIPINGV2) { 67918c2ecf20Sopenharmony_ci ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, 67928c2ecf20Sopenharmony_ci &header->stripe_count); 67938c2ecf20Sopenharmony_ci if (ret) 67948c2ecf20Sopenharmony_ci return ret; 67958c2ecf20Sopenharmony_ci } 67968c2ecf20Sopenharmony_ci 67978c2ecf20Sopenharmony_ci if (header->features & RBD_FEATURE_DATA_POOL) { 67988c2ecf20Sopenharmony_ci ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); 67998c2ecf20Sopenharmony_ci if (ret) 68008c2ecf20Sopenharmony_ci return ret; 68018c2ecf20Sopenharmony_ci } 68028c2ecf20Sopenharmony_ci 68038c2ecf20Sopenharmony_ci return 0; 68048c2ecf20Sopenharmony_ci} 68058c2ecf20Sopenharmony_ci 68068c2ecf20Sopenharmony_ci/* 68078c2ecf20Sopenharmony_ci * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() -> 68088c2ecf20Sopenharmony_ci * rbd_dev_image_probe() recursion depth, which means it's also the 68098c2ecf20Sopenharmony_ci * length of the already discovered part of the parent chain. 
68108c2ecf20Sopenharmony_ci */ 68118c2ecf20Sopenharmony_cistatic int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth) 68128c2ecf20Sopenharmony_ci{ 68138c2ecf20Sopenharmony_ci struct rbd_device *parent = NULL; 68148c2ecf20Sopenharmony_ci int ret; 68158c2ecf20Sopenharmony_ci 68168c2ecf20Sopenharmony_ci if (!rbd_dev->parent_spec) 68178c2ecf20Sopenharmony_ci return 0; 68188c2ecf20Sopenharmony_ci 68198c2ecf20Sopenharmony_ci if (++depth > RBD_MAX_PARENT_CHAIN_LEN) { 68208c2ecf20Sopenharmony_ci pr_info("parent chain is too long (%d)\n", depth); 68218c2ecf20Sopenharmony_ci ret = -EINVAL; 68228c2ecf20Sopenharmony_ci goto out_err; 68238c2ecf20Sopenharmony_ci } 68248c2ecf20Sopenharmony_ci 68258c2ecf20Sopenharmony_ci parent = __rbd_dev_create(rbd_dev->parent_spec); 68268c2ecf20Sopenharmony_ci if (!parent) { 68278c2ecf20Sopenharmony_ci ret = -ENOMEM; 68288c2ecf20Sopenharmony_ci goto out_err; 68298c2ecf20Sopenharmony_ci } 68308c2ecf20Sopenharmony_ci 68318c2ecf20Sopenharmony_ci /* 68328c2ecf20Sopenharmony_ci * Images related by parent/child relationships always share 68338c2ecf20Sopenharmony_ci * rbd_client and spec/parent_spec, so bump their refcounts. 
68348c2ecf20Sopenharmony_ci */ 68358c2ecf20Sopenharmony_ci parent->rbd_client = __rbd_get_client(rbd_dev->rbd_client); 68368c2ecf20Sopenharmony_ci parent->spec = rbd_spec_get(rbd_dev->parent_spec); 68378c2ecf20Sopenharmony_ci 68388c2ecf20Sopenharmony_ci __set_bit(RBD_DEV_FLAG_READONLY, &parent->flags); 68398c2ecf20Sopenharmony_ci 68408c2ecf20Sopenharmony_ci ret = rbd_dev_image_probe(parent, depth); 68418c2ecf20Sopenharmony_ci if (ret < 0) 68428c2ecf20Sopenharmony_ci goto out_err; 68438c2ecf20Sopenharmony_ci 68448c2ecf20Sopenharmony_ci rbd_dev->parent = parent; 68458c2ecf20Sopenharmony_ci atomic_set(&rbd_dev->parent_ref, 1); 68468c2ecf20Sopenharmony_ci return 0; 68478c2ecf20Sopenharmony_ci 68488c2ecf20Sopenharmony_ciout_err: 68498c2ecf20Sopenharmony_ci rbd_dev_unparent(rbd_dev); 68508c2ecf20Sopenharmony_ci rbd_dev_destroy(parent); 68518c2ecf20Sopenharmony_ci return ret; 68528c2ecf20Sopenharmony_ci} 68538c2ecf20Sopenharmony_ci 68548c2ecf20Sopenharmony_cistatic void rbd_dev_device_release(struct rbd_device *rbd_dev) 68558c2ecf20Sopenharmony_ci{ 68568c2ecf20Sopenharmony_ci clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 68578c2ecf20Sopenharmony_ci rbd_free_disk(rbd_dev); 68588c2ecf20Sopenharmony_ci if (!single_major) 68598c2ecf20Sopenharmony_ci unregister_blkdev(rbd_dev->major, rbd_dev->name); 68608c2ecf20Sopenharmony_ci} 68618c2ecf20Sopenharmony_ci 68628c2ecf20Sopenharmony_ci/* 68638c2ecf20Sopenharmony_ci * rbd_dev->header_rwsem must be locked for write and will be unlocked 68648c2ecf20Sopenharmony_ci * upon return. 68658c2ecf20Sopenharmony_ci */ 68668c2ecf20Sopenharmony_cistatic int rbd_dev_device_setup(struct rbd_device *rbd_dev) 68678c2ecf20Sopenharmony_ci{ 68688c2ecf20Sopenharmony_ci int ret; 68698c2ecf20Sopenharmony_ci 68708c2ecf20Sopenharmony_ci /* Record our major and minor device numbers. 
*/ 68718c2ecf20Sopenharmony_ci 68728c2ecf20Sopenharmony_ci if (!single_major) { 68738c2ecf20Sopenharmony_ci ret = register_blkdev(0, rbd_dev->name); 68748c2ecf20Sopenharmony_ci if (ret < 0) 68758c2ecf20Sopenharmony_ci goto err_out_unlock; 68768c2ecf20Sopenharmony_ci 68778c2ecf20Sopenharmony_ci rbd_dev->major = ret; 68788c2ecf20Sopenharmony_ci rbd_dev->minor = 0; 68798c2ecf20Sopenharmony_ci } else { 68808c2ecf20Sopenharmony_ci rbd_dev->major = rbd_major; 68818c2ecf20Sopenharmony_ci rbd_dev->minor = rbd_dev_id_to_minor(rbd_dev->dev_id); 68828c2ecf20Sopenharmony_ci } 68838c2ecf20Sopenharmony_ci 68848c2ecf20Sopenharmony_ci /* Set up the blkdev mapping. */ 68858c2ecf20Sopenharmony_ci 68868c2ecf20Sopenharmony_ci ret = rbd_init_disk(rbd_dev); 68878c2ecf20Sopenharmony_ci if (ret) 68888c2ecf20Sopenharmony_ci goto err_out_blkdev; 68898c2ecf20Sopenharmony_ci 68908c2ecf20Sopenharmony_ci set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); 68918c2ecf20Sopenharmony_ci set_disk_ro(rbd_dev->disk, rbd_is_ro(rbd_dev)); 68928c2ecf20Sopenharmony_ci 68938c2ecf20Sopenharmony_ci ret = dev_set_name(&rbd_dev->dev, "%d", rbd_dev->dev_id); 68948c2ecf20Sopenharmony_ci if (ret) 68958c2ecf20Sopenharmony_ci goto err_out_disk; 68968c2ecf20Sopenharmony_ci 68978c2ecf20Sopenharmony_ci set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 68988c2ecf20Sopenharmony_ci up_write(&rbd_dev->header_rwsem); 68998c2ecf20Sopenharmony_ci return 0; 69008c2ecf20Sopenharmony_ci 69018c2ecf20Sopenharmony_cierr_out_disk: 69028c2ecf20Sopenharmony_ci rbd_free_disk(rbd_dev); 69038c2ecf20Sopenharmony_cierr_out_blkdev: 69048c2ecf20Sopenharmony_ci if (!single_major) 69058c2ecf20Sopenharmony_ci unregister_blkdev(rbd_dev->major, rbd_dev->name); 69068c2ecf20Sopenharmony_cierr_out_unlock: 69078c2ecf20Sopenharmony_ci up_write(&rbd_dev->header_rwsem); 69088c2ecf20Sopenharmony_ci return ret; 69098c2ecf20Sopenharmony_ci} 69108c2ecf20Sopenharmony_ci 69118c2ecf20Sopenharmony_cistatic int rbd_dev_header_name(struct rbd_device 
*rbd_dev) 69128c2ecf20Sopenharmony_ci{ 69138c2ecf20Sopenharmony_ci struct rbd_spec *spec = rbd_dev->spec; 69148c2ecf20Sopenharmony_ci int ret; 69158c2ecf20Sopenharmony_ci 69168c2ecf20Sopenharmony_ci /* Record the header object name for this rbd image. */ 69178c2ecf20Sopenharmony_ci 69188c2ecf20Sopenharmony_ci rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 69198c2ecf20Sopenharmony_ci if (rbd_dev->image_format == 1) 69208c2ecf20Sopenharmony_ci ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", 69218c2ecf20Sopenharmony_ci spec->image_name, RBD_SUFFIX); 69228c2ecf20Sopenharmony_ci else 69238c2ecf20Sopenharmony_ci ret = ceph_oid_aprintf(&rbd_dev->header_oid, GFP_KERNEL, "%s%s", 69248c2ecf20Sopenharmony_ci RBD_HEADER_PREFIX, spec->image_id); 69258c2ecf20Sopenharmony_ci 69268c2ecf20Sopenharmony_ci return ret; 69278c2ecf20Sopenharmony_ci} 69288c2ecf20Sopenharmony_ci 69298c2ecf20Sopenharmony_cistatic void rbd_print_dne(struct rbd_device *rbd_dev, bool is_snap) 69308c2ecf20Sopenharmony_ci{ 69318c2ecf20Sopenharmony_ci if (!is_snap) { 69328c2ecf20Sopenharmony_ci pr_info("image %s/%s%s%s does not exist\n", 69338c2ecf20Sopenharmony_ci rbd_dev->spec->pool_name, 69348c2ecf20Sopenharmony_ci rbd_dev->spec->pool_ns ?: "", 69358c2ecf20Sopenharmony_ci rbd_dev->spec->pool_ns ? "/" : "", 69368c2ecf20Sopenharmony_ci rbd_dev->spec->image_name); 69378c2ecf20Sopenharmony_ci } else { 69388c2ecf20Sopenharmony_ci pr_info("snap %s/%s%s%s@%s does not exist\n", 69398c2ecf20Sopenharmony_ci rbd_dev->spec->pool_name, 69408c2ecf20Sopenharmony_ci rbd_dev->spec->pool_ns ?: "", 69418c2ecf20Sopenharmony_ci rbd_dev->spec->pool_ns ? 
"/" : "", 69428c2ecf20Sopenharmony_ci rbd_dev->spec->image_name, 69438c2ecf20Sopenharmony_ci rbd_dev->spec->snap_name); 69448c2ecf20Sopenharmony_ci } 69458c2ecf20Sopenharmony_ci} 69468c2ecf20Sopenharmony_ci 69478c2ecf20Sopenharmony_cistatic void rbd_dev_image_release(struct rbd_device *rbd_dev) 69488c2ecf20Sopenharmony_ci{ 69498c2ecf20Sopenharmony_ci if (!rbd_is_ro(rbd_dev)) 69508c2ecf20Sopenharmony_ci rbd_unregister_watch(rbd_dev); 69518c2ecf20Sopenharmony_ci 69528c2ecf20Sopenharmony_ci rbd_dev_unprobe(rbd_dev); 69538c2ecf20Sopenharmony_ci rbd_dev->image_format = 0; 69548c2ecf20Sopenharmony_ci kfree(rbd_dev->spec->image_id); 69558c2ecf20Sopenharmony_ci rbd_dev->spec->image_id = NULL; 69568c2ecf20Sopenharmony_ci} 69578c2ecf20Sopenharmony_ci 69588c2ecf20Sopenharmony_ci/* 69598c2ecf20Sopenharmony_ci * Probe for the existence of the header object for the given rbd 69608c2ecf20Sopenharmony_ci * device. If this image is the one being mapped (i.e., not a 69618c2ecf20Sopenharmony_ci * parent), initiate a watch on its header object before using that 69628c2ecf20Sopenharmony_ci * object to get detailed information about the rbd image. 69638c2ecf20Sopenharmony_ci * 69648c2ecf20Sopenharmony_ci * On success, returns with header_rwsem held for write if called 69658c2ecf20Sopenharmony_ci * with @depth == 0. 69668c2ecf20Sopenharmony_ci */ 69678c2ecf20Sopenharmony_cistatic int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) 69688c2ecf20Sopenharmony_ci{ 69698c2ecf20Sopenharmony_ci bool need_watch = !rbd_is_ro(rbd_dev); 69708c2ecf20Sopenharmony_ci int ret; 69718c2ecf20Sopenharmony_ci 69728c2ecf20Sopenharmony_ci /* 69738c2ecf20Sopenharmony_ci * Get the id from the image id object. Unless there's an 69748c2ecf20Sopenharmony_ci * error, rbd_dev->spec->image_id will be filled in with 69758c2ecf20Sopenharmony_ci * a dynamically-allocated string, and rbd_dev->image_format 69768c2ecf20Sopenharmony_ci * will be set to either 1 or 2. 
69778c2ecf20Sopenharmony_ci */ 69788c2ecf20Sopenharmony_ci ret = rbd_dev_image_id(rbd_dev); 69798c2ecf20Sopenharmony_ci if (ret) 69808c2ecf20Sopenharmony_ci return ret; 69818c2ecf20Sopenharmony_ci 69828c2ecf20Sopenharmony_ci ret = rbd_dev_header_name(rbd_dev); 69838c2ecf20Sopenharmony_ci if (ret) 69848c2ecf20Sopenharmony_ci goto err_out_format; 69858c2ecf20Sopenharmony_ci 69868c2ecf20Sopenharmony_ci if (need_watch) { 69878c2ecf20Sopenharmony_ci ret = rbd_register_watch(rbd_dev); 69888c2ecf20Sopenharmony_ci if (ret) { 69898c2ecf20Sopenharmony_ci if (ret == -ENOENT) 69908c2ecf20Sopenharmony_ci rbd_print_dne(rbd_dev, false); 69918c2ecf20Sopenharmony_ci goto err_out_format; 69928c2ecf20Sopenharmony_ci } 69938c2ecf20Sopenharmony_ci } 69948c2ecf20Sopenharmony_ci 69958c2ecf20Sopenharmony_ci if (!depth) 69968c2ecf20Sopenharmony_ci down_write(&rbd_dev->header_rwsem); 69978c2ecf20Sopenharmony_ci 69988c2ecf20Sopenharmony_ci ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); 69998c2ecf20Sopenharmony_ci if (ret) { 70008c2ecf20Sopenharmony_ci if (ret == -ENOENT && !need_watch) 70018c2ecf20Sopenharmony_ci rbd_print_dne(rbd_dev, false); 70028c2ecf20Sopenharmony_ci goto err_out_probe; 70038c2ecf20Sopenharmony_ci } 70048c2ecf20Sopenharmony_ci 70058c2ecf20Sopenharmony_ci rbd_init_layout(rbd_dev); 70068c2ecf20Sopenharmony_ci 70078c2ecf20Sopenharmony_ci /* 70088c2ecf20Sopenharmony_ci * If this image is the one being mapped, we have pool name and 70098c2ecf20Sopenharmony_ci * id, image name and id, and snap name - need to fill snap id. 70108c2ecf20Sopenharmony_ci * Otherwise this is a parent image, identified by pool, image 70118c2ecf20Sopenharmony_ci * and snap ids - need to fill in names for those ids. 
70128c2ecf20Sopenharmony_ci */ 70138c2ecf20Sopenharmony_ci if (!depth) 70148c2ecf20Sopenharmony_ci ret = rbd_spec_fill_snap_id(rbd_dev); 70158c2ecf20Sopenharmony_ci else 70168c2ecf20Sopenharmony_ci ret = rbd_spec_fill_names(rbd_dev); 70178c2ecf20Sopenharmony_ci if (ret) { 70188c2ecf20Sopenharmony_ci if (ret == -ENOENT) 70198c2ecf20Sopenharmony_ci rbd_print_dne(rbd_dev, true); 70208c2ecf20Sopenharmony_ci goto err_out_probe; 70218c2ecf20Sopenharmony_ci } 70228c2ecf20Sopenharmony_ci 70238c2ecf20Sopenharmony_ci ret = rbd_dev_mapping_set(rbd_dev); 70248c2ecf20Sopenharmony_ci if (ret) 70258c2ecf20Sopenharmony_ci goto err_out_probe; 70268c2ecf20Sopenharmony_ci 70278c2ecf20Sopenharmony_ci if (rbd_is_snap(rbd_dev) && 70288c2ecf20Sopenharmony_ci (rbd_dev->header.features & RBD_FEATURE_OBJECT_MAP)) { 70298c2ecf20Sopenharmony_ci ret = rbd_object_map_load(rbd_dev); 70308c2ecf20Sopenharmony_ci if (ret) 70318c2ecf20Sopenharmony_ci goto err_out_probe; 70328c2ecf20Sopenharmony_ci } 70338c2ecf20Sopenharmony_ci 70348c2ecf20Sopenharmony_ci if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { 70358c2ecf20Sopenharmony_ci ret = rbd_dev_setup_parent(rbd_dev); 70368c2ecf20Sopenharmony_ci if (ret) 70378c2ecf20Sopenharmony_ci goto err_out_probe; 70388c2ecf20Sopenharmony_ci } 70398c2ecf20Sopenharmony_ci 70408c2ecf20Sopenharmony_ci ret = rbd_dev_probe_parent(rbd_dev, depth); 70418c2ecf20Sopenharmony_ci if (ret) 70428c2ecf20Sopenharmony_ci goto err_out_probe; 70438c2ecf20Sopenharmony_ci 70448c2ecf20Sopenharmony_ci dout("discovered format %u image, header name is %s\n", 70458c2ecf20Sopenharmony_ci rbd_dev->image_format, rbd_dev->header_oid.name); 70468c2ecf20Sopenharmony_ci return 0; 70478c2ecf20Sopenharmony_ci 70488c2ecf20Sopenharmony_cierr_out_probe: 70498c2ecf20Sopenharmony_ci if (!depth) 70508c2ecf20Sopenharmony_ci up_write(&rbd_dev->header_rwsem); 70518c2ecf20Sopenharmony_ci if (need_watch) 70528c2ecf20Sopenharmony_ci rbd_unregister_watch(rbd_dev); 70538c2ecf20Sopenharmony_ci 
rbd_dev_unprobe(rbd_dev); 70548c2ecf20Sopenharmony_cierr_out_format: 70558c2ecf20Sopenharmony_ci rbd_dev->image_format = 0; 70568c2ecf20Sopenharmony_ci kfree(rbd_dev->spec->image_id); 70578c2ecf20Sopenharmony_ci rbd_dev->spec->image_id = NULL; 70588c2ecf20Sopenharmony_ci return ret; 70598c2ecf20Sopenharmony_ci} 70608c2ecf20Sopenharmony_ci 70618c2ecf20Sopenharmony_cistatic void rbd_dev_update_header(struct rbd_device *rbd_dev, 70628c2ecf20Sopenharmony_ci struct rbd_image_header *header) 70638c2ecf20Sopenharmony_ci{ 70648c2ecf20Sopenharmony_ci rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 70658c2ecf20Sopenharmony_ci rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ 70668c2ecf20Sopenharmony_ci 70678c2ecf20Sopenharmony_ci if (rbd_dev->header.image_size != header->image_size) { 70688c2ecf20Sopenharmony_ci rbd_dev->header.image_size = header->image_size; 70698c2ecf20Sopenharmony_ci 70708c2ecf20Sopenharmony_ci if (!rbd_is_snap(rbd_dev)) { 70718c2ecf20Sopenharmony_ci rbd_dev->mapping.size = header->image_size; 70728c2ecf20Sopenharmony_ci rbd_dev_update_size(rbd_dev); 70738c2ecf20Sopenharmony_ci } 70748c2ecf20Sopenharmony_ci } 70758c2ecf20Sopenharmony_ci 70768c2ecf20Sopenharmony_ci ceph_put_snap_context(rbd_dev->header.snapc); 70778c2ecf20Sopenharmony_ci rbd_dev->header.snapc = header->snapc; 70788c2ecf20Sopenharmony_ci header->snapc = NULL; 70798c2ecf20Sopenharmony_ci 70808c2ecf20Sopenharmony_ci if (rbd_dev->image_format == 1) { 70818c2ecf20Sopenharmony_ci kfree(rbd_dev->header.snap_names); 70828c2ecf20Sopenharmony_ci rbd_dev->header.snap_names = header->snap_names; 70838c2ecf20Sopenharmony_ci header->snap_names = NULL; 70848c2ecf20Sopenharmony_ci 70858c2ecf20Sopenharmony_ci kfree(rbd_dev->header.snap_sizes); 70868c2ecf20Sopenharmony_ci rbd_dev->header.snap_sizes = header->snap_sizes; 70878c2ecf20Sopenharmony_ci header->snap_sizes = NULL; 70888c2ecf20Sopenharmony_ci } 70898c2ecf20Sopenharmony_ci} 70908c2ecf20Sopenharmony_ci 
70918c2ecf20Sopenharmony_cistatic void rbd_dev_update_parent(struct rbd_device *rbd_dev, 70928c2ecf20Sopenharmony_ci struct parent_image_info *pii) 70938c2ecf20Sopenharmony_ci{ 70948c2ecf20Sopenharmony_ci if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) { 70958c2ecf20Sopenharmony_ci /* 70968c2ecf20Sopenharmony_ci * Either the parent never existed, or we have 70978c2ecf20Sopenharmony_ci * record of it but the image got flattened so it no 70988c2ecf20Sopenharmony_ci * longer has a parent. When the parent of a 70998c2ecf20Sopenharmony_ci * layered image disappears we immediately set the 71008c2ecf20Sopenharmony_ci * overlap to 0. The effect of this is that all new 71018c2ecf20Sopenharmony_ci * requests will be treated as if the image had no 71028c2ecf20Sopenharmony_ci * parent. 71038c2ecf20Sopenharmony_ci * 71048c2ecf20Sopenharmony_ci * If !pii.has_overlap, the parent image spec is not 71058c2ecf20Sopenharmony_ci * applicable. It's there to avoid duplication in each 71068c2ecf20Sopenharmony_ci * snapshot record. 71078c2ecf20Sopenharmony_ci */ 71088c2ecf20Sopenharmony_ci if (rbd_dev->parent_overlap) { 71098c2ecf20Sopenharmony_ci rbd_dev->parent_overlap = 0; 71108c2ecf20Sopenharmony_ci rbd_dev_parent_put(rbd_dev); 71118c2ecf20Sopenharmony_ci pr_info("%s: clone has been flattened\n", 71128c2ecf20Sopenharmony_ci rbd_dev->disk->disk_name); 71138c2ecf20Sopenharmony_ci } 71148c2ecf20Sopenharmony_ci } else { 71158c2ecf20Sopenharmony_ci rbd_assert(rbd_dev->parent_spec); 71168c2ecf20Sopenharmony_ci 71178c2ecf20Sopenharmony_ci /* 71188c2ecf20Sopenharmony_ci * Update the parent overlap. If it became zero, issue 71198c2ecf20Sopenharmony_ci * a warning as we will proceed as if there is no parent. 
71208c2ecf20Sopenharmony_ci */ 71218c2ecf20Sopenharmony_ci if (!pii->overlap && rbd_dev->parent_overlap) 71228c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, 71238c2ecf20Sopenharmony_ci "clone has become standalone (overlap 0)"); 71248c2ecf20Sopenharmony_ci rbd_dev->parent_overlap = pii->overlap; 71258c2ecf20Sopenharmony_ci } 71268c2ecf20Sopenharmony_ci} 71278c2ecf20Sopenharmony_ci 71288c2ecf20Sopenharmony_cistatic int rbd_dev_refresh(struct rbd_device *rbd_dev) 71298c2ecf20Sopenharmony_ci{ 71308c2ecf20Sopenharmony_ci struct rbd_image_header header = { 0 }; 71318c2ecf20Sopenharmony_ci struct parent_image_info pii = { 0 }; 71328c2ecf20Sopenharmony_ci int ret; 71338c2ecf20Sopenharmony_ci 71348c2ecf20Sopenharmony_ci dout("%s rbd_dev %p\n", __func__, rbd_dev); 71358c2ecf20Sopenharmony_ci 71368c2ecf20Sopenharmony_ci ret = rbd_dev_header_info(rbd_dev, &header, false); 71378c2ecf20Sopenharmony_ci if (ret) 71388c2ecf20Sopenharmony_ci goto out; 71398c2ecf20Sopenharmony_ci 71408c2ecf20Sopenharmony_ci /* 71418c2ecf20Sopenharmony_ci * If there is a parent, see if it has disappeared due to the 71428c2ecf20Sopenharmony_ci * mapped image getting flattened. 
71438c2ecf20Sopenharmony_ci */ 71448c2ecf20Sopenharmony_ci if (rbd_dev->parent) { 71458c2ecf20Sopenharmony_ci ret = rbd_dev_v2_parent_info(rbd_dev, &pii); 71468c2ecf20Sopenharmony_ci if (ret) 71478c2ecf20Sopenharmony_ci goto out; 71488c2ecf20Sopenharmony_ci } 71498c2ecf20Sopenharmony_ci 71508c2ecf20Sopenharmony_ci down_write(&rbd_dev->header_rwsem); 71518c2ecf20Sopenharmony_ci rbd_dev_update_header(rbd_dev, &header); 71528c2ecf20Sopenharmony_ci if (rbd_dev->parent) 71538c2ecf20Sopenharmony_ci rbd_dev_update_parent(rbd_dev, &pii); 71548c2ecf20Sopenharmony_ci up_write(&rbd_dev->header_rwsem); 71558c2ecf20Sopenharmony_ci 71568c2ecf20Sopenharmony_ciout: 71578c2ecf20Sopenharmony_ci rbd_parent_info_cleanup(&pii); 71588c2ecf20Sopenharmony_ci rbd_image_header_cleanup(&header); 71598c2ecf20Sopenharmony_ci return ret; 71608c2ecf20Sopenharmony_ci} 71618c2ecf20Sopenharmony_ci 71628c2ecf20Sopenharmony_cistatic ssize_t do_rbd_add(struct bus_type *bus, 71638c2ecf20Sopenharmony_ci const char *buf, 71648c2ecf20Sopenharmony_ci size_t count) 71658c2ecf20Sopenharmony_ci{ 71668c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = NULL; 71678c2ecf20Sopenharmony_ci struct ceph_options *ceph_opts = NULL; 71688c2ecf20Sopenharmony_ci struct rbd_options *rbd_opts = NULL; 71698c2ecf20Sopenharmony_ci struct rbd_spec *spec = NULL; 71708c2ecf20Sopenharmony_ci struct rbd_client *rbdc; 71718c2ecf20Sopenharmony_ci int rc; 71728c2ecf20Sopenharmony_ci 71738c2ecf20Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 71748c2ecf20Sopenharmony_ci return -EPERM; 71758c2ecf20Sopenharmony_ci 71768c2ecf20Sopenharmony_ci if (!try_module_get(THIS_MODULE)) 71778c2ecf20Sopenharmony_ci return -ENODEV; 71788c2ecf20Sopenharmony_ci 71798c2ecf20Sopenharmony_ci /* parse add command */ 71808c2ecf20Sopenharmony_ci rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); 71818c2ecf20Sopenharmony_ci if (rc < 0) 71828c2ecf20Sopenharmony_ci goto out; 71838c2ecf20Sopenharmony_ci 71848c2ecf20Sopenharmony_ci rbdc = 
rbd_get_client(ceph_opts); 71858c2ecf20Sopenharmony_ci if (IS_ERR(rbdc)) { 71868c2ecf20Sopenharmony_ci rc = PTR_ERR(rbdc); 71878c2ecf20Sopenharmony_ci goto err_out_args; 71888c2ecf20Sopenharmony_ci } 71898c2ecf20Sopenharmony_ci 71908c2ecf20Sopenharmony_ci /* pick the pool */ 71918c2ecf20Sopenharmony_ci rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name); 71928c2ecf20Sopenharmony_ci if (rc < 0) { 71938c2ecf20Sopenharmony_ci if (rc == -ENOENT) 71948c2ecf20Sopenharmony_ci pr_info("pool %s does not exist\n", spec->pool_name); 71958c2ecf20Sopenharmony_ci goto err_out_client; 71968c2ecf20Sopenharmony_ci } 71978c2ecf20Sopenharmony_ci spec->pool_id = (u64)rc; 71988c2ecf20Sopenharmony_ci 71998c2ecf20Sopenharmony_ci rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts); 72008c2ecf20Sopenharmony_ci if (!rbd_dev) { 72018c2ecf20Sopenharmony_ci rc = -ENOMEM; 72028c2ecf20Sopenharmony_ci goto err_out_client; 72038c2ecf20Sopenharmony_ci } 72048c2ecf20Sopenharmony_ci rbdc = NULL; /* rbd_dev now owns this */ 72058c2ecf20Sopenharmony_ci spec = NULL; /* rbd_dev now owns this */ 72068c2ecf20Sopenharmony_ci rbd_opts = NULL; /* rbd_dev now owns this */ 72078c2ecf20Sopenharmony_ci 72088c2ecf20Sopenharmony_ci /* if we are mapping a snapshot it will be a read-only mapping */ 72098c2ecf20Sopenharmony_ci if (rbd_dev->opts->read_only || 72108c2ecf20Sopenharmony_ci strcmp(rbd_dev->spec->snap_name, RBD_SNAP_HEAD_NAME)) 72118c2ecf20Sopenharmony_ci __set_bit(RBD_DEV_FLAG_READONLY, &rbd_dev->flags); 72128c2ecf20Sopenharmony_ci 72138c2ecf20Sopenharmony_ci rbd_dev->config_info = kstrdup(buf, GFP_KERNEL); 72148c2ecf20Sopenharmony_ci if (!rbd_dev->config_info) { 72158c2ecf20Sopenharmony_ci rc = -ENOMEM; 72168c2ecf20Sopenharmony_ci goto err_out_rbd_dev; 72178c2ecf20Sopenharmony_ci } 72188c2ecf20Sopenharmony_ci 72198c2ecf20Sopenharmony_ci rc = rbd_dev_image_probe(rbd_dev, 0); 72208c2ecf20Sopenharmony_ci if (rc < 0) 72218c2ecf20Sopenharmony_ci goto err_out_rbd_dev; 72228c2ecf20Sopenharmony_ci 
72238c2ecf20Sopenharmony_ci if (rbd_dev->opts->alloc_size > rbd_dev->layout.object_size) { 72248c2ecf20Sopenharmony_ci rbd_warn(rbd_dev, "alloc_size adjusted to %u", 72258c2ecf20Sopenharmony_ci rbd_dev->layout.object_size); 72268c2ecf20Sopenharmony_ci rbd_dev->opts->alloc_size = rbd_dev->layout.object_size; 72278c2ecf20Sopenharmony_ci } 72288c2ecf20Sopenharmony_ci 72298c2ecf20Sopenharmony_ci rc = rbd_dev_device_setup(rbd_dev); 72308c2ecf20Sopenharmony_ci if (rc) 72318c2ecf20Sopenharmony_ci goto err_out_image_probe; 72328c2ecf20Sopenharmony_ci 72338c2ecf20Sopenharmony_ci rc = rbd_add_acquire_lock(rbd_dev); 72348c2ecf20Sopenharmony_ci if (rc) 72358c2ecf20Sopenharmony_ci goto err_out_image_lock; 72368c2ecf20Sopenharmony_ci 72378c2ecf20Sopenharmony_ci /* Everything's ready. Announce the disk to the world. */ 72388c2ecf20Sopenharmony_ci 72398c2ecf20Sopenharmony_ci rc = device_add(&rbd_dev->dev); 72408c2ecf20Sopenharmony_ci if (rc) 72418c2ecf20Sopenharmony_ci goto err_out_image_lock; 72428c2ecf20Sopenharmony_ci 72438c2ecf20Sopenharmony_ci device_add_disk(&rbd_dev->dev, rbd_dev->disk, NULL); 72448c2ecf20Sopenharmony_ci /* see rbd_init_disk() */ 72458c2ecf20Sopenharmony_ci blk_put_queue(rbd_dev->disk->queue); 72468c2ecf20Sopenharmony_ci 72478c2ecf20Sopenharmony_ci spin_lock(&rbd_dev_list_lock); 72488c2ecf20Sopenharmony_ci list_add_tail(&rbd_dev->node, &rbd_dev_list); 72498c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev_list_lock); 72508c2ecf20Sopenharmony_ci 72518c2ecf20Sopenharmony_ci pr_info("%s: capacity %llu features 0x%llx\n", rbd_dev->disk->disk_name, 72528c2ecf20Sopenharmony_ci (unsigned long long)get_capacity(rbd_dev->disk) << SECTOR_SHIFT, 72538c2ecf20Sopenharmony_ci rbd_dev->header.features); 72548c2ecf20Sopenharmony_ci rc = count; 72558c2ecf20Sopenharmony_ciout: 72568c2ecf20Sopenharmony_ci module_put(THIS_MODULE); 72578c2ecf20Sopenharmony_ci return rc; 72588c2ecf20Sopenharmony_ci 72598c2ecf20Sopenharmony_cierr_out_image_lock: 72608c2ecf20Sopenharmony_ci 
rbd_dev_image_unlock(rbd_dev); 72618c2ecf20Sopenharmony_ci rbd_dev_device_release(rbd_dev); 72628c2ecf20Sopenharmony_cierr_out_image_probe: 72638c2ecf20Sopenharmony_ci rbd_dev_image_release(rbd_dev); 72648c2ecf20Sopenharmony_cierr_out_rbd_dev: 72658c2ecf20Sopenharmony_ci rbd_dev_destroy(rbd_dev); 72668c2ecf20Sopenharmony_cierr_out_client: 72678c2ecf20Sopenharmony_ci rbd_put_client(rbdc); 72688c2ecf20Sopenharmony_cierr_out_args: 72698c2ecf20Sopenharmony_ci rbd_spec_put(spec); 72708c2ecf20Sopenharmony_ci kfree(rbd_opts); 72718c2ecf20Sopenharmony_ci goto out; 72728c2ecf20Sopenharmony_ci} 72738c2ecf20Sopenharmony_ci 72748c2ecf20Sopenharmony_cistatic ssize_t add_store(struct bus_type *bus, const char *buf, size_t count) 72758c2ecf20Sopenharmony_ci{ 72768c2ecf20Sopenharmony_ci if (single_major) 72778c2ecf20Sopenharmony_ci return -EINVAL; 72788c2ecf20Sopenharmony_ci 72798c2ecf20Sopenharmony_ci return do_rbd_add(bus, buf, count); 72808c2ecf20Sopenharmony_ci} 72818c2ecf20Sopenharmony_ci 72828c2ecf20Sopenharmony_cistatic ssize_t add_single_major_store(struct bus_type *bus, const char *buf, 72838c2ecf20Sopenharmony_ci size_t count) 72848c2ecf20Sopenharmony_ci{ 72858c2ecf20Sopenharmony_ci return do_rbd_add(bus, buf, count); 72868c2ecf20Sopenharmony_ci} 72878c2ecf20Sopenharmony_ci 72888c2ecf20Sopenharmony_cistatic void rbd_dev_remove_parent(struct rbd_device *rbd_dev) 72898c2ecf20Sopenharmony_ci{ 72908c2ecf20Sopenharmony_ci while (rbd_dev->parent) { 72918c2ecf20Sopenharmony_ci struct rbd_device *first = rbd_dev; 72928c2ecf20Sopenharmony_ci struct rbd_device *second = first->parent; 72938c2ecf20Sopenharmony_ci struct rbd_device *third; 72948c2ecf20Sopenharmony_ci 72958c2ecf20Sopenharmony_ci /* 72968c2ecf20Sopenharmony_ci * Follow to the parent with no grandparent and 72978c2ecf20Sopenharmony_ci * remove it. 
72988c2ecf20Sopenharmony_ci */ 72998c2ecf20Sopenharmony_ci while (second && (third = second->parent)) { 73008c2ecf20Sopenharmony_ci first = second; 73018c2ecf20Sopenharmony_ci second = third; 73028c2ecf20Sopenharmony_ci } 73038c2ecf20Sopenharmony_ci rbd_assert(second); 73048c2ecf20Sopenharmony_ci rbd_dev_image_release(second); 73058c2ecf20Sopenharmony_ci rbd_dev_destroy(second); 73068c2ecf20Sopenharmony_ci first->parent = NULL; 73078c2ecf20Sopenharmony_ci first->parent_overlap = 0; 73088c2ecf20Sopenharmony_ci 73098c2ecf20Sopenharmony_ci rbd_assert(first->parent_spec); 73108c2ecf20Sopenharmony_ci rbd_spec_put(first->parent_spec); 73118c2ecf20Sopenharmony_ci first->parent_spec = NULL; 73128c2ecf20Sopenharmony_ci } 73138c2ecf20Sopenharmony_ci} 73148c2ecf20Sopenharmony_ci 73158c2ecf20Sopenharmony_cistatic ssize_t do_rbd_remove(struct bus_type *bus, 73168c2ecf20Sopenharmony_ci const char *buf, 73178c2ecf20Sopenharmony_ci size_t count) 73188c2ecf20Sopenharmony_ci{ 73198c2ecf20Sopenharmony_ci struct rbd_device *rbd_dev = NULL; 73208c2ecf20Sopenharmony_ci struct list_head *tmp; 73218c2ecf20Sopenharmony_ci int dev_id; 73228c2ecf20Sopenharmony_ci char opt_buf[6]; 73238c2ecf20Sopenharmony_ci bool force = false; 73248c2ecf20Sopenharmony_ci int ret; 73258c2ecf20Sopenharmony_ci 73268c2ecf20Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 73278c2ecf20Sopenharmony_ci return -EPERM; 73288c2ecf20Sopenharmony_ci 73298c2ecf20Sopenharmony_ci dev_id = -1; 73308c2ecf20Sopenharmony_ci opt_buf[0] = '\0'; 73318c2ecf20Sopenharmony_ci sscanf(buf, "%d %5s", &dev_id, opt_buf); 73328c2ecf20Sopenharmony_ci if (dev_id < 0) { 73338c2ecf20Sopenharmony_ci pr_err("dev_id out of range\n"); 73348c2ecf20Sopenharmony_ci return -EINVAL; 73358c2ecf20Sopenharmony_ci } 73368c2ecf20Sopenharmony_ci if (opt_buf[0] != '\0') { 73378c2ecf20Sopenharmony_ci if (!strcmp(opt_buf, "force")) { 73388c2ecf20Sopenharmony_ci force = true; 73398c2ecf20Sopenharmony_ci } else { 73408c2ecf20Sopenharmony_ci pr_err("bad remove option 
at '%s'\n", opt_buf); 73418c2ecf20Sopenharmony_ci return -EINVAL; 73428c2ecf20Sopenharmony_ci } 73438c2ecf20Sopenharmony_ci } 73448c2ecf20Sopenharmony_ci 73458c2ecf20Sopenharmony_ci ret = -ENOENT; 73468c2ecf20Sopenharmony_ci spin_lock(&rbd_dev_list_lock); 73478c2ecf20Sopenharmony_ci list_for_each(tmp, &rbd_dev_list) { 73488c2ecf20Sopenharmony_ci rbd_dev = list_entry(tmp, struct rbd_device, node); 73498c2ecf20Sopenharmony_ci if (rbd_dev->dev_id == dev_id) { 73508c2ecf20Sopenharmony_ci ret = 0; 73518c2ecf20Sopenharmony_ci break; 73528c2ecf20Sopenharmony_ci } 73538c2ecf20Sopenharmony_ci } 73548c2ecf20Sopenharmony_ci if (!ret) { 73558c2ecf20Sopenharmony_ci spin_lock_irq(&rbd_dev->lock); 73568c2ecf20Sopenharmony_ci if (rbd_dev->open_count && !force) 73578c2ecf20Sopenharmony_ci ret = -EBUSY; 73588c2ecf20Sopenharmony_ci else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING, 73598c2ecf20Sopenharmony_ci &rbd_dev->flags)) 73608c2ecf20Sopenharmony_ci ret = -EINPROGRESS; 73618c2ecf20Sopenharmony_ci spin_unlock_irq(&rbd_dev->lock); 73628c2ecf20Sopenharmony_ci } 73638c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev_list_lock); 73648c2ecf20Sopenharmony_ci if (ret) 73658c2ecf20Sopenharmony_ci return ret; 73668c2ecf20Sopenharmony_ci 73678c2ecf20Sopenharmony_ci if (force) { 73688c2ecf20Sopenharmony_ci /* 73698c2ecf20Sopenharmony_ci * Prevent new IO from being queued and wait for existing 73708c2ecf20Sopenharmony_ci * IO to complete/fail. 
73718c2ecf20Sopenharmony_ci */ 73728c2ecf20Sopenharmony_ci blk_mq_freeze_queue(rbd_dev->disk->queue); 73738c2ecf20Sopenharmony_ci blk_set_queue_dying(rbd_dev->disk->queue); 73748c2ecf20Sopenharmony_ci } 73758c2ecf20Sopenharmony_ci 73768c2ecf20Sopenharmony_ci del_gendisk(rbd_dev->disk); 73778c2ecf20Sopenharmony_ci spin_lock(&rbd_dev_list_lock); 73788c2ecf20Sopenharmony_ci list_del_init(&rbd_dev->node); 73798c2ecf20Sopenharmony_ci spin_unlock(&rbd_dev_list_lock); 73808c2ecf20Sopenharmony_ci device_del(&rbd_dev->dev); 73818c2ecf20Sopenharmony_ci 73828c2ecf20Sopenharmony_ci rbd_dev_image_unlock(rbd_dev); 73838c2ecf20Sopenharmony_ci rbd_dev_device_release(rbd_dev); 73848c2ecf20Sopenharmony_ci rbd_dev_image_release(rbd_dev); 73858c2ecf20Sopenharmony_ci rbd_dev_destroy(rbd_dev); 73868c2ecf20Sopenharmony_ci return count; 73878c2ecf20Sopenharmony_ci} 73888c2ecf20Sopenharmony_ci 73898c2ecf20Sopenharmony_cistatic ssize_t remove_store(struct bus_type *bus, const char *buf, size_t count) 73908c2ecf20Sopenharmony_ci{ 73918c2ecf20Sopenharmony_ci if (single_major) 73928c2ecf20Sopenharmony_ci return -EINVAL; 73938c2ecf20Sopenharmony_ci 73948c2ecf20Sopenharmony_ci return do_rbd_remove(bus, buf, count); 73958c2ecf20Sopenharmony_ci} 73968c2ecf20Sopenharmony_ci 73978c2ecf20Sopenharmony_cistatic ssize_t remove_single_major_store(struct bus_type *bus, const char *buf, 73988c2ecf20Sopenharmony_ci size_t count) 73998c2ecf20Sopenharmony_ci{ 74008c2ecf20Sopenharmony_ci return do_rbd_remove(bus, buf, count); 74018c2ecf20Sopenharmony_ci} 74028c2ecf20Sopenharmony_ci 74038c2ecf20Sopenharmony_ci/* 74048c2ecf20Sopenharmony_ci * create control files in sysfs 74058c2ecf20Sopenharmony_ci * /sys/bus/rbd/... 
74068c2ecf20Sopenharmony_ci */ 74078c2ecf20Sopenharmony_cistatic int __init rbd_sysfs_init(void) 74088c2ecf20Sopenharmony_ci{ 74098c2ecf20Sopenharmony_ci int ret; 74108c2ecf20Sopenharmony_ci 74118c2ecf20Sopenharmony_ci ret = device_register(&rbd_root_dev); 74128c2ecf20Sopenharmony_ci if (ret < 0) 74138c2ecf20Sopenharmony_ci return ret; 74148c2ecf20Sopenharmony_ci 74158c2ecf20Sopenharmony_ci ret = bus_register(&rbd_bus_type); 74168c2ecf20Sopenharmony_ci if (ret < 0) 74178c2ecf20Sopenharmony_ci device_unregister(&rbd_root_dev); 74188c2ecf20Sopenharmony_ci 74198c2ecf20Sopenharmony_ci return ret; 74208c2ecf20Sopenharmony_ci} 74218c2ecf20Sopenharmony_ci 74228c2ecf20Sopenharmony_cistatic void __exit rbd_sysfs_cleanup(void) 74238c2ecf20Sopenharmony_ci{ 74248c2ecf20Sopenharmony_ci bus_unregister(&rbd_bus_type); 74258c2ecf20Sopenharmony_ci device_unregister(&rbd_root_dev); 74268c2ecf20Sopenharmony_ci} 74278c2ecf20Sopenharmony_ci 74288c2ecf20Sopenharmony_cistatic int __init rbd_slab_init(void) 74298c2ecf20Sopenharmony_ci{ 74308c2ecf20Sopenharmony_ci rbd_assert(!rbd_img_request_cache); 74318c2ecf20Sopenharmony_ci rbd_img_request_cache = KMEM_CACHE(rbd_img_request, 0); 74328c2ecf20Sopenharmony_ci if (!rbd_img_request_cache) 74338c2ecf20Sopenharmony_ci return -ENOMEM; 74348c2ecf20Sopenharmony_ci 74358c2ecf20Sopenharmony_ci rbd_assert(!rbd_obj_request_cache); 74368c2ecf20Sopenharmony_ci rbd_obj_request_cache = KMEM_CACHE(rbd_obj_request, 0); 74378c2ecf20Sopenharmony_ci if (!rbd_obj_request_cache) 74388c2ecf20Sopenharmony_ci goto out_err; 74398c2ecf20Sopenharmony_ci 74408c2ecf20Sopenharmony_ci return 0; 74418c2ecf20Sopenharmony_ci 74428c2ecf20Sopenharmony_ciout_err: 74438c2ecf20Sopenharmony_ci kmem_cache_destroy(rbd_img_request_cache); 74448c2ecf20Sopenharmony_ci rbd_img_request_cache = NULL; 74458c2ecf20Sopenharmony_ci return -ENOMEM; 74468c2ecf20Sopenharmony_ci} 74478c2ecf20Sopenharmony_ci 74488c2ecf20Sopenharmony_cistatic void rbd_slab_exit(void) 74498c2ecf20Sopenharmony_ci{ 
74508c2ecf20Sopenharmony_ci rbd_assert(rbd_obj_request_cache); 74518c2ecf20Sopenharmony_ci kmem_cache_destroy(rbd_obj_request_cache); 74528c2ecf20Sopenharmony_ci rbd_obj_request_cache = NULL; 74538c2ecf20Sopenharmony_ci 74548c2ecf20Sopenharmony_ci rbd_assert(rbd_img_request_cache); 74558c2ecf20Sopenharmony_ci kmem_cache_destroy(rbd_img_request_cache); 74568c2ecf20Sopenharmony_ci rbd_img_request_cache = NULL; 74578c2ecf20Sopenharmony_ci} 74588c2ecf20Sopenharmony_ci 74598c2ecf20Sopenharmony_cistatic int __init rbd_init(void) 74608c2ecf20Sopenharmony_ci{ 74618c2ecf20Sopenharmony_ci int rc; 74628c2ecf20Sopenharmony_ci 74638c2ecf20Sopenharmony_ci if (!libceph_compatible(NULL)) { 74648c2ecf20Sopenharmony_ci rbd_warn(NULL, "libceph incompatibility (quitting)"); 74658c2ecf20Sopenharmony_ci return -EINVAL; 74668c2ecf20Sopenharmony_ci } 74678c2ecf20Sopenharmony_ci 74688c2ecf20Sopenharmony_ci rc = rbd_slab_init(); 74698c2ecf20Sopenharmony_ci if (rc) 74708c2ecf20Sopenharmony_ci return rc; 74718c2ecf20Sopenharmony_ci 74728c2ecf20Sopenharmony_ci /* 74738c2ecf20Sopenharmony_ci * The number of active work items is limited by the number of 74748c2ecf20Sopenharmony_ci * rbd devices * queue depth, so leave @max_active at default. 
74758c2ecf20Sopenharmony_ci */ 74768c2ecf20Sopenharmony_ci rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0); 74778c2ecf20Sopenharmony_ci if (!rbd_wq) { 74788c2ecf20Sopenharmony_ci rc = -ENOMEM; 74798c2ecf20Sopenharmony_ci goto err_out_slab; 74808c2ecf20Sopenharmony_ci } 74818c2ecf20Sopenharmony_ci 74828c2ecf20Sopenharmony_ci if (single_major) { 74838c2ecf20Sopenharmony_ci rbd_major = register_blkdev(0, RBD_DRV_NAME); 74848c2ecf20Sopenharmony_ci if (rbd_major < 0) { 74858c2ecf20Sopenharmony_ci rc = rbd_major; 74868c2ecf20Sopenharmony_ci goto err_out_wq; 74878c2ecf20Sopenharmony_ci } 74888c2ecf20Sopenharmony_ci } 74898c2ecf20Sopenharmony_ci 74908c2ecf20Sopenharmony_ci rc = rbd_sysfs_init(); 74918c2ecf20Sopenharmony_ci if (rc) 74928c2ecf20Sopenharmony_ci goto err_out_blkdev; 74938c2ecf20Sopenharmony_ci 74948c2ecf20Sopenharmony_ci if (single_major) 74958c2ecf20Sopenharmony_ci pr_info("loaded (major %d)\n", rbd_major); 74968c2ecf20Sopenharmony_ci else 74978c2ecf20Sopenharmony_ci pr_info("loaded\n"); 74988c2ecf20Sopenharmony_ci 74998c2ecf20Sopenharmony_ci return 0; 75008c2ecf20Sopenharmony_ci 75018c2ecf20Sopenharmony_cierr_out_blkdev: 75028c2ecf20Sopenharmony_ci if (single_major) 75038c2ecf20Sopenharmony_ci unregister_blkdev(rbd_major, RBD_DRV_NAME); 75048c2ecf20Sopenharmony_cierr_out_wq: 75058c2ecf20Sopenharmony_ci destroy_workqueue(rbd_wq); 75068c2ecf20Sopenharmony_cierr_out_slab: 75078c2ecf20Sopenharmony_ci rbd_slab_exit(); 75088c2ecf20Sopenharmony_ci return rc; 75098c2ecf20Sopenharmony_ci} 75108c2ecf20Sopenharmony_ci 75118c2ecf20Sopenharmony_cistatic void __exit rbd_exit(void) 75128c2ecf20Sopenharmony_ci{ 75138c2ecf20Sopenharmony_ci ida_destroy(&rbd_dev_id_ida); 75148c2ecf20Sopenharmony_ci rbd_sysfs_cleanup(); 75158c2ecf20Sopenharmony_ci if (single_major) 75168c2ecf20Sopenharmony_ci unregister_blkdev(rbd_major, RBD_DRV_NAME); 75178c2ecf20Sopenharmony_ci destroy_workqueue(rbd_wq); 75188c2ecf20Sopenharmony_ci rbd_slab_exit(); 75198c2ecf20Sopenharmony_ci} 
75208c2ecf20Sopenharmony_ci 75218c2ecf20Sopenharmony_cimodule_init(rbd_init); 75228c2ecf20Sopenharmony_cimodule_exit(rbd_exit); 75238c2ecf20Sopenharmony_ci 75248c2ecf20Sopenharmony_ciMODULE_AUTHOR("Alex Elder <elder@inktank.com>"); 75258c2ecf20Sopenharmony_ciMODULE_AUTHOR("Sage Weil <sage@newdream.net>"); 75268c2ecf20Sopenharmony_ciMODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); 75278c2ecf20Sopenharmony_ci/* following authorship retained from original osdblk.c */ 75288c2ecf20Sopenharmony_ciMODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>"); 75298c2ecf20Sopenharmony_ci 75308c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("RADOS Block Device (RBD) driver"); 75318c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 7532