18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * blkfront.c 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * XenLinux virtual block device driver. 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 78c2ecf20Sopenharmony_ci * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 88c2ecf20Sopenharmony_ci * Copyright (c) 2004, Christian Limpach 98c2ecf20Sopenharmony_ci * Copyright (c) 2004, Andrew Warfield 108c2ecf20Sopenharmony_ci * Copyright (c) 2005, Christopher Clark 118c2ecf20Sopenharmony_ci * Copyright (c) 2005, XenSource Ltd 128c2ecf20Sopenharmony_ci * 138c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or 148c2ecf20Sopenharmony_ci * modify it under the terms of the GNU General Public License version 2 158c2ecf20Sopenharmony_ci * as published by the Free Software Foundation; or, when distributed 168c2ecf20Sopenharmony_ci * separately from the Linux kernel or incorporated into other 178c2ecf20Sopenharmony_ci * software packages, subject to the following license: 188c2ecf20Sopenharmony_ci * 198c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy 208c2ecf20Sopenharmony_ci * of this source file (the "Software"), to deal in the Software without 218c2ecf20Sopenharmony_ci * restriction, including without limitation the rights to use, copy, modify, 228c2ecf20Sopenharmony_ci * merge, publish, distribute, sublicense, and/or sell copies of the Software, 238c2ecf20Sopenharmony_ci * and to permit persons to whom the Software is furnished to do so, subject to 248c2ecf20Sopenharmony_ci * the following conditions: 258c2ecf20Sopenharmony_ci * 268c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 278c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software. 288c2ecf20Sopenharmony_ci * 298c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 308c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 318c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 328c2ecf20Sopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 338c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 348c2ecf20Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 358c2ecf20Sopenharmony_ci * IN THE SOFTWARE. 368c2ecf20Sopenharmony_ci */ 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci#include <linux/interrupt.h> 398c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 408c2ecf20Sopenharmony_ci#include <linux/blk-mq.h> 418c2ecf20Sopenharmony_ci#include <linux/hdreg.h> 428c2ecf20Sopenharmony_ci#include <linux/cdrom.h> 438c2ecf20Sopenharmony_ci#include <linux/module.h> 448c2ecf20Sopenharmony_ci#include <linux/slab.h> 458c2ecf20Sopenharmony_ci#include <linux/mutex.h> 468c2ecf20Sopenharmony_ci#include <linux/scatterlist.h> 478c2ecf20Sopenharmony_ci#include <linux/bitmap.h> 488c2ecf20Sopenharmony_ci#include <linux/list.h> 498c2ecf20Sopenharmony_ci#include <linux/workqueue.h> 508c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#include <xen/xen.h> 538c2ecf20Sopenharmony_ci#include <xen/xenbus.h> 548c2ecf20Sopenharmony_ci#include <xen/grant_table.h> 558c2ecf20Sopenharmony_ci#include <xen/events.h> 568c2ecf20Sopenharmony_ci#include <xen/page.h> 578c2ecf20Sopenharmony_ci#include <xen/platform_pci.h> 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci#include <xen/interface/grant_table.h> 608c2ecf20Sopenharmony_ci#include <xen/interface/io/blkif.h> 618c2ecf20Sopenharmony_ci#include <xen/interface/io/protocols.h> 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci#include <asm/xen/hypervisor.h> 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci/* 668c2ecf20Sopenharmony_ci * The minimal size of segment supported by the block framework is PAGE_SIZE. 678c2ecf20Sopenharmony_ci * When Linux is using a different page size than Xen, it may not be possible 688c2ecf20Sopenharmony_ci * to put all the data in a single segment. 698c2ecf20Sopenharmony_ci * This can happen when the backend doesn't support indirect descriptor and 708c2ecf20Sopenharmony_ci * therefore the maximum amount of data that a request can carry is 718c2ecf20Sopenharmony_ci * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE = 44KB 728c2ecf20Sopenharmony_ci * 738c2ecf20Sopenharmony_ci * Note that we only support one extra request. So the Linux page size 748c2ecf20Sopenharmony_ci * should be <= ( 2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) = 758c2ecf20Sopenharmony_ci * 88KB. 768c2ecf20Sopenharmony_ci */ 778c2ecf20Sopenharmony_ci#define HAS_EXTRA_REQ (BLKIF_MAX_SEGMENTS_PER_REQUEST < XEN_PFN_PER_PAGE) 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_cienum blkif_state { 808c2ecf20Sopenharmony_ci BLKIF_STATE_DISCONNECTED, 818c2ecf20Sopenharmony_ci BLKIF_STATE_CONNECTED, 828c2ecf20Sopenharmony_ci BLKIF_STATE_SUSPENDED, 838c2ecf20Sopenharmony_ci BLKIF_STATE_ERROR, 848c2ecf20Sopenharmony_ci}; 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_cistruct grant { 878c2ecf20Sopenharmony_ci grant_ref_t gref; 888c2ecf20Sopenharmony_ci struct page *page; 898c2ecf20Sopenharmony_ci struct list_head node; 908c2ecf20Sopenharmony_ci}; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_cienum blk_req_status { 938c2ecf20Sopenharmony_ci REQ_PROCESSING, 948c2ecf20Sopenharmony_ci REQ_WAITING, 958c2ecf20Sopenharmony_ci REQ_DONE, 968c2ecf20Sopenharmony_ci REQ_ERROR, 978c2ecf20Sopenharmony_ci REQ_EOPNOTSUPP, 988c2ecf20Sopenharmony_ci}; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_cistruct blk_shadow { 1018c2ecf20Sopenharmony_ci struct blkif_request req; 1028c2ecf20Sopenharmony_ci struct request *request; 1038c2ecf20Sopenharmony_ci struct grant **grants_used; 1048c2ecf20Sopenharmony_ci struct grant **indirect_grants; 1058c2ecf20Sopenharmony_ci struct scatterlist *sg; 1068c2ecf20Sopenharmony_ci unsigned int num_sg; 1078c2ecf20Sopenharmony_ci enum blk_req_status status; 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci #define NO_ASSOCIATED_ID ~0UL 1108c2ecf20Sopenharmony_ci /* 1118c2ecf20Sopenharmony_ci * Id of the sibling if we ever need 2 requests when handling a 1128c2ecf20Sopenharmony_ci * block I/O request 1138c2ecf20Sopenharmony_ci */ 1148c2ecf20Sopenharmony_ci unsigned long associated_id; 1158c2ecf20Sopenharmony_ci}; 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_cistruct blkif_req { 1188c2ecf20Sopenharmony_ci blk_status_t error; 1198c2ecf20Sopenharmony_ci}; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_cistatic inline struct blkif_req *blkif_req(struct request *rq) 1228c2ecf20Sopenharmony_ci{ 1238c2ecf20Sopenharmony_ci return blk_mq_rq_to_pdu(rq); 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(blkfront_mutex); 1278c2ecf20Sopenharmony_cistatic const struct block_device_operations xlvbd_block_fops; 1288c2ecf20Sopenharmony_cistatic struct delayed_work blkfront_work; 1298c2ecf20Sopenharmony_cistatic LIST_HEAD(info_list); 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci/* 1328c2ecf20Sopenharmony_ci * Maximum number of segments in indirect requests, the actual value used by 1338c2ecf20Sopenharmony_ci * the frontend driver is the minimum of this value and the value provided 1348c2ecf20Sopenharmony_ci * by the backend driver. 1358c2ecf20Sopenharmony_ci */ 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_cistatic unsigned int xen_blkif_max_segments = 32; 1388c2ecf20Sopenharmony_cimodule_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444); 1398c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_indirect_segments, 1408c2ecf20Sopenharmony_ci "Maximum amount of segments in indirect requests (default is 32)"); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_cistatic unsigned int xen_blkif_max_queues = 4; 1438c2ecf20Sopenharmony_cimodule_param_named(max_queues, xen_blkif_max_queues, uint, 0444); 1448c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk"); 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci/* 1478c2ecf20Sopenharmony_ci * Maximum order of pages to be used for the shared ring between front and 1488c2ecf20Sopenharmony_ci * backend, 4KB page granularity is used. 1498c2ecf20Sopenharmony_ci */ 1508c2ecf20Sopenharmony_cistatic unsigned int xen_blkif_max_ring_order; 1518c2ecf20Sopenharmony_cimodule_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444); 1528c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring"); 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_cistatic bool __read_mostly xen_blkif_trusted = true; 1558c2ecf20Sopenharmony_cimodule_param_named(trusted, xen_blkif_trusted, bool, 0644); 1568c2ecf20Sopenharmony_ciMODULE_PARM_DESC(trusted, "Is the backend trusted"); 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci#define BLK_RING_SIZE(info) \ 1598c2ecf20Sopenharmony_ci __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages) 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci/* 1628c2ecf20Sopenharmony_ci * ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19 1638c2ecf20Sopenharmony_ci * characters are enough. Define to 20 to keep consistent with backend. 1648c2ecf20Sopenharmony_ci */ 1658c2ecf20Sopenharmony_ci#define RINGREF_NAME_LEN (20) 1668c2ecf20Sopenharmony_ci/* 1678c2ecf20Sopenharmony_ci * queue-%u would take 7 + 10(UINT_MAX) = 17 characters. 1688c2ecf20Sopenharmony_ci */ 1698c2ecf20Sopenharmony_ci#define QUEUE_NAME_LEN (17) 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci/* 1728c2ecf20Sopenharmony_ci * Per-ring info. 1738c2ecf20Sopenharmony_ci * Every blkfront device can associate with one or more blkfront_ring_info, 1748c2ecf20Sopenharmony_ci * depending on how many hardware queues/rings to be used. 1758c2ecf20Sopenharmony_ci */ 1768c2ecf20Sopenharmony_cistruct blkfront_ring_info { 1778c2ecf20Sopenharmony_ci /* Lock to protect data in every ring buffer. */ 1788c2ecf20Sopenharmony_ci spinlock_t ring_lock; 1798c2ecf20Sopenharmony_ci struct blkif_front_ring ring; 1808c2ecf20Sopenharmony_ci unsigned int ring_ref[XENBUS_MAX_RING_GRANTS]; 1818c2ecf20Sopenharmony_ci unsigned int evtchn, irq; 1828c2ecf20Sopenharmony_ci struct work_struct work; 1838c2ecf20Sopenharmony_ci struct gnttab_free_callback callback; 1848c2ecf20Sopenharmony_ci struct list_head indirect_pages; 1858c2ecf20Sopenharmony_ci struct list_head grants; 1868c2ecf20Sopenharmony_ci unsigned int persistent_gnts_c; 1878c2ecf20Sopenharmony_ci unsigned long shadow_free; 1888c2ecf20Sopenharmony_ci struct blkfront_info *dev_info; 1898c2ecf20Sopenharmony_ci struct blk_shadow shadow[]; 1908c2ecf20Sopenharmony_ci}; 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci/* 1938c2ecf20Sopenharmony_ci * We have one of these per vbd, whether ide, scsi or 'other'. They 1948c2ecf20Sopenharmony_ci * hang in private_data off the gendisk structure. We may end up 1958c2ecf20Sopenharmony_ci * putting all kinds of interesting stuff here :-) 1968c2ecf20Sopenharmony_ci */ 1978c2ecf20Sopenharmony_cistruct blkfront_info 1988c2ecf20Sopenharmony_ci{ 1998c2ecf20Sopenharmony_ci struct mutex mutex; 2008c2ecf20Sopenharmony_ci struct xenbus_device *xbdev; 2018c2ecf20Sopenharmony_ci struct gendisk *gd; 2028c2ecf20Sopenharmony_ci u16 sector_size; 2038c2ecf20Sopenharmony_ci unsigned int physical_sector_size; 2048c2ecf20Sopenharmony_ci int vdevice; 2058c2ecf20Sopenharmony_ci blkif_vdev_t handle; 2068c2ecf20Sopenharmony_ci enum blkif_state connected; 2078c2ecf20Sopenharmony_ci /* Number of pages per ring buffer. */ 2088c2ecf20Sopenharmony_ci unsigned int nr_ring_pages; 2098c2ecf20Sopenharmony_ci struct request_queue *rq; 2108c2ecf20Sopenharmony_ci unsigned int feature_flush:1; 2118c2ecf20Sopenharmony_ci unsigned int feature_fua:1; 2128c2ecf20Sopenharmony_ci unsigned int feature_discard:1; 2138c2ecf20Sopenharmony_ci unsigned int feature_secdiscard:1; 2148c2ecf20Sopenharmony_ci /* Connect-time cached feature_persistent parameter */ 2158c2ecf20Sopenharmony_ci unsigned int feature_persistent_parm:1; 2168c2ecf20Sopenharmony_ci /* Persistent grants feature negotiation result */ 2178c2ecf20Sopenharmony_ci unsigned int feature_persistent:1; 2188c2ecf20Sopenharmony_ci unsigned int bounce:1; 2198c2ecf20Sopenharmony_ci unsigned int discard_granularity; 2208c2ecf20Sopenharmony_ci unsigned int discard_alignment; 2218c2ecf20Sopenharmony_ci /* Number of 4KB segments handled */ 2228c2ecf20Sopenharmony_ci unsigned int max_indirect_segments; 2238c2ecf20Sopenharmony_ci int is_ready; 2248c2ecf20Sopenharmony_ci struct blk_mq_tag_set tag_set; 2258c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 2268c2ecf20Sopenharmony_ci unsigned int nr_rings; 2278c2ecf20Sopenharmony_ci unsigned int rinfo_size; 2288c2ecf20Sopenharmony_ci /* Save uncomplete reqs and bios for migration. */ 2298c2ecf20Sopenharmony_ci struct list_head requests; 2308c2ecf20Sopenharmony_ci struct bio_list bio_list; 2318c2ecf20Sopenharmony_ci struct list_head info_list; 2328c2ecf20Sopenharmony_ci}; 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_cistatic unsigned int nr_minors; 2358c2ecf20Sopenharmony_cistatic unsigned long *minors; 2368c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(minor_lock); 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci#define GRANT_INVALID_REF 0 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci#define PARTS_PER_DISK 16 2418c2ecf20Sopenharmony_ci#define PARTS_PER_EXT_DISK 256 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci#define BLKIF_MAJOR(dev) ((dev)>>8) 2448c2ecf20Sopenharmony_ci#define BLKIF_MINOR(dev) ((dev) & 0xff) 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci#define EXT_SHIFT 28 2478c2ecf20Sopenharmony_ci#define EXTENDED (1<<EXT_SHIFT) 2488c2ecf20Sopenharmony_ci#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) 2498c2ecf20Sopenharmony_ci#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 2508c2ecf20Sopenharmony_ci#define EMULATED_HD_DISK_MINOR_OFFSET (0) 2518c2ecf20Sopenharmony_ci#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) 2528c2ecf20Sopenharmony_ci#define EMULATED_SD_DISK_MINOR_OFFSET (0) 2538c2ecf20Sopenharmony_ci#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci#define DEV_NAME "xvd" /* name in /dev */ 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci/* 2588c2ecf20Sopenharmony_ci * Grants are always the same size as a Xen page (i.e 4KB). 2598c2ecf20Sopenharmony_ci * A physical segment is always the same size as a Linux page. 2608c2ecf20Sopenharmony_ci * Number of grants per physical segment 2618c2ecf20Sopenharmony_ci */ 2628c2ecf20Sopenharmony_ci#define GRANTS_PER_PSEG (PAGE_SIZE / XEN_PAGE_SIZE) 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci#define GRANTS_PER_INDIRECT_FRAME \ 2658c2ecf20Sopenharmony_ci (XEN_PAGE_SIZE / sizeof(struct blkif_request_segment)) 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci#define INDIRECT_GREFS(_grants) \ 2688c2ecf20Sopenharmony_ci DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME) 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_cistatic int blkfront_setup_indirect(struct blkfront_ring_info *rinfo); 2718c2ecf20Sopenharmony_cistatic void blkfront_gather_backend_features(struct blkfront_info *info); 2728c2ecf20Sopenharmony_cistatic int negotiate_mq(struct blkfront_info *info); 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci#define for_each_rinfo(info, ptr, idx) \ 2758c2ecf20Sopenharmony_ci for ((ptr) = (info)->rinfo, (idx) = 0; \ 2768c2ecf20Sopenharmony_ci (idx) < (info)->nr_rings; \ 2778c2ecf20Sopenharmony_ci (idx)++, (ptr) = (void *)(ptr) + (info)->rinfo_size) 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_cistatic inline struct blkfront_ring_info * 2808c2ecf20Sopenharmony_ciget_rinfo(const struct blkfront_info *info, unsigned int i) 2818c2ecf20Sopenharmony_ci{ 2828c2ecf20Sopenharmony_ci BUG_ON(i >= info->nr_rings); 2838c2ecf20Sopenharmony_ci return (void *)info->rinfo + i * info->rinfo_size; 2848c2ecf20Sopenharmony_ci} 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_cistatic int get_id_from_freelist(struct blkfront_ring_info *rinfo) 2878c2ecf20Sopenharmony_ci{ 2888c2ecf20Sopenharmony_ci unsigned long free = rinfo->shadow_free; 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci BUG_ON(free >= BLK_RING_SIZE(rinfo->dev_info)); 2918c2ecf20Sopenharmony_ci rinfo->shadow_free = rinfo->shadow[free].req.u.rw.id; 2928c2ecf20Sopenharmony_ci rinfo->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ 2938c2ecf20Sopenharmony_ci return free; 2948c2ecf20Sopenharmony_ci} 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_cistatic int add_id_to_freelist(struct blkfront_ring_info *rinfo, 2978c2ecf20Sopenharmony_ci unsigned long id) 2988c2ecf20Sopenharmony_ci{ 2998c2ecf20Sopenharmony_ci if (rinfo->shadow[id].req.u.rw.id != id) 3008c2ecf20Sopenharmony_ci return -EINVAL; 3018c2ecf20Sopenharmony_ci if (rinfo->shadow[id].request == NULL) 3028c2ecf20Sopenharmony_ci return -EINVAL; 3038c2ecf20Sopenharmony_ci rinfo->shadow[id].req.u.rw.id = rinfo->shadow_free; 3048c2ecf20Sopenharmony_ci rinfo->shadow[id].request = NULL; 3058c2ecf20Sopenharmony_ci rinfo->shadow_free = id; 3068c2ecf20Sopenharmony_ci return 0; 3078c2ecf20Sopenharmony_ci} 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_cistatic int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num) 3108c2ecf20Sopenharmony_ci{ 3118c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 3128c2ecf20Sopenharmony_ci struct page *granted_page; 3138c2ecf20Sopenharmony_ci struct grant *gnt_list_entry, *n; 3148c2ecf20Sopenharmony_ci int i = 0; 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci while (i < num) { 3178c2ecf20Sopenharmony_ci gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO); 3188c2ecf20Sopenharmony_ci if (!gnt_list_entry) 3198c2ecf20Sopenharmony_ci goto out_of_memory; 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci if (info->bounce) { 3228c2ecf20Sopenharmony_ci granted_page = alloc_page(GFP_NOIO | __GFP_ZERO); 3238c2ecf20Sopenharmony_ci if (!granted_page) { 3248c2ecf20Sopenharmony_ci kfree(gnt_list_entry); 3258c2ecf20Sopenharmony_ci goto out_of_memory; 3268c2ecf20Sopenharmony_ci } 3278c2ecf20Sopenharmony_ci gnt_list_entry->page = granted_page; 3288c2ecf20Sopenharmony_ci } 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci gnt_list_entry->gref = GRANT_INVALID_REF; 3318c2ecf20Sopenharmony_ci list_add(&gnt_list_entry->node, &rinfo->grants); 3328c2ecf20Sopenharmony_ci i++; 3338c2ecf20Sopenharmony_ci } 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci return 0; 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ciout_of_memory: 3388c2ecf20Sopenharmony_ci list_for_each_entry_safe(gnt_list_entry, n, 3398c2ecf20Sopenharmony_ci &rinfo->grants, node) { 3408c2ecf20Sopenharmony_ci list_del(&gnt_list_entry->node); 3418c2ecf20Sopenharmony_ci if (info->bounce) 3428c2ecf20Sopenharmony_ci __free_page(gnt_list_entry->page); 3438c2ecf20Sopenharmony_ci kfree(gnt_list_entry); 3448c2ecf20Sopenharmony_ci i--; 3458c2ecf20Sopenharmony_ci } 3468c2ecf20Sopenharmony_ci BUG_ON(i != 0); 3478c2ecf20Sopenharmony_ci return -ENOMEM; 3488c2ecf20Sopenharmony_ci} 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_cistatic struct grant *get_free_grant(struct blkfront_ring_info *rinfo) 3518c2ecf20Sopenharmony_ci{ 3528c2ecf20Sopenharmony_ci struct grant *gnt_list_entry; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci BUG_ON(list_empty(&rinfo->grants)); 3558c2ecf20Sopenharmony_ci gnt_list_entry = list_first_entry(&rinfo->grants, struct grant, 3568c2ecf20Sopenharmony_ci node); 3578c2ecf20Sopenharmony_ci list_del(&gnt_list_entry->node); 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci if (gnt_list_entry->gref != GRANT_INVALID_REF) 3608c2ecf20Sopenharmony_ci rinfo->persistent_gnts_c--; 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci return gnt_list_entry; 3638c2ecf20Sopenharmony_ci} 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_cistatic inline void grant_foreign_access(const struct grant *gnt_list_entry, 3668c2ecf20Sopenharmony_ci const struct blkfront_info *info) 3678c2ecf20Sopenharmony_ci{ 3688c2ecf20Sopenharmony_ci gnttab_page_grant_foreign_access_ref_one(gnt_list_entry->gref, 3698c2ecf20Sopenharmony_ci info->xbdev->otherend_id, 3708c2ecf20Sopenharmony_ci gnt_list_entry->page, 3718c2ecf20Sopenharmony_ci 0); 3728c2ecf20Sopenharmony_ci} 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_cistatic struct grant *get_grant(grant_ref_t *gref_head, 3758c2ecf20Sopenharmony_ci unsigned long gfn, 3768c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo) 3778c2ecf20Sopenharmony_ci{ 3788c2ecf20Sopenharmony_ci struct grant *gnt_list_entry = get_free_grant(rinfo); 3798c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci if (gnt_list_entry->gref != GRANT_INVALID_REF) 3828c2ecf20Sopenharmony_ci return gnt_list_entry; 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci /* Assign a gref to this page */ 3858c2ecf20Sopenharmony_ci gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); 3868c2ecf20Sopenharmony_ci BUG_ON(gnt_list_entry->gref == -ENOSPC); 3878c2ecf20Sopenharmony_ci if (info->bounce) 3888c2ecf20Sopenharmony_ci grant_foreign_access(gnt_list_entry, info); 3898c2ecf20Sopenharmony_ci else { 3908c2ecf20Sopenharmony_ci /* Grant access to the GFN passed by the caller */ 3918c2ecf20Sopenharmony_ci gnttab_grant_foreign_access_ref(gnt_list_entry->gref, 3928c2ecf20Sopenharmony_ci info->xbdev->otherend_id, 3938c2ecf20Sopenharmony_ci gfn, 0); 3948c2ecf20Sopenharmony_ci } 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci return gnt_list_entry; 3978c2ecf20Sopenharmony_ci} 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_cistatic struct grant *get_indirect_grant(grant_ref_t *gref_head, 4008c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo) 4018c2ecf20Sopenharmony_ci{ 4028c2ecf20Sopenharmony_ci struct grant *gnt_list_entry = get_free_grant(rinfo); 4038c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci if (gnt_list_entry->gref != GRANT_INVALID_REF) 4068c2ecf20Sopenharmony_ci return gnt_list_entry; 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_ci /* Assign a gref to this page */ 4098c2ecf20Sopenharmony_ci gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); 4108c2ecf20Sopenharmony_ci BUG_ON(gnt_list_entry->gref == -ENOSPC); 4118c2ecf20Sopenharmony_ci if (!info->bounce) { 4128c2ecf20Sopenharmony_ci struct page *indirect_page; 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci /* Fetch a pre-allocated page to use for indirect grefs */ 4158c2ecf20Sopenharmony_ci BUG_ON(list_empty(&rinfo->indirect_pages)); 4168c2ecf20Sopenharmony_ci indirect_page = list_first_entry(&rinfo->indirect_pages, 4178c2ecf20Sopenharmony_ci struct page, lru); 4188c2ecf20Sopenharmony_ci list_del(&indirect_page->lru); 4198c2ecf20Sopenharmony_ci gnt_list_entry->page = indirect_page; 4208c2ecf20Sopenharmony_ci } 4218c2ecf20Sopenharmony_ci grant_foreign_access(gnt_list_entry, info); 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci return gnt_list_entry; 4248c2ecf20Sopenharmony_ci} 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_cistatic const char *op_name(int op) 4278c2ecf20Sopenharmony_ci{ 4288c2ecf20Sopenharmony_ci static const char *const names[] = { 4298c2ecf20Sopenharmony_ci [BLKIF_OP_READ] = "read", 4308c2ecf20Sopenharmony_ci [BLKIF_OP_WRITE] = "write", 4318c2ecf20Sopenharmony_ci [BLKIF_OP_WRITE_BARRIER] = "barrier", 4328c2ecf20Sopenharmony_ci [BLKIF_OP_FLUSH_DISKCACHE] = "flush", 4338c2ecf20Sopenharmony_ci [BLKIF_OP_DISCARD] = "discard" }; 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci if (op < 0 || op >= ARRAY_SIZE(names)) 4368c2ecf20Sopenharmony_ci return "unknown"; 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci if (!names[op]) 4398c2ecf20Sopenharmony_ci return "reserved"; 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci return names[op]; 4428c2ecf20Sopenharmony_ci} 4438c2ecf20Sopenharmony_cistatic int xlbd_reserve_minors(unsigned int minor, unsigned int nr) 4448c2ecf20Sopenharmony_ci{ 4458c2ecf20Sopenharmony_ci unsigned int end = minor + nr; 4468c2ecf20Sopenharmony_ci int rc; 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci if (end > nr_minors) { 4498c2ecf20Sopenharmony_ci unsigned long *bitmap, *old; 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap), 4528c2ecf20Sopenharmony_ci GFP_KERNEL); 4538c2ecf20Sopenharmony_ci if (bitmap == NULL) 4548c2ecf20Sopenharmony_ci return -ENOMEM; 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci spin_lock(&minor_lock); 4578c2ecf20Sopenharmony_ci if (end > nr_minors) { 4588c2ecf20Sopenharmony_ci old = minors; 4598c2ecf20Sopenharmony_ci memcpy(bitmap, minors, 4608c2ecf20Sopenharmony_ci BITS_TO_LONGS(nr_minors) * sizeof(*bitmap)); 4618c2ecf20Sopenharmony_ci minors = bitmap; 4628c2ecf20Sopenharmony_ci nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG; 4638c2ecf20Sopenharmony_ci } else 4648c2ecf20Sopenharmony_ci old = bitmap; 4658c2ecf20Sopenharmony_ci spin_unlock(&minor_lock); 4668c2ecf20Sopenharmony_ci kfree(old); 4678c2ecf20Sopenharmony_ci } 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci spin_lock(&minor_lock); 4708c2ecf20Sopenharmony_ci if (find_next_bit(minors, end, minor) >= end) { 4718c2ecf20Sopenharmony_ci bitmap_set(minors, minor, nr); 4728c2ecf20Sopenharmony_ci rc = 0; 4738c2ecf20Sopenharmony_ci } else 4748c2ecf20Sopenharmony_ci rc = -EBUSY; 4758c2ecf20Sopenharmony_ci spin_unlock(&minor_lock); 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci return rc; 4788c2ecf20Sopenharmony_ci} 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_cistatic void xlbd_release_minors(unsigned int minor, unsigned int nr) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci unsigned int end = minor + nr; 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci BUG_ON(end > nr_minors); 4858c2ecf20Sopenharmony_ci spin_lock(&minor_lock); 4868c2ecf20Sopenharmony_ci bitmap_clear(minors, minor, nr); 4878c2ecf20Sopenharmony_ci spin_unlock(&minor_lock); 4888c2ecf20Sopenharmony_ci} 4898c2ecf20Sopenharmony_ci 4908c2ecf20Sopenharmony_cistatic void blkif_restart_queue_callback(void *arg) 4918c2ecf20Sopenharmony_ci{ 4928c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)arg; 4938c2ecf20Sopenharmony_ci schedule_work(&rinfo->work); 4948c2ecf20Sopenharmony_ci} 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_cistatic int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) 4978c2ecf20Sopenharmony_ci{ 4988c2ecf20Sopenharmony_ci /* We don't have real geometry info, but let's at least return 4998c2ecf20Sopenharmony_ci values consistent with the size of the device */ 5008c2ecf20Sopenharmony_ci sector_t nsect = get_capacity(bd->bd_disk); 5018c2ecf20Sopenharmony_ci sector_t cylinders = nsect; 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci hg->heads = 0xff; 5048c2ecf20Sopenharmony_ci hg->sectors = 0x3f; 5058c2ecf20Sopenharmony_ci sector_div(cylinders, hg->heads * hg->sectors); 5068c2ecf20Sopenharmony_ci hg->cylinders = cylinders; 5078c2ecf20Sopenharmony_ci if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) 5088c2ecf20Sopenharmony_ci hg->cylinders = 0xffff; 5098c2ecf20Sopenharmony_ci return 0; 5108c2ecf20Sopenharmony_ci} 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_cistatic int blkif_ioctl(struct block_device *bdev, fmode_t mode, 5138c2ecf20Sopenharmony_ci unsigned command, unsigned long argument) 5148c2ecf20Sopenharmony_ci{ 5158c2ecf20Sopenharmony_ci struct blkfront_info *info = bdev->bd_disk->private_data; 5168c2ecf20Sopenharmony_ci int i; 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ci dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n", 5198c2ecf20Sopenharmony_ci command, (long)argument); 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci switch (command) { 5228c2ecf20Sopenharmony_ci case CDROMMULTISESSION: 5238c2ecf20Sopenharmony_ci dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n"); 5248c2ecf20Sopenharmony_ci for (i = 0; i < sizeof(struct cdrom_multisession); i++) 5258c2ecf20Sopenharmony_ci if (put_user(0, (char __user *)(argument + i))) 5268c2ecf20Sopenharmony_ci return -EFAULT; 5278c2ecf20Sopenharmony_ci return 0; 5288c2ecf20Sopenharmony_ci 5298c2ecf20Sopenharmony_ci case CDROM_GET_CAPABILITY: { 5308c2ecf20Sopenharmony_ci struct gendisk *gd = info->gd; 5318c2ecf20Sopenharmony_ci if (gd->flags & GENHD_FL_CD) 5328c2ecf20Sopenharmony_ci return 0; 5338c2ecf20Sopenharmony_ci return -EINVAL; 5348c2ecf20Sopenharmony_ci } 5358c2ecf20Sopenharmony_ci 5368c2ecf20Sopenharmony_ci default: 5378c2ecf20Sopenharmony_ci /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", 5388c2ecf20Sopenharmony_ci command);*/ 5398c2ecf20Sopenharmony_ci return -EINVAL; /* same return as native Linux */ 5408c2ecf20Sopenharmony_ci } 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci return 0; 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_cistatic unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, 5468c2ecf20Sopenharmony_ci struct request *req, 5478c2ecf20Sopenharmony_ci struct blkif_request **ring_req) 5488c2ecf20Sopenharmony_ci{ 5498c2ecf20Sopenharmony_ci unsigned long id; 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci *ring_req = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt); 5528c2ecf20Sopenharmony_ci rinfo->ring.req_prod_pvt++; 5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_ci id = get_id_from_freelist(rinfo); 5558c2ecf20Sopenharmony_ci rinfo->shadow[id].request = req; 5568c2ecf20Sopenharmony_ci rinfo->shadow[id].status = REQ_PROCESSING; 5578c2ecf20Sopenharmony_ci rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID; 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci rinfo->shadow[id].req.u.rw.id = id; 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_ci return id; 5628c2ecf20Sopenharmony_ci} 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_cistatic int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo) 5658c2ecf20Sopenharmony_ci{ 5668c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 5678c2ecf20Sopenharmony_ci struct blkif_request *ring_req, *final_ring_req; 5688c2ecf20Sopenharmony_ci unsigned long id; 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci /* Fill out a communications ring structure. */ 5718c2ecf20Sopenharmony_ci id = blkif_ring_get_request(rinfo, req, &final_ring_req); 5728c2ecf20Sopenharmony_ci ring_req = &rinfo->shadow[id].req; 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci ring_req->operation = BLKIF_OP_DISCARD; 5758c2ecf20Sopenharmony_ci ring_req->u.discard.nr_sectors = blk_rq_sectors(req); 5768c2ecf20Sopenharmony_ci ring_req->u.discard.id = id; 5778c2ecf20Sopenharmony_ci ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); 5788c2ecf20Sopenharmony_ci if (req_op(req) == REQ_OP_SECURE_ERASE && info->feature_secdiscard) 5798c2ecf20Sopenharmony_ci ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; 5808c2ecf20Sopenharmony_ci else 5818c2ecf20Sopenharmony_ci ring_req->u.discard.flag = 0; 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_ci /* Copy the request to the ring page. */ 5848c2ecf20Sopenharmony_ci *final_ring_req = *ring_req; 5858c2ecf20Sopenharmony_ci rinfo->shadow[id].status = REQ_WAITING; 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci return 0; 5888c2ecf20Sopenharmony_ci} 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_cistruct setup_rw_req { 5918c2ecf20Sopenharmony_ci unsigned int grant_idx; 5928c2ecf20Sopenharmony_ci struct blkif_request_segment *segments; 5938c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 5948c2ecf20Sopenharmony_ci struct blkif_request *ring_req; 5958c2ecf20Sopenharmony_ci grant_ref_t gref_head; 5968c2ecf20Sopenharmony_ci unsigned int id; 5978c2ecf20Sopenharmony_ci /* Only used when persistent grant is used and it's a read request */ 5988c2ecf20Sopenharmony_ci bool need_copy; 5998c2ecf20Sopenharmony_ci unsigned int bvec_off; 6008c2ecf20Sopenharmony_ci char *bvec_data; 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci bool require_extra_req; 6038c2ecf20Sopenharmony_ci struct blkif_request *extra_ring_req; 6048c2ecf20Sopenharmony_ci}; 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_cistatic void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset, 6078c2ecf20Sopenharmony_ci unsigned int len, void *data) 6088c2ecf20Sopenharmony_ci{ 6098c2ecf20Sopenharmony_ci struct setup_rw_req *setup = data; 6108c2ecf20Sopenharmony_ci int n, ref; 6118c2ecf20Sopenharmony_ci struct grant *gnt_list_entry; 6128c2ecf20Sopenharmony_ci unsigned int fsect, lsect; 6138c2ecf20Sopenharmony_ci /* Convenient aliases */ 6148c2ecf20Sopenharmony_ci unsigned int grant_idx = setup->grant_idx; 6158c2ecf20Sopenharmony_ci struct blkif_request *ring_req = setup->ring_req; 6168c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo = setup->rinfo; 6178c2ecf20Sopenharmony_ci /* 6188c2ecf20Sopenharmony_ci * We always use the shadow of the first request to store the list 6198c2ecf20Sopenharmony_ci * of grant associated to the block I/O request. This made the 6208c2ecf20Sopenharmony_ci * completion more easy to handle even if the block I/O request is 6218c2ecf20Sopenharmony_ci * split. 6228c2ecf20Sopenharmony_ci */ 6238c2ecf20Sopenharmony_ci struct blk_shadow *shadow = &rinfo->shadow[setup->id]; 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci if (unlikely(setup->require_extra_req && 6268c2ecf20Sopenharmony_ci grant_idx >= BLKIF_MAX_SEGMENTS_PER_REQUEST)) { 6278c2ecf20Sopenharmony_ci /* 6288c2ecf20Sopenharmony_ci * We are using the second request, setup grant_idx 6298c2ecf20Sopenharmony_ci * to be the index of the segment array. 6308c2ecf20Sopenharmony_ci */ 6318c2ecf20Sopenharmony_ci grant_idx -= BLKIF_MAX_SEGMENTS_PER_REQUEST; 6328c2ecf20Sopenharmony_ci ring_req = setup->extra_ring_req; 6338c2ecf20Sopenharmony_ci } 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci if ((ring_req->operation == BLKIF_OP_INDIRECT) && 6368c2ecf20Sopenharmony_ci (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) { 6378c2ecf20Sopenharmony_ci if (setup->segments) 6388c2ecf20Sopenharmony_ci kunmap_atomic(setup->segments); 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci n = grant_idx / GRANTS_PER_INDIRECT_FRAME; 6418c2ecf20Sopenharmony_ci gnt_list_entry = get_indirect_grant(&setup->gref_head, rinfo); 6428c2ecf20Sopenharmony_ci shadow->indirect_grants[n] = gnt_list_entry; 6438c2ecf20Sopenharmony_ci setup->segments = kmap_atomic(gnt_list_entry->page); 6448c2ecf20Sopenharmony_ci ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; 6458c2ecf20Sopenharmony_ci } 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ci gnt_list_entry = get_grant(&setup->gref_head, gfn, rinfo); 6488c2ecf20Sopenharmony_ci ref = gnt_list_entry->gref; 6498c2ecf20Sopenharmony_ci /* 6508c2ecf20Sopenharmony_ci * All the grants are stored in the shadow of the first 6518c2ecf20Sopenharmony_ci * request. Therefore we have to use the global index. 6528c2ecf20Sopenharmony_ci */ 6538c2ecf20Sopenharmony_ci shadow->grants_used[setup->grant_idx] = gnt_list_entry; 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci if (setup->need_copy) { 6568c2ecf20Sopenharmony_ci void *shared_data; 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci shared_data = kmap_atomic(gnt_list_entry->page); 6598c2ecf20Sopenharmony_ci /* 6608c2ecf20Sopenharmony_ci * this does not wipe data stored outside the 6618c2ecf20Sopenharmony_ci * range sg->offset..sg->offset+sg->length. 6628c2ecf20Sopenharmony_ci * Therefore, blkback *could* see data from 6638c2ecf20Sopenharmony_ci * previous requests. This is OK as long as 6648c2ecf20Sopenharmony_ci * persistent grants are shared with just one 6658c2ecf20Sopenharmony_ci * domain. It may need refactoring if this 6668c2ecf20Sopenharmony_ci * changes 6678c2ecf20Sopenharmony_ci */ 6688c2ecf20Sopenharmony_ci memcpy(shared_data + offset, 6698c2ecf20Sopenharmony_ci setup->bvec_data + setup->bvec_off, 6708c2ecf20Sopenharmony_ci len); 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_ci kunmap_atomic(shared_data); 6738c2ecf20Sopenharmony_ci setup->bvec_off += len; 6748c2ecf20Sopenharmony_ci } 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci fsect = offset >> 9; 6778c2ecf20Sopenharmony_ci lsect = fsect + (len >> 9) - 1; 6788c2ecf20Sopenharmony_ci if (ring_req->operation != BLKIF_OP_INDIRECT) { 6798c2ecf20Sopenharmony_ci ring_req->u.rw.seg[grant_idx] = 6808c2ecf20Sopenharmony_ci (struct blkif_request_segment) { 6818c2ecf20Sopenharmony_ci .gref = ref, 6828c2ecf20Sopenharmony_ci .first_sect = fsect, 6838c2ecf20Sopenharmony_ci .last_sect = lsect }; 6848c2ecf20Sopenharmony_ci } else { 6858c2ecf20Sopenharmony_ci setup->segments[grant_idx % GRANTS_PER_INDIRECT_FRAME] = 6868c2ecf20Sopenharmony_ci (struct blkif_request_segment) { 6878c2ecf20Sopenharmony_ci .gref = ref, 6888c2ecf20Sopenharmony_ci .first_sect = fsect, 6898c2ecf20Sopenharmony_ci .last_sect = lsect }; 6908c2ecf20Sopenharmony_ci } 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci (setup->grant_idx)++; 6938c2ecf20Sopenharmony_ci} 6948c2ecf20Sopenharmony_ci 6958c2ecf20Sopenharmony_cistatic void blkif_setup_extra_req(struct blkif_request *first, 6968c2ecf20Sopenharmony_ci struct blkif_request *second) 6978c2ecf20Sopenharmony_ci{ 6988c2ecf20Sopenharmony_ci uint16_t nr_segments = first->u.rw.nr_segments; 6998c2ecf20Sopenharmony_ci 7008c2ecf20Sopenharmony_ci /* 7018c2ecf20Sopenharmony_ci * The second request is only present when the first request uses 7028c2ecf20Sopenharmony_ci * all its segments. It's always the continuity of the first one. 7038c2ecf20Sopenharmony_ci */ 7048c2ecf20Sopenharmony_ci first->u.rw.nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci second->u.rw.nr_segments = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST; 7078c2ecf20Sopenharmony_ci second->u.rw.sector_number = first->u.rw.sector_number + 7088c2ecf20Sopenharmony_ci (BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512; 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_ci second->u.rw.handle = first->u.rw.handle; 7118c2ecf20Sopenharmony_ci second->operation = first->operation; 7128c2ecf20Sopenharmony_ci} 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_cistatic int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo) 7158c2ecf20Sopenharmony_ci{ 7168c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 7178c2ecf20Sopenharmony_ci struct blkif_request *ring_req, *extra_ring_req = NULL; 7188c2ecf20Sopenharmony_ci struct blkif_request *final_ring_req, *final_extra_ring_req = NULL; 7198c2ecf20Sopenharmony_ci unsigned long id, extra_id = NO_ASSOCIATED_ID; 7208c2ecf20Sopenharmony_ci bool require_extra_req = false; 7218c2ecf20Sopenharmony_ci int i; 7228c2ecf20Sopenharmony_ci struct setup_rw_req setup = { 7238c2ecf20Sopenharmony_ci .grant_idx = 0, 7248c2ecf20Sopenharmony_ci .segments = NULL, 7258c2ecf20Sopenharmony_ci .rinfo = rinfo, 7268c2ecf20Sopenharmony_ci .need_copy = rq_data_dir(req) && info->bounce, 7278c2ecf20Sopenharmony_ci }; 7288c2ecf20Sopenharmony_ci 7298c2ecf20Sopenharmony_ci /* 7308c2ecf20Sopenharmony_ci * Used to store if we are able to queue the request by just using 7318c2ecf20Sopenharmony_ci * existing persistent grants, or if we have to get new grants, 7328c2ecf20Sopenharmony_ci * as there are not sufficiently many free. 7338c2ecf20Sopenharmony_ci */ 7348c2ecf20Sopenharmony_ci bool new_persistent_gnts = false; 7358c2ecf20Sopenharmony_ci struct scatterlist *sg; 7368c2ecf20Sopenharmony_ci int num_sg, max_grefs, num_grant; 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci max_grefs = req->nr_phys_segments * GRANTS_PER_PSEG; 7398c2ecf20Sopenharmony_ci if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST) 7408c2ecf20Sopenharmony_ci /* 7418c2ecf20Sopenharmony_ci * If we are using indirect segments we need to account 7428c2ecf20Sopenharmony_ci * for the indirect grefs used in the request. 7438c2ecf20Sopenharmony_ci */ 7448c2ecf20Sopenharmony_ci max_grefs += INDIRECT_GREFS(max_grefs); 7458c2ecf20Sopenharmony_ci 7468c2ecf20Sopenharmony_ci /* Check if we have enough persistent grants to allocate a requests */ 7478c2ecf20Sopenharmony_ci if (rinfo->persistent_gnts_c < max_grefs) { 7488c2ecf20Sopenharmony_ci new_persistent_gnts = true; 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci if (gnttab_alloc_grant_references( 7518c2ecf20Sopenharmony_ci max_grefs - rinfo->persistent_gnts_c, 7528c2ecf20Sopenharmony_ci &setup.gref_head) < 0) { 7538c2ecf20Sopenharmony_ci gnttab_request_free_callback( 7548c2ecf20Sopenharmony_ci &rinfo->callback, 7558c2ecf20Sopenharmony_ci blkif_restart_queue_callback, 7568c2ecf20Sopenharmony_ci rinfo, 7578c2ecf20Sopenharmony_ci max_grefs - rinfo->persistent_gnts_c); 7588c2ecf20Sopenharmony_ci return 1; 7598c2ecf20Sopenharmony_ci } 7608c2ecf20Sopenharmony_ci } 7618c2ecf20Sopenharmony_ci 7628c2ecf20Sopenharmony_ci /* Fill out a communications ring structure. */ 7638c2ecf20Sopenharmony_ci id = blkif_ring_get_request(rinfo, req, &final_ring_req); 7648c2ecf20Sopenharmony_ci ring_req = &rinfo->shadow[id].req; 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); 7678c2ecf20Sopenharmony_ci num_grant = 0; 7688c2ecf20Sopenharmony_ci /* Calculate the number of grant used */ 7698c2ecf20Sopenharmony_ci for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) 7708c2ecf20Sopenharmony_ci num_grant += gnttab_count_grant(sg->offset, sg->length); 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci require_extra_req = info->max_indirect_segments == 0 && 7738c2ecf20Sopenharmony_ci num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST; 7748c2ecf20Sopenharmony_ci BUG_ON(!HAS_EXTRA_REQ && require_extra_req); 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci rinfo->shadow[id].num_sg = num_sg; 7778c2ecf20Sopenharmony_ci if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST && 7788c2ecf20Sopenharmony_ci likely(!require_extra_req)) { 7798c2ecf20Sopenharmony_ci /* 7808c2ecf20Sopenharmony_ci * The indirect operation can only be a BLKIF_OP_READ or 7818c2ecf20Sopenharmony_ci * BLKIF_OP_WRITE 7828c2ecf20Sopenharmony_ci */ 7838c2ecf20Sopenharmony_ci BUG_ON(req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA); 7848c2ecf20Sopenharmony_ci ring_req->operation = BLKIF_OP_INDIRECT; 7858c2ecf20Sopenharmony_ci ring_req->u.indirect.indirect_op = rq_data_dir(req) ? 7868c2ecf20Sopenharmony_ci BLKIF_OP_WRITE : BLKIF_OP_READ; 7878c2ecf20Sopenharmony_ci ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); 7888c2ecf20Sopenharmony_ci ring_req->u.indirect.handle = info->handle; 7898c2ecf20Sopenharmony_ci ring_req->u.indirect.nr_segments = num_grant; 7908c2ecf20Sopenharmony_ci } else { 7918c2ecf20Sopenharmony_ci ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); 7928c2ecf20Sopenharmony_ci ring_req->u.rw.handle = info->handle; 7938c2ecf20Sopenharmony_ci ring_req->operation = rq_data_dir(req) ? 7948c2ecf20Sopenharmony_ci BLKIF_OP_WRITE : BLKIF_OP_READ; 7958c2ecf20Sopenharmony_ci if (req_op(req) == REQ_OP_FLUSH || 7968c2ecf20Sopenharmony_ci (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) { 7978c2ecf20Sopenharmony_ci /* 7988c2ecf20Sopenharmony_ci * Ideally we can do an unordered flush-to-disk. 7998c2ecf20Sopenharmony_ci * In case the backend onlysupports barriers, use that. 8008c2ecf20Sopenharmony_ci * A barrier request a superset of FUA, so we can 8018c2ecf20Sopenharmony_ci * implement it the same way. (It's also a FLUSH+FUA, 8028c2ecf20Sopenharmony_ci * since it is guaranteed ordered WRT previous writes.) 8038c2ecf20Sopenharmony_ci */ 8048c2ecf20Sopenharmony_ci if (info->feature_flush && info->feature_fua) 8058c2ecf20Sopenharmony_ci ring_req->operation = 8068c2ecf20Sopenharmony_ci BLKIF_OP_WRITE_BARRIER; 8078c2ecf20Sopenharmony_ci else if (info->feature_flush) 8088c2ecf20Sopenharmony_ci ring_req->operation = 8098c2ecf20Sopenharmony_ci BLKIF_OP_FLUSH_DISKCACHE; 8108c2ecf20Sopenharmony_ci else 8118c2ecf20Sopenharmony_ci ring_req->operation = 0; 8128c2ecf20Sopenharmony_ci } 8138c2ecf20Sopenharmony_ci ring_req->u.rw.nr_segments = num_grant; 8148c2ecf20Sopenharmony_ci if (unlikely(require_extra_req)) { 8158c2ecf20Sopenharmony_ci extra_id = blkif_ring_get_request(rinfo, req, 8168c2ecf20Sopenharmony_ci &final_extra_ring_req); 8178c2ecf20Sopenharmony_ci extra_ring_req = &rinfo->shadow[extra_id].req; 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_ci /* 8208c2ecf20Sopenharmony_ci * Only the first request contains the scatter-gather 8218c2ecf20Sopenharmony_ci * list. 8228c2ecf20Sopenharmony_ci */ 8238c2ecf20Sopenharmony_ci rinfo->shadow[extra_id].num_sg = 0; 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci blkif_setup_extra_req(ring_req, extra_ring_req); 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ci /* Link the 2 requests together */ 8288c2ecf20Sopenharmony_ci rinfo->shadow[extra_id].associated_id = id; 8298c2ecf20Sopenharmony_ci rinfo->shadow[id].associated_id = extra_id; 8308c2ecf20Sopenharmony_ci } 8318c2ecf20Sopenharmony_ci } 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci setup.ring_req = ring_req; 8348c2ecf20Sopenharmony_ci setup.id = id; 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci setup.require_extra_req = require_extra_req; 8378c2ecf20Sopenharmony_ci if (unlikely(require_extra_req)) 8388c2ecf20Sopenharmony_ci setup.extra_ring_req = extra_ring_req; 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_ci for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) { 8418c2ecf20Sopenharmony_ci BUG_ON(sg->offset + sg->length > PAGE_SIZE); 8428c2ecf20Sopenharmony_ci 8438c2ecf20Sopenharmony_ci if (setup.need_copy) { 8448c2ecf20Sopenharmony_ci setup.bvec_off = sg->offset; 8458c2ecf20Sopenharmony_ci setup.bvec_data = kmap_atomic(sg_page(sg)); 8468c2ecf20Sopenharmony_ci } 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci gnttab_foreach_grant_in_range(sg_page(sg), 8498c2ecf20Sopenharmony_ci sg->offset, 8508c2ecf20Sopenharmony_ci sg->length, 8518c2ecf20Sopenharmony_ci blkif_setup_rw_req_grant, 8528c2ecf20Sopenharmony_ci &setup); 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci if (setup.need_copy) 8558c2ecf20Sopenharmony_ci kunmap_atomic(setup.bvec_data); 8568c2ecf20Sopenharmony_ci } 8578c2ecf20Sopenharmony_ci if (setup.segments) 8588c2ecf20Sopenharmony_ci kunmap_atomic(setup.segments); 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci /* Copy request(s) to the ring page. */ 8618c2ecf20Sopenharmony_ci *final_ring_req = *ring_req; 8628c2ecf20Sopenharmony_ci rinfo->shadow[id].status = REQ_WAITING; 8638c2ecf20Sopenharmony_ci if (unlikely(require_extra_req)) { 8648c2ecf20Sopenharmony_ci *final_extra_ring_req = *extra_ring_req; 8658c2ecf20Sopenharmony_ci rinfo->shadow[extra_id].status = REQ_WAITING; 8668c2ecf20Sopenharmony_ci } 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_ci if (new_persistent_gnts) 8698c2ecf20Sopenharmony_ci gnttab_free_grant_references(setup.gref_head); 8708c2ecf20Sopenharmony_ci 8718c2ecf20Sopenharmony_ci return 0; 8728c2ecf20Sopenharmony_ci} 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci/* 8758c2ecf20Sopenharmony_ci * Generate a Xen blkfront IO request from a blk layer request. Reads 8768c2ecf20Sopenharmony_ci * and writes are handled as expected. 8778c2ecf20Sopenharmony_ci * 8788c2ecf20Sopenharmony_ci * @req: a request struct 8798c2ecf20Sopenharmony_ci */ 8808c2ecf20Sopenharmony_cistatic int blkif_queue_request(struct request *req, struct blkfront_ring_info *rinfo) 8818c2ecf20Sopenharmony_ci{ 8828c2ecf20Sopenharmony_ci if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED)) 8838c2ecf20Sopenharmony_ci return 1; 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci if (unlikely(req_op(req) == REQ_OP_DISCARD || 8868c2ecf20Sopenharmony_ci req_op(req) == REQ_OP_SECURE_ERASE)) 8878c2ecf20Sopenharmony_ci return blkif_queue_discard_req(req, rinfo); 8888c2ecf20Sopenharmony_ci else 8898c2ecf20Sopenharmony_ci return blkif_queue_rw_req(req, rinfo); 8908c2ecf20Sopenharmony_ci} 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_cistatic inline void flush_requests(struct blkfront_ring_info *rinfo) 8938c2ecf20Sopenharmony_ci{ 8948c2ecf20Sopenharmony_ci int notify; 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rinfo->ring, notify); 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci if (notify) 8998c2ecf20Sopenharmony_ci notify_remote_via_irq(rinfo->irq); 9008c2ecf20Sopenharmony_ci} 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_cistatic inline bool blkif_request_flush_invalid(struct request *req, 9038c2ecf20Sopenharmony_ci struct blkfront_info *info) 9048c2ecf20Sopenharmony_ci{ 9058c2ecf20Sopenharmony_ci return (blk_rq_is_passthrough(req) || 9068c2ecf20Sopenharmony_ci ((req_op(req) == REQ_OP_FLUSH) && 9078c2ecf20Sopenharmony_ci !info->feature_flush) || 9088c2ecf20Sopenharmony_ci ((req->cmd_flags & REQ_FUA) && 9098c2ecf20Sopenharmony_ci !info->feature_fua)); 9108c2ecf20Sopenharmony_ci} 9118c2ecf20Sopenharmony_ci 9128c2ecf20Sopenharmony_cistatic blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, 9138c2ecf20Sopenharmony_ci const struct blk_mq_queue_data *qd) 9148c2ecf20Sopenharmony_ci{ 9158c2ecf20Sopenharmony_ci unsigned long flags; 9168c2ecf20Sopenharmony_ci int qid = hctx->queue_num; 9178c2ecf20Sopenharmony_ci struct blkfront_info *info = hctx->queue->queuedata; 9188c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo = NULL; 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci rinfo = get_rinfo(info, qid); 9218c2ecf20Sopenharmony_ci blk_mq_start_request(qd->rq); 9228c2ecf20Sopenharmony_ci spin_lock_irqsave(&rinfo->ring_lock, flags); 9238c2ecf20Sopenharmony_ci if (RING_FULL(&rinfo->ring)) 9248c2ecf20Sopenharmony_ci goto out_busy; 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info)) 9278c2ecf20Sopenharmony_ci goto out_err; 9288c2ecf20Sopenharmony_ci 9298c2ecf20Sopenharmony_ci if (blkif_queue_request(qd->rq, rinfo)) 9308c2ecf20Sopenharmony_ci goto out_busy; 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci flush_requests(rinfo); 9338c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 9348c2ecf20Sopenharmony_ci return BLK_STS_OK; 9358c2ecf20Sopenharmony_ci 9368c2ecf20Sopenharmony_ciout_err: 9378c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 9388c2ecf20Sopenharmony_ci return BLK_STS_IOERR; 9398c2ecf20Sopenharmony_ci 9408c2ecf20Sopenharmony_ciout_busy: 9418c2ecf20Sopenharmony_ci blk_mq_stop_hw_queue(hctx); 9428c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 9438c2ecf20Sopenharmony_ci return BLK_STS_DEV_RESOURCE; 9448c2ecf20Sopenharmony_ci} 9458c2ecf20Sopenharmony_ci 9468c2ecf20Sopenharmony_cistatic void blkif_complete_rq(struct request *rq) 9478c2ecf20Sopenharmony_ci{ 9488c2ecf20Sopenharmony_ci blk_mq_end_request(rq, blkif_req(rq)->error); 9498c2ecf20Sopenharmony_ci} 9508c2ecf20Sopenharmony_ci 9518c2ecf20Sopenharmony_cistatic const struct blk_mq_ops blkfront_mq_ops = { 9528c2ecf20Sopenharmony_ci .queue_rq = blkif_queue_rq, 9538c2ecf20Sopenharmony_ci .complete = blkif_complete_rq, 9548c2ecf20Sopenharmony_ci}; 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_cistatic void blkif_set_queue_limits(struct blkfront_info *info) 9578c2ecf20Sopenharmony_ci{ 9588c2ecf20Sopenharmony_ci struct request_queue *rq = info->rq; 9598c2ecf20Sopenharmony_ci struct gendisk *gd = info->gd; 9608c2ecf20Sopenharmony_ci unsigned int segments = info->max_indirect_segments ? : 9618c2ecf20Sopenharmony_ci BLKIF_MAX_SEGMENTS_PER_REQUEST; 9628c2ecf20Sopenharmony_ci 9638c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); 9648c2ecf20Sopenharmony_ci 9658c2ecf20Sopenharmony_ci if (info->feature_discard) { 9668c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); 9678c2ecf20Sopenharmony_ci blk_queue_max_discard_sectors(rq, get_capacity(gd)); 9688c2ecf20Sopenharmony_ci rq->limits.discard_granularity = info->discard_granularity ?: 9698c2ecf20Sopenharmony_ci info->physical_sector_size; 9708c2ecf20Sopenharmony_ci rq->limits.discard_alignment = info->discard_alignment; 9718c2ecf20Sopenharmony_ci if (info->feature_secdiscard) 9728c2ecf20Sopenharmony_ci blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); 9738c2ecf20Sopenharmony_ci } 9748c2ecf20Sopenharmony_ci 9758c2ecf20Sopenharmony_ci /* Hard sector size and max sectors impersonate the equiv. hardware. */ 9768c2ecf20Sopenharmony_ci blk_queue_logical_block_size(rq, info->sector_size); 9778c2ecf20Sopenharmony_ci blk_queue_physical_block_size(rq, info->physical_sector_size); 9788c2ecf20Sopenharmony_ci blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci /* Each segment in a request is up to an aligned page in size. */ 9818c2ecf20Sopenharmony_ci blk_queue_segment_boundary(rq, PAGE_SIZE - 1); 9828c2ecf20Sopenharmony_ci blk_queue_max_segment_size(rq, PAGE_SIZE); 9838c2ecf20Sopenharmony_ci 9848c2ecf20Sopenharmony_ci /* Ensure a merged request will fit in a single I/O ring slot. */ 9858c2ecf20Sopenharmony_ci blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); 9868c2ecf20Sopenharmony_ci 9878c2ecf20Sopenharmony_ci /* Make sure buffer addresses are sector-aligned. */ 9888c2ecf20Sopenharmony_ci blk_queue_dma_alignment(rq, 511); 9898c2ecf20Sopenharmony_ci} 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_cistatic int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, 9928c2ecf20Sopenharmony_ci unsigned int physical_sector_size) 9938c2ecf20Sopenharmony_ci{ 9948c2ecf20Sopenharmony_ci struct request_queue *rq; 9958c2ecf20Sopenharmony_ci struct blkfront_info *info = gd->private_data; 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_ci memset(&info->tag_set, 0, sizeof(info->tag_set)); 9988c2ecf20Sopenharmony_ci info->tag_set.ops = &blkfront_mq_ops; 9998c2ecf20Sopenharmony_ci info->tag_set.nr_hw_queues = info->nr_rings; 10008c2ecf20Sopenharmony_ci if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) { 10018c2ecf20Sopenharmony_ci /* 10028c2ecf20Sopenharmony_ci * When indirect descriptior is not supported, the I/O request 10038c2ecf20Sopenharmony_ci * will be split between multiple request in the ring. 10048c2ecf20Sopenharmony_ci * To avoid problems when sending the request, divide by 10058c2ecf20Sopenharmony_ci * 2 the depth of the queue. 10068c2ecf20Sopenharmony_ci */ 10078c2ecf20Sopenharmony_ci info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2; 10088c2ecf20Sopenharmony_ci } else 10098c2ecf20Sopenharmony_ci info->tag_set.queue_depth = BLK_RING_SIZE(info); 10108c2ecf20Sopenharmony_ci info->tag_set.numa_node = NUMA_NO_NODE; 10118c2ecf20Sopenharmony_ci info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 10128c2ecf20Sopenharmony_ci info->tag_set.cmd_size = sizeof(struct blkif_req); 10138c2ecf20Sopenharmony_ci info->tag_set.driver_data = info; 10148c2ecf20Sopenharmony_ci 10158c2ecf20Sopenharmony_ci if (blk_mq_alloc_tag_set(&info->tag_set)) 10168c2ecf20Sopenharmony_ci return -EINVAL; 10178c2ecf20Sopenharmony_ci rq = blk_mq_init_queue(&info->tag_set); 10188c2ecf20Sopenharmony_ci if (IS_ERR(rq)) { 10198c2ecf20Sopenharmony_ci blk_mq_free_tag_set(&info->tag_set); 10208c2ecf20Sopenharmony_ci return PTR_ERR(rq); 10218c2ecf20Sopenharmony_ci } 10228c2ecf20Sopenharmony_ci 10238c2ecf20Sopenharmony_ci rq->queuedata = info; 10248c2ecf20Sopenharmony_ci info->rq = gd->queue = rq; 10258c2ecf20Sopenharmony_ci info->gd = gd; 10268c2ecf20Sopenharmony_ci info->sector_size = sector_size; 10278c2ecf20Sopenharmony_ci info->physical_sector_size = physical_sector_size; 10288c2ecf20Sopenharmony_ci blkif_set_queue_limits(info); 10298c2ecf20Sopenharmony_ci 10308c2ecf20Sopenharmony_ci return 0; 10318c2ecf20Sopenharmony_ci} 10328c2ecf20Sopenharmony_ci 10338c2ecf20Sopenharmony_cistatic const char *flush_info(struct blkfront_info *info) 10348c2ecf20Sopenharmony_ci{ 10358c2ecf20Sopenharmony_ci if (info->feature_flush && info->feature_fua) 10368c2ecf20Sopenharmony_ci return "barrier: enabled;"; 10378c2ecf20Sopenharmony_ci else if (info->feature_flush) 10388c2ecf20Sopenharmony_ci return "flush diskcache: enabled;"; 10398c2ecf20Sopenharmony_ci else 10408c2ecf20Sopenharmony_ci return "barrier or flush: disabled;"; 10418c2ecf20Sopenharmony_ci} 10428c2ecf20Sopenharmony_ci 10438c2ecf20Sopenharmony_cistatic void xlvbd_flush(struct blkfront_info *info) 10448c2ecf20Sopenharmony_ci{ 10458c2ecf20Sopenharmony_ci blk_queue_write_cache(info->rq, info->feature_flush ? true : false, 10468c2ecf20Sopenharmony_ci info->feature_fua ? true : false); 10478c2ecf20Sopenharmony_ci pr_info("blkfront: %s: %s %s %s %s %s %s %s\n", 10488c2ecf20Sopenharmony_ci info->gd->disk_name, flush_info(info), 10498c2ecf20Sopenharmony_ci "persistent grants:", info->feature_persistent ? 10508c2ecf20Sopenharmony_ci "enabled;" : "disabled;", "indirect descriptors:", 10518c2ecf20Sopenharmony_ci info->max_indirect_segments ? "enabled;" : "disabled;", 10528c2ecf20Sopenharmony_ci "bounce buffer:", info->bounce ? "enabled" : "disabled;"); 10538c2ecf20Sopenharmony_ci} 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_cistatic int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) 10568c2ecf20Sopenharmony_ci{ 10578c2ecf20Sopenharmony_ci int major; 10588c2ecf20Sopenharmony_ci major = BLKIF_MAJOR(vdevice); 10598c2ecf20Sopenharmony_ci *minor = BLKIF_MINOR(vdevice); 10608c2ecf20Sopenharmony_ci switch (major) { 10618c2ecf20Sopenharmony_ci case XEN_IDE0_MAJOR: 10628c2ecf20Sopenharmony_ci *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET; 10638c2ecf20Sopenharmony_ci *minor = ((*minor / 64) * PARTS_PER_DISK) + 10648c2ecf20Sopenharmony_ci EMULATED_HD_DISK_MINOR_OFFSET; 10658c2ecf20Sopenharmony_ci break; 10668c2ecf20Sopenharmony_ci case XEN_IDE1_MAJOR: 10678c2ecf20Sopenharmony_ci *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET; 10688c2ecf20Sopenharmony_ci *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) + 10698c2ecf20Sopenharmony_ci EMULATED_HD_DISK_MINOR_OFFSET; 10708c2ecf20Sopenharmony_ci break; 10718c2ecf20Sopenharmony_ci case XEN_SCSI_DISK0_MAJOR: 10728c2ecf20Sopenharmony_ci *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET; 10738c2ecf20Sopenharmony_ci *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET; 10748c2ecf20Sopenharmony_ci break; 10758c2ecf20Sopenharmony_ci case XEN_SCSI_DISK1_MAJOR: 10768c2ecf20Sopenharmony_ci case XEN_SCSI_DISK2_MAJOR: 10778c2ecf20Sopenharmony_ci case XEN_SCSI_DISK3_MAJOR: 10788c2ecf20Sopenharmony_ci case XEN_SCSI_DISK4_MAJOR: 10798c2ecf20Sopenharmony_ci case XEN_SCSI_DISK5_MAJOR: 10808c2ecf20Sopenharmony_ci case XEN_SCSI_DISK6_MAJOR: 10818c2ecf20Sopenharmony_ci case XEN_SCSI_DISK7_MAJOR: 10828c2ecf20Sopenharmony_ci *offset = (*minor / PARTS_PER_DISK) + 10838c2ecf20Sopenharmony_ci ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) + 10848c2ecf20Sopenharmony_ci EMULATED_SD_DISK_NAME_OFFSET; 10858c2ecf20Sopenharmony_ci *minor = *minor + 10868c2ecf20Sopenharmony_ci ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) + 10878c2ecf20Sopenharmony_ci EMULATED_SD_DISK_MINOR_OFFSET; 10888c2ecf20Sopenharmony_ci break; 10898c2ecf20Sopenharmony_ci case XEN_SCSI_DISK8_MAJOR: 10908c2ecf20Sopenharmony_ci case XEN_SCSI_DISK9_MAJOR: 10918c2ecf20Sopenharmony_ci case XEN_SCSI_DISK10_MAJOR: 10928c2ecf20Sopenharmony_ci case XEN_SCSI_DISK11_MAJOR: 10938c2ecf20Sopenharmony_ci case XEN_SCSI_DISK12_MAJOR: 10948c2ecf20Sopenharmony_ci case XEN_SCSI_DISK13_MAJOR: 10958c2ecf20Sopenharmony_ci case XEN_SCSI_DISK14_MAJOR: 10968c2ecf20Sopenharmony_ci case XEN_SCSI_DISK15_MAJOR: 10978c2ecf20Sopenharmony_ci *offset = (*minor / PARTS_PER_DISK) + 10988c2ecf20Sopenharmony_ci ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) + 10998c2ecf20Sopenharmony_ci EMULATED_SD_DISK_NAME_OFFSET; 11008c2ecf20Sopenharmony_ci *minor = *minor + 11018c2ecf20Sopenharmony_ci ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) + 11028c2ecf20Sopenharmony_ci EMULATED_SD_DISK_MINOR_OFFSET; 11038c2ecf20Sopenharmony_ci break; 11048c2ecf20Sopenharmony_ci case XENVBD_MAJOR: 11058c2ecf20Sopenharmony_ci *offset = *minor / PARTS_PER_DISK; 11068c2ecf20Sopenharmony_ci break; 11078c2ecf20Sopenharmony_ci default: 11088c2ecf20Sopenharmony_ci printk(KERN_WARNING "blkfront: your disk configuration is " 11098c2ecf20Sopenharmony_ci "incorrect, please use an xvd device instead\n"); 11108c2ecf20Sopenharmony_ci return -ENODEV; 11118c2ecf20Sopenharmony_ci } 11128c2ecf20Sopenharmony_ci return 0; 11138c2ecf20Sopenharmony_ci} 11148c2ecf20Sopenharmony_ci 11158c2ecf20Sopenharmony_cistatic char *encode_disk_name(char *ptr, unsigned int n) 11168c2ecf20Sopenharmony_ci{ 11178c2ecf20Sopenharmony_ci if (n >= 26) 11188c2ecf20Sopenharmony_ci ptr = encode_disk_name(ptr, n / 26 - 1); 11198c2ecf20Sopenharmony_ci *ptr = 'a' + n % 26; 11208c2ecf20Sopenharmony_ci return ptr + 1; 11218c2ecf20Sopenharmony_ci} 11228c2ecf20Sopenharmony_ci 11238c2ecf20Sopenharmony_cistatic int xlvbd_alloc_gendisk(blkif_sector_t capacity, 11248c2ecf20Sopenharmony_ci struct blkfront_info *info, 11258c2ecf20Sopenharmony_ci u16 vdisk_info, u16 sector_size, 11268c2ecf20Sopenharmony_ci unsigned int physical_sector_size) 11278c2ecf20Sopenharmony_ci{ 11288c2ecf20Sopenharmony_ci struct gendisk *gd; 11298c2ecf20Sopenharmony_ci int nr_minors = 1; 11308c2ecf20Sopenharmony_ci int err; 11318c2ecf20Sopenharmony_ci unsigned int offset; 11328c2ecf20Sopenharmony_ci int minor; 11338c2ecf20Sopenharmony_ci int nr_parts; 11348c2ecf20Sopenharmony_ci char *ptr; 11358c2ecf20Sopenharmony_ci 11368c2ecf20Sopenharmony_ci BUG_ON(info->gd != NULL); 11378c2ecf20Sopenharmony_ci BUG_ON(info->rq != NULL); 11388c2ecf20Sopenharmony_ci 11398c2ecf20Sopenharmony_ci if ((info->vdevice>>EXT_SHIFT) > 1) { 11408c2ecf20Sopenharmony_ci /* this is above the extended range; something is wrong */ 11418c2ecf20Sopenharmony_ci printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); 11428c2ecf20Sopenharmony_ci return -ENODEV; 11438c2ecf20Sopenharmony_ci } 11448c2ecf20Sopenharmony_ci 11458c2ecf20Sopenharmony_ci if (!VDEV_IS_EXTENDED(info->vdevice)) { 11468c2ecf20Sopenharmony_ci err = xen_translate_vdev(info->vdevice, &minor, &offset); 11478c2ecf20Sopenharmony_ci if (err) 11488c2ecf20Sopenharmony_ci return err; 11498c2ecf20Sopenharmony_ci nr_parts = PARTS_PER_DISK; 11508c2ecf20Sopenharmony_ci } else { 11518c2ecf20Sopenharmony_ci minor = BLKIF_MINOR_EXT(info->vdevice); 11528c2ecf20Sopenharmony_ci nr_parts = PARTS_PER_EXT_DISK; 11538c2ecf20Sopenharmony_ci offset = minor / nr_parts; 11548c2ecf20Sopenharmony_ci if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) 11558c2ecf20Sopenharmony_ci printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " 11568c2ecf20Sopenharmony_ci "emulated IDE disks,\n\t choose an xvd device name" 11578c2ecf20Sopenharmony_ci "from xvde on\n", info->vdevice); 11588c2ecf20Sopenharmony_ci } 11598c2ecf20Sopenharmony_ci if (minor >> MINORBITS) { 11608c2ecf20Sopenharmony_ci pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n", 11618c2ecf20Sopenharmony_ci info->vdevice, minor); 11628c2ecf20Sopenharmony_ci return -ENODEV; 11638c2ecf20Sopenharmony_ci } 11648c2ecf20Sopenharmony_ci 11658c2ecf20Sopenharmony_ci if ((minor % nr_parts) == 0) 11668c2ecf20Sopenharmony_ci nr_minors = nr_parts; 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_ci err = xlbd_reserve_minors(minor, nr_minors); 11698c2ecf20Sopenharmony_ci if (err) 11708c2ecf20Sopenharmony_ci goto out; 11718c2ecf20Sopenharmony_ci err = -ENODEV; 11728c2ecf20Sopenharmony_ci 11738c2ecf20Sopenharmony_ci gd = alloc_disk(nr_minors); 11748c2ecf20Sopenharmony_ci if (gd == NULL) 11758c2ecf20Sopenharmony_ci goto release; 11768c2ecf20Sopenharmony_ci 11778c2ecf20Sopenharmony_ci strcpy(gd->disk_name, DEV_NAME); 11788c2ecf20Sopenharmony_ci ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset); 11798c2ecf20Sopenharmony_ci BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN); 11808c2ecf20Sopenharmony_ci if (nr_minors > 1) 11818c2ecf20Sopenharmony_ci *ptr = 0; 11828c2ecf20Sopenharmony_ci else 11838c2ecf20Sopenharmony_ci snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr, 11848c2ecf20Sopenharmony_ci "%d", minor & (nr_parts - 1)); 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ci gd->major = XENVBD_MAJOR; 11878c2ecf20Sopenharmony_ci gd->first_minor = minor; 11888c2ecf20Sopenharmony_ci gd->fops = &xlvbd_block_fops; 11898c2ecf20Sopenharmony_ci gd->private_data = info; 11908c2ecf20Sopenharmony_ci set_capacity(gd, capacity); 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_ci if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { 11938c2ecf20Sopenharmony_ci del_gendisk(gd); 11948c2ecf20Sopenharmony_ci goto release; 11958c2ecf20Sopenharmony_ci } 11968c2ecf20Sopenharmony_ci 11978c2ecf20Sopenharmony_ci xlvbd_flush(info); 11988c2ecf20Sopenharmony_ci 11998c2ecf20Sopenharmony_ci if (vdisk_info & VDISK_READONLY) 12008c2ecf20Sopenharmony_ci set_disk_ro(gd, 1); 12018c2ecf20Sopenharmony_ci 12028c2ecf20Sopenharmony_ci if (vdisk_info & VDISK_REMOVABLE) 12038c2ecf20Sopenharmony_ci gd->flags |= GENHD_FL_REMOVABLE; 12048c2ecf20Sopenharmony_ci 12058c2ecf20Sopenharmony_ci if (vdisk_info & VDISK_CDROM) 12068c2ecf20Sopenharmony_ci gd->flags |= GENHD_FL_CD; 12078c2ecf20Sopenharmony_ci 12088c2ecf20Sopenharmony_ci return 0; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci release: 12118c2ecf20Sopenharmony_ci xlbd_release_minors(minor, nr_minors); 12128c2ecf20Sopenharmony_ci out: 12138c2ecf20Sopenharmony_ci return err; 12148c2ecf20Sopenharmony_ci} 12158c2ecf20Sopenharmony_ci 12168c2ecf20Sopenharmony_cistatic void xlvbd_release_gendisk(struct blkfront_info *info) 12178c2ecf20Sopenharmony_ci{ 12188c2ecf20Sopenharmony_ci unsigned int minor, nr_minors, i; 12198c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 12208c2ecf20Sopenharmony_ci 12218c2ecf20Sopenharmony_ci if (info->rq == NULL) 12228c2ecf20Sopenharmony_ci return; 12238c2ecf20Sopenharmony_ci 12248c2ecf20Sopenharmony_ci /* No more blkif_request(). */ 12258c2ecf20Sopenharmony_ci blk_mq_stop_hw_queues(info->rq); 12268c2ecf20Sopenharmony_ci 12278c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 12288c2ecf20Sopenharmony_ci /* No more gnttab callback work. */ 12298c2ecf20Sopenharmony_ci gnttab_cancel_free_callback(&rinfo->callback); 12308c2ecf20Sopenharmony_ci 12318c2ecf20Sopenharmony_ci /* Flush gnttab callback work. Must be done with no locks held. */ 12328c2ecf20Sopenharmony_ci flush_work(&rinfo->work); 12338c2ecf20Sopenharmony_ci } 12348c2ecf20Sopenharmony_ci 12358c2ecf20Sopenharmony_ci del_gendisk(info->gd); 12368c2ecf20Sopenharmony_ci 12378c2ecf20Sopenharmony_ci minor = info->gd->first_minor; 12388c2ecf20Sopenharmony_ci nr_minors = info->gd->minors; 12398c2ecf20Sopenharmony_ci xlbd_release_minors(minor, nr_minors); 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_ci blk_cleanup_queue(info->rq); 12428c2ecf20Sopenharmony_ci blk_mq_free_tag_set(&info->tag_set); 12438c2ecf20Sopenharmony_ci info->rq = NULL; 12448c2ecf20Sopenharmony_ci 12458c2ecf20Sopenharmony_ci put_disk(info->gd); 12468c2ecf20Sopenharmony_ci info->gd = NULL; 12478c2ecf20Sopenharmony_ci} 12488c2ecf20Sopenharmony_ci 12498c2ecf20Sopenharmony_ci/* Already hold rinfo->ring_lock. */ 12508c2ecf20Sopenharmony_cistatic inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo) 12518c2ecf20Sopenharmony_ci{ 12528c2ecf20Sopenharmony_ci if (!RING_FULL(&rinfo->ring)) 12538c2ecf20Sopenharmony_ci blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true); 12548c2ecf20Sopenharmony_ci} 12558c2ecf20Sopenharmony_ci 12568c2ecf20Sopenharmony_cistatic void kick_pending_request_queues(struct blkfront_ring_info *rinfo) 12578c2ecf20Sopenharmony_ci{ 12588c2ecf20Sopenharmony_ci unsigned long flags; 12598c2ecf20Sopenharmony_ci 12608c2ecf20Sopenharmony_ci spin_lock_irqsave(&rinfo->ring_lock, flags); 12618c2ecf20Sopenharmony_ci kick_pending_request_queues_locked(rinfo); 12628c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 12638c2ecf20Sopenharmony_ci} 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_cistatic void blkif_restart_queue(struct work_struct *work) 12668c2ecf20Sopenharmony_ci{ 12678c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work); 12688c2ecf20Sopenharmony_ci 12698c2ecf20Sopenharmony_ci if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED) 12708c2ecf20Sopenharmony_ci kick_pending_request_queues(rinfo); 12718c2ecf20Sopenharmony_ci} 12728c2ecf20Sopenharmony_ci 12738c2ecf20Sopenharmony_cistatic void blkif_free_ring(struct blkfront_ring_info *rinfo) 12748c2ecf20Sopenharmony_ci{ 12758c2ecf20Sopenharmony_ci struct grant *persistent_gnt, *n; 12768c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 12778c2ecf20Sopenharmony_ci int i, j, segs; 12788c2ecf20Sopenharmony_ci 12798c2ecf20Sopenharmony_ci /* 12808c2ecf20Sopenharmony_ci * Remove indirect pages, this only happens when using indirect 12818c2ecf20Sopenharmony_ci * descriptors but not persistent grants 12828c2ecf20Sopenharmony_ci */ 12838c2ecf20Sopenharmony_ci if (!list_empty(&rinfo->indirect_pages)) { 12848c2ecf20Sopenharmony_ci struct page *indirect_page, *n; 12858c2ecf20Sopenharmony_ci 12868c2ecf20Sopenharmony_ci BUG_ON(info->bounce); 12878c2ecf20Sopenharmony_ci list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { 12888c2ecf20Sopenharmony_ci list_del(&indirect_page->lru); 12898c2ecf20Sopenharmony_ci __free_page(indirect_page); 12908c2ecf20Sopenharmony_ci } 12918c2ecf20Sopenharmony_ci } 12928c2ecf20Sopenharmony_ci 12938c2ecf20Sopenharmony_ci /* Remove all persistent grants. */ 12948c2ecf20Sopenharmony_ci if (!list_empty(&rinfo->grants)) { 12958c2ecf20Sopenharmony_ci list_for_each_entry_safe(persistent_gnt, n, 12968c2ecf20Sopenharmony_ci &rinfo->grants, node) { 12978c2ecf20Sopenharmony_ci list_del(&persistent_gnt->node); 12988c2ecf20Sopenharmony_ci if (persistent_gnt->gref != GRANT_INVALID_REF) { 12998c2ecf20Sopenharmony_ci gnttab_end_foreign_access(persistent_gnt->gref, 13008c2ecf20Sopenharmony_ci 0, 0UL); 13018c2ecf20Sopenharmony_ci rinfo->persistent_gnts_c--; 13028c2ecf20Sopenharmony_ci } 13038c2ecf20Sopenharmony_ci if (info->bounce) 13048c2ecf20Sopenharmony_ci __free_page(persistent_gnt->page); 13058c2ecf20Sopenharmony_ci kfree(persistent_gnt); 13068c2ecf20Sopenharmony_ci } 13078c2ecf20Sopenharmony_ci } 13088c2ecf20Sopenharmony_ci BUG_ON(rinfo->persistent_gnts_c != 0); 13098c2ecf20Sopenharmony_ci 13108c2ecf20Sopenharmony_ci for (i = 0; i < BLK_RING_SIZE(info); i++) { 13118c2ecf20Sopenharmony_ci /* 13128c2ecf20Sopenharmony_ci * Clear persistent grants present in requests already 13138c2ecf20Sopenharmony_ci * on the shared ring 13148c2ecf20Sopenharmony_ci */ 13158c2ecf20Sopenharmony_ci if (!rinfo->shadow[i].request) 13168c2ecf20Sopenharmony_ci goto free_shadow; 13178c2ecf20Sopenharmony_ci 13188c2ecf20Sopenharmony_ci segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ? 13198c2ecf20Sopenharmony_ci rinfo->shadow[i].req.u.indirect.nr_segments : 13208c2ecf20Sopenharmony_ci rinfo->shadow[i].req.u.rw.nr_segments; 13218c2ecf20Sopenharmony_ci for (j = 0; j < segs; j++) { 13228c2ecf20Sopenharmony_ci persistent_gnt = rinfo->shadow[i].grants_used[j]; 13238c2ecf20Sopenharmony_ci gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 13248c2ecf20Sopenharmony_ci if (info->bounce) 13258c2ecf20Sopenharmony_ci __free_page(persistent_gnt->page); 13268c2ecf20Sopenharmony_ci kfree(persistent_gnt); 13278c2ecf20Sopenharmony_ci } 13288c2ecf20Sopenharmony_ci 13298c2ecf20Sopenharmony_ci if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT) 13308c2ecf20Sopenharmony_ci /* 13318c2ecf20Sopenharmony_ci * If this is not an indirect operation don't try to 13328c2ecf20Sopenharmony_ci * free indirect segments 13338c2ecf20Sopenharmony_ci */ 13348c2ecf20Sopenharmony_ci goto free_shadow; 13358c2ecf20Sopenharmony_ci 13368c2ecf20Sopenharmony_ci for (j = 0; j < INDIRECT_GREFS(segs); j++) { 13378c2ecf20Sopenharmony_ci persistent_gnt = rinfo->shadow[i].indirect_grants[j]; 13388c2ecf20Sopenharmony_ci gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 13398c2ecf20Sopenharmony_ci __free_page(persistent_gnt->page); 13408c2ecf20Sopenharmony_ci kfree(persistent_gnt); 13418c2ecf20Sopenharmony_ci } 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_cifree_shadow: 13448c2ecf20Sopenharmony_ci kvfree(rinfo->shadow[i].grants_used); 13458c2ecf20Sopenharmony_ci rinfo->shadow[i].grants_used = NULL; 13468c2ecf20Sopenharmony_ci kvfree(rinfo->shadow[i].indirect_grants); 13478c2ecf20Sopenharmony_ci rinfo->shadow[i].indirect_grants = NULL; 13488c2ecf20Sopenharmony_ci kvfree(rinfo->shadow[i].sg); 13498c2ecf20Sopenharmony_ci rinfo->shadow[i].sg = NULL; 13508c2ecf20Sopenharmony_ci } 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci /* No more gnttab callback work. */ 13538c2ecf20Sopenharmony_ci gnttab_cancel_free_callback(&rinfo->callback); 13548c2ecf20Sopenharmony_ci 13558c2ecf20Sopenharmony_ci /* Flush gnttab callback work. Must be done with no locks held. */ 13568c2ecf20Sopenharmony_ci flush_work(&rinfo->work); 13578c2ecf20Sopenharmony_ci 13588c2ecf20Sopenharmony_ci /* Free resources associated with old device channel. */ 13598c2ecf20Sopenharmony_ci for (i = 0; i < info->nr_ring_pages; i++) { 13608c2ecf20Sopenharmony_ci if (rinfo->ring_ref[i] != GRANT_INVALID_REF) { 13618c2ecf20Sopenharmony_ci gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0); 13628c2ecf20Sopenharmony_ci rinfo->ring_ref[i] = GRANT_INVALID_REF; 13638c2ecf20Sopenharmony_ci } 13648c2ecf20Sopenharmony_ci } 13658c2ecf20Sopenharmony_ci free_pages_exact(rinfo->ring.sring, 13668c2ecf20Sopenharmony_ci info->nr_ring_pages * XEN_PAGE_SIZE); 13678c2ecf20Sopenharmony_ci rinfo->ring.sring = NULL; 13688c2ecf20Sopenharmony_ci 13698c2ecf20Sopenharmony_ci if (rinfo->irq) 13708c2ecf20Sopenharmony_ci unbind_from_irqhandler(rinfo->irq, rinfo); 13718c2ecf20Sopenharmony_ci rinfo->evtchn = rinfo->irq = 0; 13728c2ecf20Sopenharmony_ci} 13738c2ecf20Sopenharmony_ci 13748c2ecf20Sopenharmony_cistatic void blkif_free(struct blkfront_info *info, int suspend) 13758c2ecf20Sopenharmony_ci{ 13768c2ecf20Sopenharmony_ci unsigned int i; 13778c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 13788c2ecf20Sopenharmony_ci 13798c2ecf20Sopenharmony_ci /* Prevent new requests being issued until we fix things up. */ 13808c2ecf20Sopenharmony_ci info->connected = suspend ? 13818c2ecf20Sopenharmony_ci BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 13828c2ecf20Sopenharmony_ci /* No more blkif_request(). */ 13838c2ecf20Sopenharmony_ci if (info->rq) 13848c2ecf20Sopenharmony_ci blk_mq_stop_hw_queues(info->rq); 13858c2ecf20Sopenharmony_ci 13868c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) 13878c2ecf20Sopenharmony_ci blkif_free_ring(rinfo); 13888c2ecf20Sopenharmony_ci 13898c2ecf20Sopenharmony_ci kvfree(info->rinfo); 13908c2ecf20Sopenharmony_ci info->rinfo = NULL; 13918c2ecf20Sopenharmony_ci info->nr_rings = 0; 13928c2ecf20Sopenharmony_ci} 13938c2ecf20Sopenharmony_ci 13948c2ecf20Sopenharmony_cistruct copy_from_grant { 13958c2ecf20Sopenharmony_ci const struct blk_shadow *s; 13968c2ecf20Sopenharmony_ci unsigned int grant_idx; 13978c2ecf20Sopenharmony_ci unsigned int bvec_offset; 13988c2ecf20Sopenharmony_ci char *bvec_data; 13998c2ecf20Sopenharmony_ci}; 14008c2ecf20Sopenharmony_ci 14018c2ecf20Sopenharmony_cistatic void blkif_copy_from_grant(unsigned long gfn, unsigned int offset, 14028c2ecf20Sopenharmony_ci unsigned int len, void *data) 14038c2ecf20Sopenharmony_ci{ 14048c2ecf20Sopenharmony_ci struct copy_from_grant *info = data; 14058c2ecf20Sopenharmony_ci char *shared_data; 14068c2ecf20Sopenharmony_ci /* Convenient aliases */ 14078c2ecf20Sopenharmony_ci const struct blk_shadow *s = info->s; 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci shared_data = kmap_atomic(s->grants_used[info->grant_idx]->page); 14108c2ecf20Sopenharmony_ci 14118c2ecf20Sopenharmony_ci memcpy(info->bvec_data + info->bvec_offset, 14128c2ecf20Sopenharmony_ci shared_data + offset, len); 14138c2ecf20Sopenharmony_ci 14148c2ecf20Sopenharmony_ci info->bvec_offset += len; 14158c2ecf20Sopenharmony_ci info->grant_idx++; 14168c2ecf20Sopenharmony_ci 14178c2ecf20Sopenharmony_ci kunmap_atomic(shared_data); 14188c2ecf20Sopenharmony_ci} 14198c2ecf20Sopenharmony_ci 14208c2ecf20Sopenharmony_cistatic enum blk_req_status blkif_rsp_to_req_status(int rsp) 14218c2ecf20Sopenharmony_ci{ 14228c2ecf20Sopenharmony_ci switch (rsp) 14238c2ecf20Sopenharmony_ci { 14248c2ecf20Sopenharmony_ci case BLKIF_RSP_OKAY: 14258c2ecf20Sopenharmony_ci return REQ_DONE; 14268c2ecf20Sopenharmony_ci case BLKIF_RSP_EOPNOTSUPP: 14278c2ecf20Sopenharmony_ci return REQ_EOPNOTSUPP; 14288c2ecf20Sopenharmony_ci case BLKIF_RSP_ERROR: 14298c2ecf20Sopenharmony_ci default: 14308c2ecf20Sopenharmony_ci return REQ_ERROR; 14318c2ecf20Sopenharmony_ci } 14328c2ecf20Sopenharmony_ci} 14338c2ecf20Sopenharmony_ci 14348c2ecf20Sopenharmony_ci/* 14358c2ecf20Sopenharmony_ci * Get the final status of the block request based on two ring response 14368c2ecf20Sopenharmony_ci */ 14378c2ecf20Sopenharmony_cistatic int blkif_get_final_status(enum blk_req_status s1, 14388c2ecf20Sopenharmony_ci enum blk_req_status s2) 14398c2ecf20Sopenharmony_ci{ 14408c2ecf20Sopenharmony_ci BUG_ON(s1 < REQ_DONE); 14418c2ecf20Sopenharmony_ci BUG_ON(s2 < REQ_DONE); 14428c2ecf20Sopenharmony_ci 14438c2ecf20Sopenharmony_ci if (s1 == REQ_ERROR || s2 == REQ_ERROR) 14448c2ecf20Sopenharmony_ci return BLKIF_RSP_ERROR; 14458c2ecf20Sopenharmony_ci else if (s1 == REQ_EOPNOTSUPP || s2 == REQ_EOPNOTSUPP) 14468c2ecf20Sopenharmony_ci return BLKIF_RSP_EOPNOTSUPP; 14478c2ecf20Sopenharmony_ci return BLKIF_RSP_OKAY; 14488c2ecf20Sopenharmony_ci} 14498c2ecf20Sopenharmony_ci 14508c2ecf20Sopenharmony_ci/* 14518c2ecf20Sopenharmony_ci * Return values: 14528c2ecf20Sopenharmony_ci * 1 response processed. 14538c2ecf20Sopenharmony_ci * 0 missing further responses. 14548c2ecf20Sopenharmony_ci * -1 error while processing. 14558c2ecf20Sopenharmony_ci */ 14568c2ecf20Sopenharmony_cistatic int blkif_completion(unsigned long *id, 14578c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo, 14588c2ecf20Sopenharmony_ci struct blkif_response *bret) 14598c2ecf20Sopenharmony_ci{ 14608c2ecf20Sopenharmony_ci int i = 0; 14618c2ecf20Sopenharmony_ci struct scatterlist *sg; 14628c2ecf20Sopenharmony_ci int num_sg, num_grant; 14638c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 14648c2ecf20Sopenharmony_ci struct blk_shadow *s = &rinfo->shadow[*id]; 14658c2ecf20Sopenharmony_ci struct copy_from_grant data = { 14668c2ecf20Sopenharmony_ci .grant_idx = 0, 14678c2ecf20Sopenharmony_ci }; 14688c2ecf20Sopenharmony_ci 14698c2ecf20Sopenharmony_ci num_grant = s->req.operation == BLKIF_OP_INDIRECT ? 14708c2ecf20Sopenharmony_ci s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; 14718c2ecf20Sopenharmony_ci 14728c2ecf20Sopenharmony_ci /* The I/O request may be split in two. */ 14738c2ecf20Sopenharmony_ci if (unlikely(s->associated_id != NO_ASSOCIATED_ID)) { 14748c2ecf20Sopenharmony_ci struct blk_shadow *s2 = &rinfo->shadow[s->associated_id]; 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_ci /* Keep the status of the current response in shadow. */ 14778c2ecf20Sopenharmony_ci s->status = blkif_rsp_to_req_status(bret->status); 14788c2ecf20Sopenharmony_ci 14798c2ecf20Sopenharmony_ci /* Wait the second response if not yet here. */ 14808c2ecf20Sopenharmony_ci if (s2->status < REQ_DONE) 14818c2ecf20Sopenharmony_ci return 0; 14828c2ecf20Sopenharmony_ci 14838c2ecf20Sopenharmony_ci bret->status = blkif_get_final_status(s->status, 14848c2ecf20Sopenharmony_ci s2->status); 14858c2ecf20Sopenharmony_ci 14868c2ecf20Sopenharmony_ci /* 14878c2ecf20Sopenharmony_ci * All the grants is stored in the first shadow in order 14888c2ecf20Sopenharmony_ci * to make the completion code simpler. 14898c2ecf20Sopenharmony_ci */ 14908c2ecf20Sopenharmony_ci num_grant += s2->req.u.rw.nr_segments; 14918c2ecf20Sopenharmony_ci 14928c2ecf20Sopenharmony_ci /* 14938c2ecf20Sopenharmony_ci * The two responses may not come in order. Only the 14948c2ecf20Sopenharmony_ci * first request will store the scatter-gather list. 14958c2ecf20Sopenharmony_ci */ 14968c2ecf20Sopenharmony_ci if (s2->num_sg != 0) { 14978c2ecf20Sopenharmony_ci /* Update "id" with the ID of the first response. */ 14988c2ecf20Sopenharmony_ci *id = s->associated_id; 14998c2ecf20Sopenharmony_ci s = s2; 15008c2ecf20Sopenharmony_ci } 15018c2ecf20Sopenharmony_ci 15028c2ecf20Sopenharmony_ci /* 15038c2ecf20Sopenharmony_ci * We don't need anymore the second request, so recycling 15048c2ecf20Sopenharmony_ci * it now. 15058c2ecf20Sopenharmony_ci */ 15068c2ecf20Sopenharmony_ci if (add_id_to_freelist(rinfo, s->associated_id)) 15078c2ecf20Sopenharmony_ci WARN(1, "%s: can't recycle the second part (id = %ld) of the request\n", 15088c2ecf20Sopenharmony_ci info->gd->disk_name, s->associated_id); 15098c2ecf20Sopenharmony_ci } 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci data.s = s; 15128c2ecf20Sopenharmony_ci num_sg = s->num_sg; 15138c2ecf20Sopenharmony_ci 15148c2ecf20Sopenharmony_ci if (bret->operation == BLKIF_OP_READ && info->bounce) { 15158c2ecf20Sopenharmony_ci for_each_sg(s->sg, sg, num_sg, i) { 15168c2ecf20Sopenharmony_ci BUG_ON(sg->offset + sg->length > PAGE_SIZE); 15178c2ecf20Sopenharmony_ci 15188c2ecf20Sopenharmony_ci data.bvec_offset = sg->offset; 15198c2ecf20Sopenharmony_ci data.bvec_data = kmap_atomic(sg_page(sg)); 15208c2ecf20Sopenharmony_ci 15218c2ecf20Sopenharmony_ci gnttab_foreach_grant_in_range(sg_page(sg), 15228c2ecf20Sopenharmony_ci sg->offset, 15238c2ecf20Sopenharmony_ci sg->length, 15248c2ecf20Sopenharmony_ci blkif_copy_from_grant, 15258c2ecf20Sopenharmony_ci &data); 15268c2ecf20Sopenharmony_ci 15278c2ecf20Sopenharmony_ci kunmap_atomic(data.bvec_data); 15288c2ecf20Sopenharmony_ci } 15298c2ecf20Sopenharmony_ci } 15308c2ecf20Sopenharmony_ci /* Add the persistent grant into the list of free grants */ 15318c2ecf20Sopenharmony_ci for (i = 0; i < num_grant; i++) { 15328c2ecf20Sopenharmony_ci if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) { 15338c2ecf20Sopenharmony_ci /* 15348c2ecf20Sopenharmony_ci * If the grant is still mapped by the backend (the 15358c2ecf20Sopenharmony_ci * backend has chosen to make this grant persistent) 15368c2ecf20Sopenharmony_ci * we add it at the head of the list, so it will be 15378c2ecf20Sopenharmony_ci * reused first. 15388c2ecf20Sopenharmony_ci */ 15398c2ecf20Sopenharmony_ci if (!info->feature_persistent) { 15408c2ecf20Sopenharmony_ci pr_alert("backed has not unmapped grant: %u\n", 15418c2ecf20Sopenharmony_ci s->grants_used[i]->gref); 15428c2ecf20Sopenharmony_ci return -1; 15438c2ecf20Sopenharmony_ci } 15448c2ecf20Sopenharmony_ci list_add(&s->grants_used[i]->node, &rinfo->grants); 15458c2ecf20Sopenharmony_ci rinfo->persistent_gnts_c++; 15468c2ecf20Sopenharmony_ci } else { 15478c2ecf20Sopenharmony_ci /* 15488c2ecf20Sopenharmony_ci * If the grant is not mapped by the backend we add it 15498c2ecf20Sopenharmony_ci * to the tail of the list, so it will not be picked 15508c2ecf20Sopenharmony_ci * again unless we run out of persistent grants. 15518c2ecf20Sopenharmony_ci */ 15528c2ecf20Sopenharmony_ci s->grants_used[i]->gref = GRANT_INVALID_REF; 15538c2ecf20Sopenharmony_ci list_add_tail(&s->grants_used[i]->node, &rinfo->grants); 15548c2ecf20Sopenharmony_ci } 15558c2ecf20Sopenharmony_ci } 15568c2ecf20Sopenharmony_ci if (s->req.operation == BLKIF_OP_INDIRECT) { 15578c2ecf20Sopenharmony_ci for (i = 0; i < INDIRECT_GREFS(num_grant); i++) { 15588c2ecf20Sopenharmony_ci if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) { 15598c2ecf20Sopenharmony_ci if (!info->feature_persistent) { 15608c2ecf20Sopenharmony_ci pr_alert("backed has not unmapped grant: %u\n", 15618c2ecf20Sopenharmony_ci s->indirect_grants[i]->gref); 15628c2ecf20Sopenharmony_ci return -1; 15638c2ecf20Sopenharmony_ci } 15648c2ecf20Sopenharmony_ci list_add(&s->indirect_grants[i]->node, &rinfo->grants); 15658c2ecf20Sopenharmony_ci rinfo->persistent_gnts_c++; 15668c2ecf20Sopenharmony_ci } else { 15678c2ecf20Sopenharmony_ci struct page *indirect_page; 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci /* 15708c2ecf20Sopenharmony_ci * Add the used indirect page back to the list of 15718c2ecf20Sopenharmony_ci * available pages for indirect grefs. 15728c2ecf20Sopenharmony_ci */ 15738c2ecf20Sopenharmony_ci if (!info->bounce) { 15748c2ecf20Sopenharmony_ci indirect_page = s->indirect_grants[i]->page; 15758c2ecf20Sopenharmony_ci list_add(&indirect_page->lru, &rinfo->indirect_pages); 15768c2ecf20Sopenharmony_ci } 15778c2ecf20Sopenharmony_ci s->indirect_grants[i]->gref = GRANT_INVALID_REF; 15788c2ecf20Sopenharmony_ci list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants); 15798c2ecf20Sopenharmony_ci } 15808c2ecf20Sopenharmony_ci } 15818c2ecf20Sopenharmony_ci } 15828c2ecf20Sopenharmony_ci 15838c2ecf20Sopenharmony_ci return 1; 15848c2ecf20Sopenharmony_ci} 15858c2ecf20Sopenharmony_ci 15868c2ecf20Sopenharmony_cistatic irqreturn_t blkif_interrupt(int irq, void *dev_id) 15878c2ecf20Sopenharmony_ci{ 15888c2ecf20Sopenharmony_ci struct request *req; 15898c2ecf20Sopenharmony_ci struct blkif_response bret; 15908c2ecf20Sopenharmony_ci RING_IDX i, rp; 15918c2ecf20Sopenharmony_ci unsigned long flags; 15928c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; 15938c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 15948c2ecf20Sopenharmony_ci unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; 15958c2ecf20Sopenharmony_ci 15968c2ecf20Sopenharmony_ci if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 15978c2ecf20Sopenharmony_ci xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); 15988c2ecf20Sopenharmony_ci return IRQ_HANDLED; 15998c2ecf20Sopenharmony_ci } 16008c2ecf20Sopenharmony_ci 16018c2ecf20Sopenharmony_ci spin_lock_irqsave(&rinfo->ring_lock, flags); 16028c2ecf20Sopenharmony_ci again: 16038c2ecf20Sopenharmony_ci rp = READ_ONCE(rinfo->ring.sring->rsp_prod); 16048c2ecf20Sopenharmony_ci virt_rmb(); /* Ensure we see queued responses up to 'rp'. */ 16058c2ecf20Sopenharmony_ci if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { 16068c2ecf20Sopenharmony_ci pr_alert("%s: illegal number of responses %u\n", 16078c2ecf20Sopenharmony_ci info->gd->disk_name, rp - rinfo->ring.rsp_cons); 16088c2ecf20Sopenharmony_ci goto err; 16098c2ecf20Sopenharmony_ci } 16108c2ecf20Sopenharmony_ci 16118c2ecf20Sopenharmony_ci for (i = rinfo->ring.rsp_cons; i != rp; i++) { 16128c2ecf20Sopenharmony_ci unsigned long id; 16138c2ecf20Sopenharmony_ci unsigned int op; 16148c2ecf20Sopenharmony_ci 16158c2ecf20Sopenharmony_ci eoiflag = 0; 16168c2ecf20Sopenharmony_ci 16178c2ecf20Sopenharmony_ci RING_COPY_RESPONSE(&rinfo->ring, i, &bret); 16188c2ecf20Sopenharmony_ci id = bret.id; 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci /* 16218c2ecf20Sopenharmony_ci * The backend has messed up and given us an id that we would 16228c2ecf20Sopenharmony_ci * never have given to it (we stamp it up to BLK_RING_SIZE - 16238c2ecf20Sopenharmony_ci * look in get_id_from_freelist. 16248c2ecf20Sopenharmony_ci */ 16258c2ecf20Sopenharmony_ci if (id >= BLK_RING_SIZE(info)) { 16268c2ecf20Sopenharmony_ci pr_alert("%s: response has incorrect id (%ld)\n", 16278c2ecf20Sopenharmony_ci info->gd->disk_name, id); 16288c2ecf20Sopenharmony_ci goto err; 16298c2ecf20Sopenharmony_ci } 16308c2ecf20Sopenharmony_ci if (rinfo->shadow[id].status != REQ_WAITING) { 16318c2ecf20Sopenharmony_ci pr_alert("%s: response references no pending request\n", 16328c2ecf20Sopenharmony_ci info->gd->disk_name); 16338c2ecf20Sopenharmony_ci goto err; 16348c2ecf20Sopenharmony_ci } 16358c2ecf20Sopenharmony_ci 16368c2ecf20Sopenharmony_ci rinfo->shadow[id].status = REQ_PROCESSING; 16378c2ecf20Sopenharmony_ci req = rinfo->shadow[id].request; 16388c2ecf20Sopenharmony_ci 16398c2ecf20Sopenharmony_ci op = rinfo->shadow[id].req.operation; 16408c2ecf20Sopenharmony_ci if (op == BLKIF_OP_INDIRECT) 16418c2ecf20Sopenharmony_ci op = rinfo->shadow[id].req.u.indirect.indirect_op; 16428c2ecf20Sopenharmony_ci if (bret.operation != op) { 16438c2ecf20Sopenharmony_ci pr_alert("%s: response has wrong operation (%u instead of %u)\n", 16448c2ecf20Sopenharmony_ci info->gd->disk_name, bret.operation, op); 16458c2ecf20Sopenharmony_ci goto err; 16468c2ecf20Sopenharmony_ci } 16478c2ecf20Sopenharmony_ci 16488c2ecf20Sopenharmony_ci if (bret.operation != BLKIF_OP_DISCARD) { 16498c2ecf20Sopenharmony_ci int ret; 16508c2ecf20Sopenharmony_ci 16518c2ecf20Sopenharmony_ci /* 16528c2ecf20Sopenharmony_ci * We may need to wait for an extra response if the 16538c2ecf20Sopenharmony_ci * I/O request is split in 2 16548c2ecf20Sopenharmony_ci */ 16558c2ecf20Sopenharmony_ci ret = blkif_completion(&id, rinfo, &bret); 16568c2ecf20Sopenharmony_ci if (!ret) 16578c2ecf20Sopenharmony_ci continue; 16588c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) 16598c2ecf20Sopenharmony_ci goto err; 16608c2ecf20Sopenharmony_ci } 16618c2ecf20Sopenharmony_ci 16628c2ecf20Sopenharmony_ci if (add_id_to_freelist(rinfo, id)) { 16638c2ecf20Sopenharmony_ci WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", 16648c2ecf20Sopenharmony_ci info->gd->disk_name, op_name(bret.operation), id); 16658c2ecf20Sopenharmony_ci continue; 16668c2ecf20Sopenharmony_ci } 16678c2ecf20Sopenharmony_ci 16688c2ecf20Sopenharmony_ci if (bret.status == BLKIF_RSP_OKAY) 16698c2ecf20Sopenharmony_ci blkif_req(req)->error = BLK_STS_OK; 16708c2ecf20Sopenharmony_ci else 16718c2ecf20Sopenharmony_ci blkif_req(req)->error = BLK_STS_IOERR; 16728c2ecf20Sopenharmony_ci 16738c2ecf20Sopenharmony_ci switch (bret.operation) { 16748c2ecf20Sopenharmony_ci case BLKIF_OP_DISCARD: 16758c2ecf20Sopenharmony_ci if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { 16768c2ecf20Sopenharmony_ci struct request_queue *rq = info->rq; 16778c2ecf20Sopenharmony_ci 16788c2ecf20Sopenharmony_ci pr_warn_ratelimited("blkfront: %s: %s op failed\n", 16798c2ecf20Sopenharmony_ci info->gd->disk_name, op_name(bret.operation)); 16808c2ecf20Sopenharmony_ci blkif_req(req)->error = BLK_STS_NOTSUPP; 16818c2ecf20Sopenharmony_ci info->feature_discard = 0; 16828c2ecf20Sopenharmony_ci info->feature_secdiscard = 0; 16838c2ecf20Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq); 16848c2ecf20Sopenharmony_ci blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq); 16858c2ecf20Sopenharmony_ci } 16868c2ecf20Sopenharmony_ci break; 16878c2ecf20Sopenharmony_ci case BLKIF_OP_FLUSH_DISKCACHE: 16888c2ecf20Sopenharmony_ci case BLKIF_OP_WRITE_BARRIER: 16898c2ecf20Sopenharmony_ci if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { 16908c2ecf20Sopenharmony_ci pr_warn_ratelimited("blkfront: %s: %s op failed\n", 16918c2ecf20Sopenharmony_ci info->gd->disk_name, op_name(bret.operation)); 16928c2ecf20Sopenharmony_ci blkif_req(req)->error = BLK_STS_NOTSUPP; 16938c2ecf20Sopenharmony_ci } 16948c2ecf20Sopenharmony_ci if (unlikely(bret.status == BLKIF_RSP_ERROR && 16958c2ecf20Sopenharmony_ci rinfo->shadow[id].req.u.rw.nr_segments == 0)) { 16968c2ecf20Sopenharmony_ci pr_warn_ratelimited("blkfront: %s: empty %s op failed\n", 16978c2ecf20Sopenharmony_ci info->gd->disk_name, op_name(bret.operation)); 16988c2ecf20Sopenharmony_ci blkif_req(req)->error = BLK_STS_NOTSUPP; 16998c2ecf20Sopenharmony_ci } 17008c2ecf20Sopenharmony_ci if (unlikely(blkif_req(req)->error)) { 17018c2ecf20Sopenharmony_ci if (blkif_req(req)->error == BLK_STS_NOTSUPP) 17028c2ecf20Sopenharmony_ci blkif_req(req)->error = BLK_STS_OK; 17038c2ecf20Sopenharmony_ci info->feature_fua = 0; 17048c2ecf20Sopenharmony_ci info->feature_flush = 0; 17058c2ecf20Sopenharmony_ci xlvbd_flush(info); 17068c2ecf20Sopenharmony_ci } 17078c2ecf20Sopenharmony_ci fallthrough; 17088c2ecf20Sopenharmony_ci case BLKIF_OP_READ: 17098c2ecf20Sopenharmony_ci case BLKIF_OP_WRITE: 17108c2ecf20Sopenharmony_ci if (unlikely(bret.status != BLKIF_RSP_OKAY)) 17118c2ecf20Sopenharmony_ci dev_dbg_ratelimited(&info->xbdev->dev, 17128c2ecf20Sopenharmony_ci "Bad return from blkdev data request: %#x\n", 17138c2ecf20Sopenharmony_ci bret.status); 17148c2ecf20Sopenharmony_ci 17158c2ecf20Sopenharmony_ci break; 17168c2ecf20Sopenharmony_ci default: 17178c2ecf20Sopenharmony_ci BUG(); 17188c2ecf20Sopenharmony_ci } 17198c2ecf20Sopenharmony_ci 17208c2ecf20Sopenharmony_ci if (likely(!blk_should_fake_timeout(req->q))) 17218c2ecf20Sopenharmony_ci blk_mq_complete_request(req); 17228c2ecf20Sopenharmony_ci } 17238c2ecf20Sopenharmony_ci 17248c2ecf20Sopenharmony_ci rinfo->ring.rsp_cons = i; 17258c2ecf20Sopenharmony_ci 17268c2ecf20Sopenharmony_ci if (i != rinfo->ring.req_prod_pvt) { 17278c2ecf20Sopenharmony_ci int more_to_do; 17288c2ecf20Sopenharmony_ci RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do); 17298c2ecf20Sopenharmony_ci if (more_to_do) 17308c2ecf20Sopenharmony_ci goto again; 17318c2ecf20Sopenharmony_ci } else 17328c2ecf20Sopenharmony_ci rinfo->ring.sring->rsp_event = i + 1; 17338c2ecf20Sopenharmony_ci 17348c2ecf20Sopenharmony_ci kick_pending_request_queues_locked(rinfo); 17358c2ecf20Sopenharmony_ci 17368c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 17378c2ecf20Sopenharmony_ci 17388c2ecf20Sopenharmony_ci xen_irq_lateeoi(irq, eoiflag); 17398c2ecf20Sopenharmony_ci 17408c2ecf20Sopenharmony_ci return IRQ_HANDLED; 17418c2ecf20Sopenharmony_ci 17428c2ecf20Sopenharmony_ci err: 17438c2ecf20Sopenharmony_ci info->connected = BLKIF_STATE_ERROR; 17448c2ecf20Sopenharmony_ci 17458c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 17468c2ecf20Sopenharmony_ci 17478c2ecf20Sopenharmony_ci /* No EOI in order to avoid further interrupts. */ 17488c2ecf20Sopenharmony_ci 17498c2ecf20Sopenharmony_ci pr_alert("%s disabled for further use\n", info->gd->disk_name); 17508c2ecf20Sopenharmony_ci return IRQ_HANDLED; 17518c2ecf20Sopenharmony_ci} 17528c2ecf20Sopenharmony_ci 17538c2ecf20Sopenharmony_ci 17548c2ecf20Sopenharmony_cistatic int setup_blkring(struct xenbus_device *dev, 17558c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo) 17568c2ecf20Sopenharmony_ci{ 17578c2ecf20Sopenharmony_ci struct blkif_sring *sring; 17588c2ecf20Sopenharmony_ci int err, i; 17598c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 17608c2ecf20Sopenharmony_ci unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE; 17618c2ecf20Sopenharmony_ci grant_ref_t gref[XENBUS_MAX_RING_GRANTS]; 17628c2ecf20Sopenharmony_ci 17638c2ecf20Sopenharmony_ci for (i = 0; i < info->nr_ring_pages; i++) 17648c2ecf20Sopenharmony_ci rinfo->ring_ref[i] = GRANT_INVALID_REF; 17658c2ecf20Sopenharmony_ci 17668c2ecf20Sopenharmony_ci sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO); 17678c2ecf20Sopenharmony_ci if (!sring) { 17688c2ecf20Sopenharmony_ci xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); 17698c2ecf20Sopenharmony_ci return -ENOMEM; 17708c2ecf20Sopenharmony_ci } 17718c2ecf20Sopenharmony_ci SHARED_RING_INIT(sring); 17728c2ecf20Sopenharmony_ci FRONT_RING_INIT(&rinfo->ring, sring, ring_size); 17738c2ecf20Sopenharmony_ci 17748c2ecf20Sopenharmony_ci err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref); 17758c2ecf20Sopenharmony_ci if (err < 0) { 17768c2ecf20Sopenharmony_ci free_pages_exact(sring, ring_size); 17778c2ecf20Sopenharmony_ci rinfo->ring.sring = NULL; 17788c2ecf20Sopenharmony_ci goto fail; 17798c2ecf20Sopenharmony_ci } 17808c2ecf20Sopenharmony_ci for (i = 0; i < info->nr_ring_pages; i++) 17818c2ecf20Sopenharmony_ci rinfo->ring_ref[i] = gref[i]; 17828c2ecf20Sopenharmony_ci 17838c2ecf20Sopenharmony_ci err = xenbus_alloc_evtchn(dev, &rinfo->evtchn); 17848c2ecf20Sopenharmony_ci if (err) 17858c2ecf20Sopenharmony_ci goto fail; 17868c2ecf20Sopenharmony_ci 17878c2ecf20Sopenharmony_ci err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt, 17888c2ecf20Sopenharmony_ci 0, "blkif", rinfo); 17898c2ecf20Sopenharmony_ci if (err <= 0) { 17908c2ecf20Sopenharmony_ci xenbus_dev_fatal(dev, err, 17918c2ecf20Sopenharmony_ci "bind_evtchn_to_irqhandler failed"); 17928c2ecf20Sopenharmony_ci goto fail; 17938c2ecf20Sopenharmony_ci } 17948c2ecf20Sopenharmony_ci rinfo->irq = err; 17958c2ecf20Sopenharmony_ci 17968c2ecf20Sopenharmony_ci return 0; 17978c2ecf20Sopenharmony_cifail: 17988c2ecf20Sopenharmony_ci blkif_free(info, 0); 17998c2ecf20Sopenharmony_ci return err; 18008c2ecf20Sopenharmony_ci} 18018c2ecf20Sopenharmony_ci 18028c2ecf20Sopenharmony_ci/* 18038c2ecf20Sopenharmony_ci * Write out per-ring/queue nodes including ring-ref and event-channel, and each 18048c2ecf20Sopenharmony_ci * ring buffer may have multi pages depending on ->nr_ring_pages. 18058c2ecf20Sopenharmony_ci */ 18068c2ecf20Sopenharmony_cistatic int write_per_ring_nodes(struct xenbus_transaction xbt, 18078c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo, const char *dir) 18088c2ecf20Sopenharmony_ci{ 18098c2ecf20Sopenharmony_ci int err; 18108c2ecf20Sopenharmony_ci unsigned int i; 18118c2ecf20Sopenharmony_ci const char *message = NULL; 18128c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 18138c2ecf20Sopenharmony_ci 18148c2ecf20Sopenharmony_ci if (info->nr_ring_pages == 1) { 18158c2ecf20Sopenharmony_ci err = xenbus_printf(xbt, dir, "ring-ref", "%u", rinfo->ring_ref[0]); 18168c2ecf20Sopenharmony_ci if (err) { 18178c2ecf20Sopenharmony_ci message = "writing ring-ref"; 18188c2ecf20Sopenharmony_ci goto abort_transaction; 18198c2ecf20Sopenharmony_ci } 18208c2ecf20Sopenharmony_ci } else { 18218c2ecf20Sopenharmony_ci for (i = 0; i < info->nr_ring_pages; i++) { 18228c2ecf20Sopenharmony_ci char ring_ref_name[RINGREF_NAME_LEN]; 18238c2ecf20Sopenharmony_ci 18248c2ecf20Sopenharmony_ci snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); 18258c2ecf20Sopenharmony_ci err = xenbus_printf(xbt, dir, ring_ref_name, 18268c2ecf20Sopenharmony_ci "%u", rinfo->ring_ref[i]); 18278c2ecf20Sopenharmony_ci if (err) { 18288c2ecf20Sopenharmony_ci message = "writing ring-ref"; 18298c2ecf20Sopenharmony_ci goto abort_transaction; 18308c2ecf20Sopenharmony_ci } 18318c2ecf20Sopenharmony_ci } 18328c2ecf20Sopenharmony_ci } 18338c2ecf20Sopenharmony_ci 18348c2ecf20Sopenharmony_ci err = xenbus_printf(xbt, dir, "event-channel", "%u", rinfo->evtchn); 18358c2ecf20Sopenharmony_ci if (err) { 18368c2ecf20Sopenharmony_ci message = "writing event-channel"; 18378c2ecf20Sopenharmony_ci goto abort_transaction; 18388c2ecf20Sopenharmony_ci } 18398c2ecf20Sopenharmony_ci 18408c2ecf20Sopenharmony_ci return 0; 18418c2ecf20Sopenharmony_ci 18428c2ecf20Sopenharmony_ciabort_transaction: 18438c2ecf20Sopenharmony_ci xenbus_transaction_end(xbt, 1); 18448c2ecf20Sopenharmony_ci if (message) 18458c2ecf20Sopenharmony_ci xenbus_dev_fatal(info->xbdev, err, "%s", message); 18468c2ecf20Sopenharmony_ci 18478c2ecf20Sopenharmony_ci return err; 18488c2ecf20Sopenharmony_ci} 18498c2ecf20Sopenharmony_ci 18508c2ecf20Sopenharmony_cistatic void free_info(struct blkfront_info *info) 18518c2ecf20Sopenharmony_ci{ 18528c2ecf20Sopenharmony_ci list_del(&info->info_list); 18538c2ecf20Sopenharmony_ci kfree(info); 18548c2ecf20Sopenharmony_ci} 18558c2ecf20Sopenharmony_ci 18568c2ecf20Sopenharmony_ci/* Enable the persistent grants feature. */ 18578c2ecf20Sopenharmony_cistatic bool feature_persistent = true; 18588c2ecf20Sopenharmony_cimodule_param(feature_persistent, bool, 0644); 18598c2ecf20Sopenharmony_ciMODULE_PARM_DESC(feature_persistent, 18608c2ecf20Sopenharmony_ci "Enables the persistent grants feature"); 18618c2ecf20Sopenharmony_ci 18628c2ecf20Sopenharmony_ci/* Common code used when first setting up, and when resuming. */ 18638c2ecf20Sopenharmony_cistatic int talk_to_blkback(struct xenbus_device *dev, 18648c2ecf20Sopenharmony_ci struct blkfront_info *info) 18658c2ecf20Sopenharmony_ci{ 18668c2ecf20Sopenharmony_ci const char *message = NULL; 18678c2ecf20Sopenharmony_ci struct xenbus_transaction xbt; 18688c2ecf20Sopenharmony_ci int err; 18698c2ecf20Sopenharmony_ci unsigned int i, max_page_order; 18708c2ecf20Sopenharmony_ci unsigned int ring_page_order; 18718c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 18728c2ecf20Sopenharmony_ci 18738c2ecf20Sopenharmony_ci if (!info) 18748c2ecf20Sopenharmony_ci return -ENODEV; 18758c2ecf20Sopenharmony_ci 18768c2ecf20Sopenharmony_ci /* Check if backend is trusted. */ 18778c2ecf20Sopenharmony_ci info->bounce = !xen_blkif_trusted || 18788c2ecf20Sopenharmony_ci !xenbus_read_unsigned(dev->nodename, "trusted", 1); 18798c2ecf20Sopenharmony_ci 18808c2ecf20Sopenharmony_ci max_page_order = xenbus_read_unsigned(info->xbdev->otherend, 18818c2ecf20Sopenharmony_ci "max-ring-page-order", 0); 18828c2ecf20Sopenharmony_ci ring_page_order = min(xen_blkif_max_ring_order, max_page_order); 18838c2ecf20Sopenharmony_ci info->nr_ring_pages = 1 << ring_page_order; 18848c2ecf20Sopenharmony_ci 18858c2ecf20Sopenharmony_ci err = negotiate_mq(info); 18868c2ecf20Sopenharmony_ci if (err) 18878c2ecf20Sopenharmony_ci goto destroy_blkring; 18888c2ecf20Sopenharmony_ci 18898c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 18908c2ecf20Sopenharmony_ci /* Create shared ring, alloc event channel. */ 18918c2ecf20Sopenharmony_ci err = setup_blkring(dev, rinfo); 18928c2ecf20Sopenharmony_ci if (err) 18938c2ecf20Sopenharmony_ci goto destroy_blkring; 18948c2ecf20Sopenharmony_ci } 18958c2ecf20Sopenharmony_ci 18968c2ecf20Sopenharmony_ciagain: 18978c2ecf20Sopenharmony_ci err = xenbus_transaction_start(&xbt); 18988c2ecf20Sopenharmony_ci if (err) { 18998c2ecf20Sopenharmony_ci xenbus_dev_fatal(dev, err, "starting transaction"); 19008c2ecf20Sopenharmony_ci goto destroy_blkring; 19018c2ecf20Sopenharmony_ci } 19028c2ecf20Sopenharmony_ci 19038c2ecf20Sopenharmony_ci if (info->nr_ring_pages > 1) { 19048c2ecf20Sopenharmony_ci err = xenbus_printf(xbt, dev->nodename, "ring-page-order", "%u", 19058c2ecf20Sopenharmony_ci ring_page_order); 19068c2ecf20Sopenharmony_ci if (err) { 19078c2ecf20Sopenharmony_ci message = "writing ring-page-order"; 19088c2ecf20Sopenharmony_ci goto abort_transaction; 19098c2ecf20Sopenharmony_ci } 19108c2ecf20Sopenharmony_ci } 19118c2ecf20Sopenharmony_ci 19128c2ecf20Sopenharmony_ci /* We already got the number of queues/rings in _probe */ 19138c2ecf20Sopenharmony_ci if (info->nr_rings == 1) { 19148c2ecf20Sopenharmony_ci err = write_per_ring_nodes(xbt, info->rinfo, dev->nodename); 19158c2ecf20Sopenharmony_ci if (err) 19168c2ecf20Sopenharmony_ci goto destroy_blkring; 19178c2ecf20Sopenharmony_ci } else { 19188c2ecf20Sopenharmony_ci char *path; 19198c2ecf20Sopenharmony_ci size_t pathsize; 19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_ci err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues", "%u", 19228c2ecf20Sopenharmony_ci info->nr_rings); 19238c2ecf20Sopenharmony_ci if (err) { 19248c2ecf20Sopenharmony_ci message = "writing multi-queue-num-queues"; 19258c2ecf20Sopenharmony_ci goto abort_transaction; 19268c2ecf20Sopenharmony_ci } 19278c2ecf20Sopenharmony_ci 19288c2ecf20Sopenharmony_ci pathsize = strlen(dev->nodename) + QUEUE_NAME_LEN; 19298c2ecf20Sopenharmony_ci path = kmalloc(pathsize, GFP_KERNEL); 19308c2ecf20Sopenharmony_ci if (!path) { 19318c2ecf20Sopenharmony_ci err = -ENOMEM; 19328c2ecf20Sopenharmony_ci message = "ENOMEM while writing ring references"; 19338c2ecf20Sopenharmony_ci goto abort_transaction; 19348c2ecf20Sopenharmony_ci } 19358c2ecf20Sopenharmony_ci 19368c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 19378c2ecf20Sopenharmony_ci memset(path, 0, pathsize); 19388c2ecf20Sopenharmony_ci snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i); 19398c2ecf20Sopenharmony_ci err = write_per_ring_nodes(xbt, rinfo, path); 19408c2ecf20Sopenharmony_ci if (err) { 19418c2ecf20Sopenharmony_ci kfree(path); 19428c2ecf20Sopenharmony_ci goto destroy_blkring; 19438c2ecf20Sopenharmony_ci } 19448c2ecf20Sopenharmony_ci } 19458c2ecf20Sopenharmony_ci kfree(path); 19468c2ecf20Sopenharmony_ci } 19478c2ecf20Sopenharmony_ci err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", 19488c2ecf20Sopenharmony_ci XEN_IO_PROTO_ABI_NATIVE); 19498c2ecf20Sopenharmony_ci if (err) { 19508c2ecf20Sopenharmony_ci message = "writing protocol"; 19518c2ecf20Sopenharmony_ci goto abort_transaction; 19528c2ecf20Sopenharmony_ci } 19538c2ecf20Sopenharmony_ci info->feature_persistent_parm = feature_persistent; 19548c2ecf20Sopenharmony_ci err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 19558c2ecf20Sopenharmony_ci info->feature_persistent_parm); 19568c2ecf20Sopenharmony_ci if (err) 19578c2ecf20Sopenharmony_ci dev_warn(&dev->dev, 19588c2ecf20Sopenharmony_ci "writing persistent grants feature to xenbus"); 19598c2ecf20Sopenharmony_ci 19608c2ecf20Sopenharmony_ci err = xenbus_transaction_end(xbt, 0); 19618c2ecf20Sopenharmony_ci if (err) { 19628c2ecf20Sopenharmony_ci if (err == -EAGAIN) 19638c2ecf20Sopenharmony_ci goto again; 19648c2ecf20Sopenharmony_ci xenbus_dev_fatal(dev, err, "completing transaction"); 19658c2ecf20Sopenharmony_ci goto destroy_blkring; 19668c2ecf20Sopenharmony_ci } 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 19698c2ecf20Sopenharmony_ci unsigned int j; 19708c2ecf20Sopenharmony_ci 19718c2ecf20Sopenharmony_ci for (j = 0; j < BLK_RING_SIZE(info); j++) 19728c2ecf20Sopenharmony_ci rinfo->shadow[j].req.u.rw.id = j + 1; 19738c2ecf20Sopenharmony_ci rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff; 19748c2ecf20Sopenharmony_ci } 19758c2ecf20Sopenharmony_ci xenbus_switch_state(dev, XenbusStateInitialised); 19768c2ecf20Sopenharmony_ci 19778c2ecf20Sopenharmony_ci return 0; 19788c2ecf20Sopenharmony_ci 19798c2ecf20Sopenharmony_ci abort_transaction: 19808c2ecf20Sopenharmony_ci xenbus_transaction_end(xbt, 1); 19818c2ecf20Sopenharmony_ci if (message) 19828c2ecf20Sopenharmony_ci xenbus_dev_fatal(dev, err, "%s", message); 19838c2ecf20Sopenharmony_ci destroy_blkring: 19848c2ecf20Sopenharmony_ci blkif_free(info, 0); 19858c2ecf20Sopenharmony_ci 19868c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 19878c2ecf20Sopenharmony_ci free_info(info); 19888c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci dev_set_drvdata(&dev->dev, NULL); 19918c2ecf20Sopenharmony_ci 19928c2ecf20Sopenharmony_ci return err; 19938c2ecf20Sopenharmony_ci} 19948c2ecf20Sopenharmony_ci 19958c2ecf20Sopenharmony_cistatic int negotiate_mq(struct blkfront_info *info) 19968c2ecf20Sopenharmony_ci{ 19978c2ecf20Sopenharmony_ci unsigned int backend_max_queues; 19988c2ecf20Sopenharmony_ci unsigned int i; 19998c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 20008c2ecf20Sopenharmony_ci 20018c2ecf20Sopenharmony_ci BUG_ON(info->nr_rings); 20028c2ecf20Sopenharmony_ci 20038c2ecf20Sopenharmony_ci /* Check if backend supports multiple queues. */ 20048c2ecf20Sopenharmony_ci backend_max_queues = xenbus_read_unsigned(info->xbdev->otherend, 20058c2ecf20Sopenharmony_ci "multi-queue-max-queues", 1); 20068c2ecf20Sopenharmony_ci info->nr_rings = min(backend_max_queues, xen_blkif_max_queues); 20078c2ecf20Sopenharmony_ci /* We need at least one ring. */ 20088c2ecf20Sopenharmony_ci if (!info->nr_rings) 20098c2ecf20Sopenharmony_ci info->nr_rings = 1; 20108c2ecf20Sopenharmony_ci 20118c2ecf20Sopenharmony_ci info->rinfo_size = struct_size(info->rinfo, shadow, 20128c2ecf20Sopenharmony_ci BLK_RING_SIZE(info)); 20138c2ecf20Sopenharmony_ci info->rinfo = kvcalloc(info->nr_rings, info->rinfo_size, GFP_KERNEL); 20148c2ecf20Sopenharmony_ci if (!info->rinfo) { 20158c2ecf20Sopenharmony_ci xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure"); 20168c2ecf20Sopenharmony_ci info->nr_rings = 0; 20178c2ecf20Sopenharmony_ci return -ENOMEM; 20188c2ecf20Sopenharmony_ci } 20198c2ecf20Sopenharmony_ci 20208c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 20218c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rinfo->indirect_pages); 20228c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rinfo->grants); 20238c2ecf20Sopenharmony_ci rinfo->dev_info = info; 20248c2ecf20Sopenharmony_ci INIT_WORK(&rinfo->work, blkif_restart_queue); 20258c2ecf20Sopenharmony_ci spin_lock_init(&rinfo->ring_lock); 20268c2ecf20Sopenharmony_ci } 20278c2ecf20Sopenharmony_ci return 0; 20288c2ecf20Sopenharmony_ci} 20298c2ecf20Sopenharmony_ci 20308c2ecf20Sopenharmony_ci/** 20318c2ecf20Sopenharmony_ci * Entry point to this code when a new device is created. Allocate the basic 20328c2ecf20Sopenharmony_ci * structures and the ring buffer for communication with the backend, and 20338c2ecf20Sopenharmony_ci * inform the backend of the appropriate details for those. Switch to 20348c2ecf20Sopenharmony_ci * Initialised state. 20358c2ecf20Sopenharmony_ci */ 20368c2ecf20Sopenharmony_cistatic int blkfront_probe(struct xenbus_device *dev, 20378c2ecf20Sopenharmony_ci const struct xenbus_device_id *id) 20388c2ecf20Sopenharmony_ci{ 20398c2ecf20Sopenharmony_ci int err, vdevice; 20408c2ecf20Sopenharmony_ci struct blkfront_info *info; 20418c2ecf20Sopenharmony_ci 20428c2ecf20Sopenharmony_ci /* FIXME: Use dynamic device id if this is not set. */ 20438c2ecf20Sopenharmony_ci err = xenbus_scanf(XBT_NIL, dev->nodename, 20448c2ecf20Sopenharmony_ci "virtual-device", "%i", &vdevice); 20458c2ecf20Sopenharmony_ci if (err != 1) { 20468c2ecf20Sopenharmony_ci /* go looking in the extended area instead */ 20478c2ecf20Sopenharmony_ci err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", 20488c2ecf20Sopenharmony_ci "%i", &vdevice); 20498c2ecf20Sopenharmony_ci if (err != 1) { 20508c2ecf20Sopenharmony_ci xenbus_dev_fatal(dev, err, "reading virtual-device"); 20518c2ecf20Sopenharmony_ci return err; 20528c2ecf20Sopenharmony_ci } 20538c2ecf20Sopenharmony_ci } 20548c2ecf20Sopenharmony_ci 20558c2ecf20Sopenharmony_ci if (xen_hvm_domain()) { 20568c2ecf20Sopenharmony_ci char *type; 20578c2ecf20Sopenharmony_ci int len; 20588c2ecf20Sopenharmony_ci /* no unplug has been done: do not hook devices != xen vbds */ 20598c2ecf20Sopenharmony_ci if (xen_has_pv_and_legacy_disk_devices()) { 20608c2ecf20Sopenharmony_ci int major; 20618c2ecf20Sopenharmony_ci 20628c2ecf20Sopenharmony_ci if (!VDEV_IS_EXTENDED(vdevice)) 20638c2ecf20Sopenharmony_ci major = BLKIF_MAJOR(vdevice); 20648c2ecf20Sopenharmony_ci else 20658c2ecf20Sopenharmony_ci major = XENVBD_MAJOR; 20668c2ecf20Sopenharmony_ci 20678c2ecf20Sopenharmony_ci if (major != XENVBD_MAJOR) { 20688c2ecf20Sopenharmony_ci printk(KERN_INFO 20698c2ecf20Sopenharmony_ci "%s: HVM does not support vbd %d as xen block device\n", 20708c2ecf20Sopenharmony_ci __func__, vdevice); 20718c2ecf20Sopenharmony_ci return -ENODEV; 20728c2ecf20Sopenharmony_ci } 20738c2ecf20Sopenharmony_ci } 20748c2ecf20Sopenharmony_ci /* do not create a PV cdrom device if we are an HVM guest */ 20758c2ecf20Sopenharmony_ci type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len); 20768c2ecf20Sopenharmony_ci if (IS_ERR(type)) 20778c2ecf20Sopenharmony_ci return -ENODEV; 20788c2ecf20Sopenharmony_ci if (strncmp(type, "cdrom", 5) == 0) { 20798c2ecf20Sopenharmony_ci kfree(type); 20808c2ecf20Sopenharmony_ci return -ENODEV; 20818c2ecf20Sopenharmony_ci } 20828c2ecf20Sopenharmony_ci kfree(type); 20838c2ecf20Sopenharmony_ci } 20848c2ecf20Sopenharmony_ci info = kzalloc(sizeof(*info), GFP_KERNEL); 20858c2ecf20Sopenharmony_ci if (!info) { 20868c2ecf20Sopenharmony_ci xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); 20878c2ecf20Sopenharmony_ci return -ENOMEM; 20888c2ecf20Sopenharmony_ci } 20898c2ecf20Sopenharmony_ci 20908c2ecf20Sopenharmony_ci info->xbdev = dev; 20918c2ecf20Sopenharmony_ci 20928c2ecf20Sopenharmony_ci mutex_init(&info->mutex); 20938c2ecf20Sopenharmony_ci info->vdevice = vdevice; 20948c2ecf20Sopenharmony_ci info->connected = BLKIF_STATE_DISCONNECTED; 20958c2ecf20Sopenharmony_ci 20968c2ecf20Sopenharmony_ci /* Front end dir is a number, which is used as the id. */ 20978c2ecf20Sopenharmony_ci info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); 20988c2ecf20Sopenharmony_ci dev_set_drvdata(&dev->dev, info); 20998c2ecf20Sopenharmony_ci 21008c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 21018c2ecf20Sopenharmony_ci list_add(&info->info_list, &info_list); 21028c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 21038c2ecf20Sopenharmony_ci 21048c2ecf20Sopenharmony_ci return 0; 21058c2ecf20Sopenharmony_ci} 21068c2ecf20Sopenharmony_ci 21078c2ecf20Sopenharmony_cistatic int blkif_recover(struct blkfront_info *info) 21088c2ecf20Sopenharmony_ci{ 21098c2ecf20Sopenharmony_ci unsigned int r_index; 21108c2ecf20Sopenharmony_ci struct request *req, *n; 21118c2ecf20Sopenharmony_ci int rc; 21128c2ecf20Sopenharmony_ci struct bio *bio; 21138c2ecf20Sopenharmony_ci unsigned int segs; 21148c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 21158c2ecf20Sopenharmony_ci 21168c2ecf20Sopenharmony_ci blkfront_gather_backend_features(info); 21178c2ecf20Sopenharmony_ci /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ 21188c2ecf20Sopenharmony_ci blkif_set_queue_limits(info); 21198c2ecf20Sopenharmony_ci segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; 21208c2ecf20Sopenharmony_ci blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); 21218c2ecf20Sopenharmony_ci 21228c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, r_index) { 21238c2ecf20Sopenharmony_ci rc = blkfront_setup_indirect(rinfo); 21248c2ecf20Sopenharmony_ci if (rc) 21258c2ecf20Sopenharmony_ci return rc; 21268c2ecf20Sopenharmony_ci } 21278c2ecf20Sopenharmony_ci xenbus_switch_state(info->xbdev, XenbusStateConnected); 21288c2ecf20Sopenharmony_ci 21298c2ecf20Sopenharmony_ci /* Now safe for us to use the shared ring */ 21308c2ecf20Sopenharmony_ci info->connected = BLKIF_STATE_CONNECTED; 21318c2ecf20Sopenharmony_ci 21328c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, r_index) { 21338c2ecf20Sopenharmony_ci /* Kick any other new requests queued since we resumed */ 21348c2ecf20Sopenharmony_ci kick_pending_request_queues(rinfo); 21358c2ecf20Sopenharmony_ci } 21368c2ecf20Sopenharmony_ci 21378c2ecf20Sopenharmony_ci list_for_each_entry_safe(req, n, &info->requests, queuelist) { 21388c2ecf20Sopenharmony_ci /* Requeue pending requests (flush or discard) */ 21398c2ecf20Sopenharmony_ci list_del_init(&req->queuelist); 21408c2ecf20Sopenharmony_ci BUG_ON(req->nr_phys_segments > segs); 21418c2ecf20Sopenharmony_ci blk_mq_requeue_request(req, false); 21428c2ecf20Sopenharmony_ci } 21438c2ecf20Sopenharmony_ci blk_mq_start_stopped_hw_queues(info->rq, true); 21448c2ecf20Sopenharmony_ci blk_mq_kick_requeue_list(info->rq); 21458c2ecf20Sopenharmony_ci 21468c2ecf20Sopenharmony_ci while ((bio = bio_list_pop(&info->bio_list)) != NULL) { 21478c2ecf20Sopenharmony_ci /* Traverse the list of pending bios and re-queue them */ 21488c2ecf20Sopenharmony_ci submit_bio(bio); 21498c2ecf20Sopenharmony_ci } 21508c2ecf20Sopenharmony_ci 21518c2ecf20Sopenharmony_ci return 0; 21528c2ecf20Sopenharmony_ci} 21538c2ecf20Sopenharmony_ci 21548c2ecf20Sopenharmony_ci/** 21558c2ecf20Sopenharmony_ci * We are reconnecting to the backend, due to a suspend/resume, or a backend 21568c2ecf20Sopenharmony_ci * driver restart. We tear down our blkif structure and recreate it, but 21578c2ecf20Sopenharmony_ci * leave the device-layer structures intact so that this is transparent to the 21588c2ecf20Sopenharmony_ci * rest of the kernel. 21598c2ecf20Sopenharmony_ci */ 21608c2ecf20Sopenharmony_cistatic int blkfront_resume(struct xenbus_device *dev) 21618c2ecf20Sopenharmony_ci{ 21628c2ecf20Sopenharmony_ci struct blkfront_info *info = dev_get_drvdata(&dev->dev); 21638c2ecf20Sopenharmony_ci int err = 0; 21648c2ecf20Sopenharmony_ci unsigned int i, j; 21658c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 21668c2ecf20Sopenharmony_ci 21678c2ecf20Sopenharmony_ci dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci bio_list_init(&info->bio_list); 21708c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&info->requests); 21718c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 21728c2ecf20Sopenharmony_ci struct bio_list merge_bio; 21738c2ecf20Sopenharmony_ci struct blk_shadow *shadow = rinfo->shadow; 21748c2ecf20Sopenharmony_ci 21758c2ecf20Sopenharmony_ci for (j = 0; j < BLK_RING_SIZE(info); j++) { 21768c2ecf20Sopenharmony_ci /* Not in use? */ 21778c2ecf20Sopenharmony_ci if (!shadow[j].request) 21788c2ecf20Sopenharmony_ci continue; 21798c2ecf20Sopenharmony_ci 21808c2ecf20Sopenharmony_ci /* 21818c2ecf20Sopenharmony_ci * Get the bios in the request so we can re-queue them. 21828c2ecf20Sopenharmony_ci */ 21838c2ecf20Sopenharmony_ci if (req_op(shadow[j].request) == REQ_OP_FLUSH || 21848c2ecf20Sopenharmony_ci req_op(shadow[j].request) == REQ_OP_DISCARD || 21858c2ecf20Sopenharmony_ci req_op(shadow[j].request) == REQ_OP_SECURE_ERASE || 21868c2ecf20Sopenharmony_ci shadow[j].request->cmd_flags & REQ_FUA) { 21878c2ecf20Sopenharmony_ci /* 21888c2ecf20Sopenharmony_ci * Flush operations don't contain bios, so 21898c2ecf20Sopenharmony_ci * we need to requeue the whole request 21908c2ecf20Sopenharmony_ci * 21918c2ecf20Sopenharmony_ci * XXX: but this doesn't make any sense for a 21928c2ecf20Sopenharmony_ci * write with the FUA flag set.. 21938c2ecf20Sopenharmony_ci */ 21948c2ecf20Sopenharmony_ci list_add(&shadow[j].request->queuelist, &info->requests); 21958c2ecf20Sopenharmony_ci continue; 21968c2ecf20Sopenharmony_ci } 21978c2ecf20Sopenharmony_ci merge_bio.head = shadow[j].request->bio; 21988c2ecf20Sopenharmony_ci merge_bio.tail = shadow[j].request->biotail; 21998c2ecf20Sopenharmony_ci bio_list_merge(&info->bio_list, &merge_bio); 22008c2ecf20Sopenharmony_ci shadow[j].request->bio = NULL; 22018c2ecf20Sopenharmony_ci blk_mq_end_request(shadow[j].request, BLK_STS_OK); 22028c2ecf20Sopenharmony_ci } 22038c2ecf20Sopenharmony_ci } 22048c2ecf20Sopenharmony_ci 22058c2ecf20Sopenharmony_ci blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); 22068c2ecf20Sopenharmony_ci 22078c2ecf20Sopenharmony_ci err = talk_to_blkback(dev, info); 22088c2ecf20Sopenharmony_ci if (!err) 22098c2ecf20Sopenharmony_ci blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings); 22108c2ecf20Sopenharmony_ci 22118c2ecf20Sopenharmony_ci /* 22128c2ecf20Sopenharmony_ci * We have to wait for the backend to switch to 22138c2ecf20Sopenharmony_ci * connected state, since we want to read which 22148c2ecf20Sopenharmony_ci * features it supports. 22158c2ecf20Sopenharmony_ci */ 22168c2ecf20Sopenharmony_ci 22178c2ecf20Sopenharmony_ci return err; 22188c2ecf20Sopenharmony_ci} 22198c2ecf20Sopenharmony_ci 22208c2ecf20Sopenharmony_cistatic void blkfront_closing(struct blkfront_info *info) 22218c2ecf20Sopenharmony_ci{ 22228c2ecf20Sopenharmony_ci struct xenbus_device *xbdev = info->xbdev; 22238c2ecf20Sopenharmony_ci struct block_device *bdev = NULL; 22248c2ecf20Sopenharmony_ci 22258c2ecf20Sopenharmony_ci mutex_lock(&info->mutex); 22268c2ecf20Sopenharmony_ci 22278c2ecf20Sopenharmony_ci if (xbdev->state == XenbusStateClosing) { 22288c2ecf20Sopenharmony_ci mutex_unlock(&info->mutex); 22298c2ecf20Sopenharmony_ci return; 22308c2ecf20Sopenharmony_ci } 22318c2ecf20Sopenharmony_ci 22328c2ecf20Sopenharmony_ci if (info->gd) 22338c2ecf20Sopenharmony_ci bdev = bdget_disk(info->gd, 0); 22348c2ecf20Sopenharmony_ci 22358c2ecf20Sopenharmony_ci mutex_unlock(&info->mutex); 22368c2ecf20Sopenharmony_ci 22378c2ecf20Sopenharmony_ci if (!bdev) { 22388c2ecf20Sopenharmony_ci xenbus_frontend_closed(xbdev); 22398c2ecf20Sopenharmony_ci return; 22408c2ecf20Sopenharmony_ci } 22418c2ecf20Sopenharmony_ci 22428c2ecf20Sopenharmony_ci mutex_lock(&bdev->bd_mutex); 22438c2ecf20Sopenharmony_ci 22448c2ecf20Sopenharmony_ci if (bdev->bd_openers) { 22458c2ecf20Sopenharmony_ci xenbus_dev_error(xbdev, -EBUSY, 22468c2ecf20Sopenharmony_ci "Device in use; refusing to close"); 22478c2ecf20Sopenharmony_ci xenbus_switch_state(xbdev, XenbusStateClosing); 22488c2ecf20Sopenharmony_ci } else { 22498c2ecf20Sopenharmony_ci xlvbd_release_gendisk(info); 22508c2ecf20Sopenharmony_ci xenbus_frontend_closed(xbdev); 22518c2ecf20Sopenharmony_ci } 22528c2ecf20Sopenharmony_ci 22538c2ecf20Sopenharmony_ci mutex_unlock(&bdev->bd_mutex); 22548c2ecf20Sopenharmony_ci bdput(bdev); 22558c2ecf20Sopenharmony_ci} 22568c2ecf20Sopenharmony_ci 22578c2ecf20Sopenharmony_cistatic void blkfront_setup_discard(struct blkfront_info *info) 22588c2ecf20Sopenharmony_ci{ 22598c2ecf20Sopenharmony_ci info->feature_discard = 1; 22608c2ecf20Sopenharmony_ci info->discard_granularity = xenbus_read_unsigned(info->xbdev->otherend, 22618c2ecf20Sopenharmony_ci "discard-granularity", 22628c2ecf20Sopenharmony_ci 0); 22638c2ecf20Sopenharmony_ci info->discard_alignment = xenbus_read_unsigned(info->xbdev->otherend, 22648c2ecf20Sopenharmony_ci "discard-alignment", 0); 22658c2ecf20Sopenharmony_ci info->feature_secdiscard = 22668c2ecf20Sopenharmony_ci !!xenbus_read_unsigned(info->xbdev->otherend, "discard-secure", 22678c2ecf20Sopenharmony_ci 0); 22688c2ecf20Sopenharmony_ci} 22698c2ecf20Sopenharmony_ci 22708c2ecf20Sopenharmony_cistatic int blkfront_setup_indirect(struct blkfront_ring_info *rinfo) 22718c2ecf20Sopenharmony_ci{ 22728c2ecf20Sopenharmony_ci unsigned int psegs, grants, memflags; 22738c2ecf20Sopenharmony_ci int err, i; 22748c2ecf20Sopenharmony_ci struct blkfront_info *info = rinfo->dev_info; 22758c2ecf20Sopenharmony_ci 22768c2ecf20Sopenharmony_ci memflags = memalloc_noio_save(); 22778c2ecf20Sopenharmony_ci 22788c2ecf20Sopenharmony_ci if (info->max_indirect_segments == 0) { 22798c2ecf20Sopenharmony_ci if (!HAS_EXTRA_REQ) 22808c2ecf20Sopenharmony_ci grants = BLKIF_MAX_SEGMENTS_PER_REQUEST; 22818c2ecf20Sopenharmony_ci else { 22828c2ecf20Sopenharmony_ci /* 22838c2ecf20Sopenharmony_ci * When an extra req is required, the maximum 22848c2ecf20Sopenharmony_ci * grants supported is related to the size of the 22858c2ecf20Sopenharmony_ci * Linux block segment. 22868c2ecf20Sopenharmony_ci */ 22878c2ecf20Sopenharmony_ci grants = GRANTS_PER_PSEG; 22888c2ecf20Sopenharmony_ci } 22898c2ecf20Sopenharmony_ci } 22908c2ecf20Sopenharmony_ci else 22918c2ecf20Sopenharmony_ci grants = info->max_indirect_segments; 22928c2ecf20Sopenharmony_ci psegs = DIV_ROUND_UP(grants, GRANTS_PER_PSEG); 22938c2ecf20Sopenharmony_ci 22948c2ecf20Sopenharmony_ci err = fill_grant_buffer(rinfo, 22958c2ecf20Sopenharmony_ci (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info)); 22968c2ecf20Sopenharmony_ci if (err) 22978c2ecf20Sopenharmony_ci goto out_of_memory; 22988c2ecf20Sopenharmony_ci 22998c2ecf20Sopenharmony_ci if (!info->bounce && info->max_indirect_segments) { 23008c2ecf20Sopenharmony_ci /* 23018c2ecf20Sopenharmony_ci * We are using indirect descriptors but don't have a bounce 23028c2ecf20Sopenharmony_ci * buffer, we need to allocate a set of pages that can be 23038c2ecf20Sopenharmony_ci * used for mapping indirect grefs 23048c2ecf20Sopenharmony_ci */ 23058c2ecf20Sopenharmony_ci int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info); 23068c2ecf20Sopenharmony_ci 23078c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&rinfo->indirect_pages)); 23088c2ecf20Sopenharmony_ci for (i = 0; i < num; i++) { 23098c2ecf20Sopenharmony_ci struct page *indirect_page = alloc_page(GFP_KERNEL | 23108c2ecf20Sopenharmony_ci __GFP_ZERO); 23118c2ecf20Sopenharmony_ci if (!indirect_page) 23128c2ecf20Sopenharmony_ci goto out_of_memory; 23138c2ecf20Sopenharmony_ci list_add(&indirect_page->lru, &rinfo->indirect_pages); 23148c2ecf20Sopenharmony_ci } 23158c2ecf20Sopenharmony_ci } 23168c2ecf20Sopenharmony_ci 23178c2ecf20Sopenharmony_ci for (i = 0; i < BLK_RING_SIZE(info); i++) { 23188c2ecf20Sopenharmony_ci rinfo->shadow[i].grants_used = 23198c2ecf20Sopenharmony_ci kvcalloc(grants, 23208c2ecf20Sopenharmony_ci sizeof(rinfo->shadow[i].grants_used[0]), 23218c2ecf20Sopenharmony_ci GFP_KERNEL); 23228c2ecf20Sopenharmony_ci rinfo->shadow[i].sg = kvcalloc(psegs, 23238c2ecf20Sopenharmony_ci sizeof(rinfo->shadow[i].sg[0]), 23248c2ecf20Sopenharmony_ci GFP_KERNEL); 23258c2ecf20Sopenharmony_ci if (info->max_indirect_segments) 23268c2ecf20Sopenharmony_ci rinfo->shadow[i].indirect_grants = 23278c2ecf20Sopenharmony_ci kvcalloc(INDIRECT_GREFS(grants), 23288c2ecf20Sopenharmony_ci sizeof(rinfo->shadow[i].indirect_grants[0]), 23298c2ecf20Sopenharmony_ci GFP_KERNEL); 23308c2ecf20Sopenharmony_ci if ((rinfo->shadow[i].grants_used == NULL) || 23318c2ecf20Sopenharmony_ci (rinfo->shadow[i].sg == NULL) || 23328c2ecf20Sopenharmony_ci (info->max_indirect_segments && 23338c2ecf20Sopenharmony_ci (rinfo->shadow[i].indirect_grants == NULL))) 23348c2ecf20Sopenharmony_ci goto out_of_memory; 23358c2ecf20Sopenharmony_ci sg_init_table(rinfo->shadow[i].sg, psegs); 23368c2ecf20Sopenharmony_ci } 23378c2ecf20Sopenharmony_ci 23388c2ecf20Sopenharmony_ci memalloc_noio_restore(memflags); 23398c2ecf20Sopenharmony_ci 23408c2ecf20Sopenharmony_ci return 0; 23418c2ecf20Sopenharmony_ci 23428c2ecf20Sopenharmony_ciout_of_memory: 23438c2ecf20Sopenharmony_ci for (i = 0; i < BLK_RING_SIZE(info); i++) { 23448c2ecf20Sopenharmony_ci kvfree(rinfo->shadow[i].grants_used); 23458c2ecf20Sopenharmony_ci rinfo->shadow[i].grants_used = NULL; 23468c2ecf20Sopenharmony_ci kvfree(rinfo->shadow[i].sg); 23478c2ecf20Sopenharmony_ci rinfo->shadow[i].sg = NULL; 23488c2ecf20Sopenharmony_ci kvfree(rinfo->shadow[i].indirect_grants); 23498c2ecf20Sopenharmony_ci rinfo->shadow[i].indirect_grants = NULL; 23508c2ecf20Sopenharmony_ci } 23518c2ecf20Sopenharmony_ci if (!list_empty(&rinfo->indirect_pages)) { 23528c2ecf20Sopenharmony_ci struct page *indirect_page, *n; 23538c2ecf20Sopenharmony_ci list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { 23548c2ecf20Sopenharmony_ci list_del(&indirect_page->lru); 23558c2ecf20Sopenharmony_ci __free_page(indirect_page); 23568c2ecf20Sopenharmony_ci } 23578c2ecf20Sopenharmony_ci } 23588c2ecf20Sopenharmony_ci 23598c2ecf20Sopenharmony_ci memalloc_noio_restore(memflags); 23608c2ecf20Sopenharmony_ci 23618c2ecf20Sopenharmony_ci return -ENOMEM; 23628c2ecf20Sopenharmony_ci} 23638c2ecf20Sopenharmony_ci 23648c2ecf20Sopenharmony_ci/* 23658c2ecf20Sopenharmony_ci * Gather all backend feature-* 23668c2ecf20Sopenharmony_ci */ 23678c2ecf20Sopenharmony_cistatic void blkfront_gather_backend_features(struct blkfront_info *info) 23688c2ecf20Sopenharmony_ci{ 23698c2ecf20Sopenharmony_ci unsigned int indirect_segments; 23708c2ecf20Sopenharmony_ci 23718c2ecf20Sopenharmony_ci info->feature_flush = 0; 23728c2ecf20Sopenharmony_ci info->feature_fua = 0; 23738c2ecf20Sopenharmony_ci 23748c2ecf20Sopenharmony_ci /* 23758c2ecf20Sopenharmony_ci * If there's no "feature-barrier" defined, then it means 23768c2ecf20Sopenharmony_ci * we're dealing with a very old backend which writes 23778c2ecf20Sopenharmony_ci * synchronously; nothing to do. 23788c2ecf20Sopenharmony_ci * 23798c2ecf20Sopenharmony_ci * If there are barriers, then we use flush. 23808c2ecf20Sopenharmony_ci */ 23818c2ecf20Sopenharmony_ci if (xenbus_read_unsigned(info->xbdev->otherend, "feature-barrier", 0)) { 23828c2ecf20Sopenharmony_ci info->feature_flush = 1; 23838c2ecf20Sopenharmony_ci info->feature_fua = 1; 23848c2ecf20Sopenharmony_ci } 23858c2ecf20Sopenharmony_ci 23868c2ecf20Sopenharmony_ci /* 23878c2ecf20Sopenharmony_ci * And if there is "feature-flush-cache" use that above 23888c2ecf20Sopenharmony_ci * barriers. 23898c2ecf20Sopenharmony_ci */ 23908c2ecf20Sopenharmony_ci if (xenbus_read_unsigned(info->xbdev->otherend, "feature-flush-cache", 23918c2ecf20Sopenharmony_ci 0)) { 23928c2ecf20Sopenharmony_ci info->feature_flush = 1; 23938c2ecf20Sopenharmony_ci info->feature_fua = 0; 23948c2ecf20Sopenharmony_ci } 23958c2ecf20Sopenharmony_ci 23968c2ecf20Sopenharmony_ci if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0)) 23978c2ecf20Sopenharmony_ci blkfront_setup_discard(info); 23988c2ecf20Sopenharmony_ci 23998c2ecf20Sopenharmony_ci if (info->feature_persistent_parm) 24008c2ecf20Sopenharmony_ci info->feature_persistent = 24018c2ecf20Sopenharmony_ci !!xenbus_read_unsigned(info->xbdev->otherend, 24028c2ecf20Sopenharmony_ci "feature-persistent", 0); 24038c2ecf20Sopenharmony_ci if (info->feature_persistent) 24048c2ecf20Sopenharmony_ci info->bounce = true; 24058c2ecf20Sopenharmony_ci 24068c2ecf20Sopenharmony_ci indirect_segments = xenbus_read_unsigned(info->xbdev->otherend, 24078c2ecf20Sopenharmony_ci "feature-max-indirect-segments", 0); 24088c2ecf20Sopenharmony_ci if (indirect_segments > xen_blkif_max_segments) 24098c2ecf20Sopenharmony_ci indirect_segments = xen_blkif_max_segments; 24108c2ecf20Sopenharmony_ci if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST) 24118c2ecf20Sopenharmony_ci indirect_segments = 0; 24128c2ecf20Sopenharmony_ci info->max_indirect_segments = indirect_segments; 24138c2ecf20Sopenharmony_ci 24148c2ecf20Sopenharmony_ci if (info->feature_persistent) { 24158c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 24168c2ecf20Sopenharmony_ci schedule_delayed_work(&blkfront_work, HZ * 10); 24178c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 24188c2ecf20Sopenharmony_ci } 24198c2ecf20Sopenharmony_ci} 24208c2ecf20Sopenharmony_ci 24218c2ecf20Sopenharmony_ci/* 24228c2ecf20Sopenharmony_ci * Invoked when the backend is finally 'ready' (and has told produced 24238c2ecf20Sopenharmony_ci * the details about the physical device - #sectors, size, etc). 24248c2ecf20Sopenharmony_ci */ 24258c2ecf20Sopenharmony_cistatic void blkfront_connect(struct blkfront_info *info) 24268c2ecf20Sopenharmony_ci{ 24278c2ecf20Sopenharmony_ci unsigned long long sectors; 24288c2ecf20Sopenharmony_ci unsigned long sector_size; 24298c2ecf20Sopenharmony_ci unsigned int physical_sector_size; 24308c2ecf20Sopenharmony_ci unsigned int binfo; 24318c2ecf20Sopenharmony_ci int err, i; 24328c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 24338c2ecf20Sopenharmony_ci 24348c2ecf20Sopenharmony_ci switch (info->connected) { 24358c2ecf20Sopenharmony_ci case BLKIF_STATE_CONNECTED: 24368c2ecf20Sopenharmony_ci /* 24378c2ecf20Sopenharmony_ci * Potentially, the back-end may be signalling 24388c2ecf20Sopenharmony_ci * a capacity change; update the capacity. 24398c2ecf20Sopenharmony_ci */ 24408c2ecf20Sopenharmony_ci err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, 24418c2ecf20Sopenharmony_ci "sectors", "%Lu", §ors); 24428c2ecf20Sopenharmony_ci if (XENBUS_EXIST_ERR(err)) 24438c2ecf20Sopenharmony_ci return; 24448c2ecf20Sopenharmony_ci printk(KERN_INFO "Setting capacity to %Lu\n", 24458c2ecf20Sopenharmony_ci sectors); 24468c2ecf20Sopenharmony_ci set_capacity_revalidate_and_notify(info->gd, sectors, true); 24478c2ecf20Sopenharmony_ci 24488c2ecf20Sopenharmony_ci return; 24498c2ecf20Sopenharmony_ci case BLKIF_STATE_SUSPENDED: 24508c2ecf20Sopenharmony_ci /* 24518c2ecf20Sopenharmony_ci * If we are recovering from suspension, we need to wait 24528c2ecf20Sopenharmony_ci * for the backend to announce it's features before 24538c2ecf20Sopenharmony_ci * reconnecting, at least we need to know if the backend 24548c2ecf20Sopenharmony_ci * supports indirect descriptors, and how many. 24558c2ecf20Sopenharmony_ci */ 24568c2ecf20Sopenharmony_ci blkif_recover(info); 24578c2ecf20Sopenharmony_ci return; 24588c2ecf20Sopenharmony_ci 24598c2ecf20Sopenharmony_ci default: 24608c2ecf20Sopenharmony_ci break; 24618c2ecf20Sopenharmony_ci } 24628c2ecf20Sopenharmony_ci 24638c2ecf20Sopenharmony_ci dev_dbg(&info->xbdev->dev, "%s:%s.\n", 24648c2ecf20Sopenharmony_ci __func__, info->xbdev->otherend); 24658c2ecf20Sopenharmony_ci 24668c2ecf20Sopenharmony_ci err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 24678c2ecf20Sopenharmony_ci "sectors", "%llu", §ors, 24688c2ecf20Sopenharmony_ci "info", "%u", &binfo, 24698c2ecf20Sopenharmony_ci "sector-size", "%lu", §or_size, 24708c2ecf20Sopenharmony_ci NULL); 24718c2ecf20Sopenharmony_ci if (err) { 24728c2ecf20Sopenharmony_ci xenbus_dev_fatal(info->xbdev, err, 24738c2ecf20Sopenharmony_ci "reading backend fields at %s", 24748c2ecf20Sopenharmony_ci info->xbdev->otherend); 24758c2ecf20Sopenharmony_ci return; 24768c2ecf20Sopenharmony_ci } 24778c2ecf20Sopenharmony_ci 24788c2ecf20Sopenharmony_ci /* 24798c2ecf20Sopenharmony_ci * physcial-sector-size is a newer field, so old backends may not 24808c2ecf20Sopenharmony_ci * provide this. Assume physical sector size to be the same as 24818c2ecf20Sopenharmony_ci * sector_size in that case. 24828c2ecf20Sopenharmony_ci */ 24838c2ecf20Sopenharmony_ci physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend, 24848c2ecf20Sopenharmony_ci "physical-sector-size", 24858c2ecf20Sopenharmony_ci sector_size); 24868c2ecf20Sopenharmony_ci blkfront_gather_backend_features(info); 24878c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 24888c2ecf20Sopenharmony_ci err = blkfront_setup_indirect(rinfo); 24898c2ecf20Sopenharmony_ci if (err) { 24908c2ecf20Sopenharmony_ci xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", 24918c2ecf20Sopenharmony_ci info->xbdev->otherend); 24928c2ecf20Sopenharmony_ci blkif_free(info, 0); 24938c2ecf20Sopenharmony_ci break; 24948c2ecf20Sopenharmony_ci } 24958c2ecf20Sopenharmony_ci } 24968c2ecf20Sopenharmony_ci 24978c2ecf20Sopenharmony_ci err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size, 24988c2ecf20Sopenharmony_ci physical_sector_size); 24998c2ecf20Sopenharmony_ci if (err) { 25008c2ecf20Sopenharmony_ci xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 25018c2ecf20Sopenharmony_ci info->xbdev->otherend); 25028c2ecf20Sopenharmony_ci goto fail; 25038c2ecf20Sopenharmony_ci } 25048c2ecf20Sopenharmony_ci 25058c2ecf20Sopenharmony_ci xenbus_switch_state(info->xbdev, XenbusStateConnected); 25068c2ecf20Sopenharmony_ci 25078c2ecf20Sopenharmony_ci /* Kick pending requests. */ 25088c2ecf20Sopenharmony_ci info->connected = BLKIF_STATE_CONNECTED; 25098c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) 25108c2ecf20Sopenharmony_ci kick_pending_request_queues(rinfo); 25118c2ecf20Sopenharmony_ci 25128c2ecf20Sopenharmony_ci device_add_disk(&info->xbdev->dev, info->gd, NULL); 25138c2ecf20Sopenharmony_ci 25148c2ecf20Sopenharmony_ci info->is_ready = 1; 25158c2ecf20Sopenharmony_ci return; 25168c2ecf20Sopenharmony_ci 25178c2ecf20Sopenharmony_cifail: 25188c2ecf20Sopenharmony_ci blkif_free(info, 0); 25198c2ecf20Sopenharmony_ci return; 25208c2ecf20Sopenharmony_ci} 25218c2ecf20Sopenharmony_ci 25228c2ecf20Sopenharmony_ci/** 25238c2ecf20Sopenharmony_ci * Callback received when the backend's state changes. 25248c2ecf20Sopenharmony_ci */ 25258c2ecf20Sopenharmony_cistatic void blkback_changed(struct xenbus_device *dev, 25268c2ecf20Sopenharmony_ci enum xenbus_state backend_state) 25278c2ecf20Sopenharmony_ci{ 25288c2ecf20Sopenharmony_ci struct blkfront_info *info = dev_get_drvdata(&dev->dev); 25298c2ecf20Sopenharmony_ci 25308c2ecf20Sopenharmony_ci dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state); 25318c2ecf20Sopenharmony_ci 25328c2ecf20Sopenharmony_ci switch (backend_state) { 25338c2ecf20Sopenharmony_ci case XenbusStateInitWait: 25348c2ecf20Sopenharmony_ci if (dev->state != XenbusStateInitialising) 25358c2ecf20Sopenharmony_ci break; 25368c2ecf20Sopenharmony_ci if (talk_to_blkback(dev, info)) 25378c2ecf20Sopenharmony_ci break; 25388c2ecf20Sopenharmony_ci case XenbusStateInitialising: 25398c2ecf20Sopenharmony_ci case XenbusStateInitialised: 25408c2ecf20Sopenharmony_ci case XenbusStateReconfiguring: 25418c2ecf20Sopenharmony_ci case XenbusStateReconfigured: 25428c2ecf20Sopenharmony_ci case XenbusStateUnknown: 25438c2ecf20Sopenharmony_ci break; 25448c2ecf20Sopenharmony_ci 25458c2ecf20Sopenharmony_ci case XenbusStateConnected: 25468c2ecf20Sopenharmony_ci /* 25478c2ecf20Sopenharmony_ci * talk_to_blkback sets state to XenbusStateInitialised 25488c2ecf20Sopenharmony_ci * and blkfront_connect sets it to XenbusStateConnected 25498c2ecf20Sopenharmony_ci * (if connection went OK). 25508c2ecf20Sopenharmony_ci * 25518c2ecf20Sopenharmony_ci * If the backend (or toolstack) decides to poke at backend 25528c2ecf20Sopenharmony_ci * state (and re-trigger the watch by setting the state repeatedly 25538c2ecf20Sopenharmony_ci * to XenbusStateConnected (4)) we need to deal with this. 25548c2ecf20Sopenharmony_ci * This is allowed as this is used to communicate to the guest 25558c2ecf20Sopenharmony_ci * that the size of disk has changed! 25568c2ecf20Sopenharmony_ci */ 25578c2ecf20Sopenharmony_ci if ((dev->state != XenbusStateInitialised) && 25588c2ecf20Sopenharmony_ci (dev->state != XenbusStateConnected)) { 25598c2ecf20Sopenharmony_ci if (talk_to_blkback(dev, info)) 25608c2ecf20Sopenharmony_ci break; 25618c2ecf20Sopenharmony_ci } 25628c2ecf20Sopenharmony_ci 25638c2ecf20Sopenharmony_ci blkfront_connect(info); 25648c2ecf20Sopenharmony_ci break; 25658c2ecf20Sopenharmony_ci 25668c2ecf20Sopenharmony_ci case XenbusStateClosed: 25678c2ecf20Sopenharmony_ci if (dev->state == XenbusStateClosed) 25688c2ecf20Sopenharmony_ci break; 25698c2ecf20Sopenharmony_ci fallthrough; 25708c2ecf20Sopenharmony_ci case XenbusStateClosing: 25718c2ecf20Sopenharmony_ci if (info) 25728c2ecf20Sopenharmony_ci blkfront_closing(info); 25738c2ecf20Sopenharmony_ci break; 25748c2ecf20Sopenharmony_ci } 25758c2ecf20Sopenharmony_ci} 25768c2ecf20Sopenharmony_ci 25778c2ecf20Sopenharmony_cistatic int blkfront_remove(struct xenbus_device *xbdev) 25788c2ecf20Sopenharmony_ci{ 25798c2ecf20Sopenharmony_ci struct blkfront_info *info = dev_get_drvdata(&xbdev->dev); 25808c2ecf20Sopenharmony_ci struct block_device *bdev = NULL; 25818c2ecf20Sopenharmony_ci struct gendisk *disk; 25828c2ecf20Sopenharmony_ci 25838c2ecf20Sopenharmony_ci dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename); 25848c2ecf20Sopenharmony_ci 25858c2ecf20Sopenharmony_ci if (!info) 25868c2ecf20Sopenharmony_ci return 0; 25878c2ecf20Sopenharmony_ci 25888c2ecf20Sopenharmony_ci blkif_free(info, 0); 25898c2ecf20Sopenharmony_ci 25908c2ecf20Sopenharmony_ci mutex_lock(&info->mutex); 25918c2ecf20Sopenharmony_ci 25928c2ecf20Sopenharmony_ci disk = info->gd; 25938c2ecf20Sopenharmony_ci if (disk) 25948c2ecf20Sopenharmony_ci bdev = bdget_disk(disk, 0); 25958c2ecf20Sopenharmony_ci 25968c2ecf20Sopenharmony_ci info->xbdev = NULL; 25978c2ecf20Sopenharmony_ci mutex_unlock(&info->mutex); 25988c2ecf20Sopenharmony_ci 25998c2ecf20Sopenharmony_ci if (!bdev) { 26008c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 26018c2ecf20Sopenharmony_ci free_info(info); 26028c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 26038c2ecf20Sopenharmony_ci return 0; 26048c2ecf20Sopenharmony_ci } 26058c2ecf20Sopenharmony_ci 26068c2ecf20Sopenharmony_ci /* 26078c2ecf20Sopenharmony_ci * The xbdev was removed before we reached the Closed 26088c2ecf20Sopenharmony_ci * state. See if it's safe to remove the disk. If the bdev 26098c2ecf20Sopenharmony_ci * isn't closed yet, we let release take care of it. 26108c2ecf20Sopenharmony_ci */ 26118c2ecf20Sopenharmony_ci 26128c2ecf20Sopenharmony_ci mutex_lock(&bdev->bd_mutex); 26138c2ecf20Sopenharmony_ci info = disk->private_data; 26148c2ecf20Sopenharmony_ci 26158c2ecf20Sopenharmony_ci dev_warn(disk_to_dev(disk), 26168c2ecf20Sopenharmony_ci "%s was hot-unplugged, %d stale handles\n", 26178c2ecf20Sopenharmony_ci xbdev->nodename, bdev->bd_openers); 26188c2ecf20Sopenharmony_ci 26198c2ecf20Sopenharmony_ci if (info && !bdev->bd_openers) { 26208c2ecf20Sopenharmony_ci xlvbd_release_gendisk(info); 26218c2ecf20Sopenharmony_ci disk->private_data = NULL; 26228c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 26238c2ecf20Sopenharmony_ci free_info(info); 26248c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 26258c2ecf20Sopenharmony_ci } 26268c2ecf20Sopenharmony_ci 26278c2ecf20Sopenharmony_ci mutex_unlock(&bdev->bd_mutex); 26288c2ecf20Sopenharmony_ci bdput(bdev); 26298c2ecf20Sopenharmony_ci 26308c2ecf20Sopenharmony_ci return 0; 26318c2ecf20Sopenharmony_ci} 26328c2ecf20Sopenharmony_ci 26338c2ecf20Sopenharmony_cistatic int blkfront_is_ready(struct xenbus_device *dev) 26348c2ecf20Sopenharmony_ci{ 26358c2ecf20Sopenharmony_ci struct blkfront_info *info = dev_get_drvdata(&dev->dev); 26368c2ecf20Sopenharmony_ci 26378c2ecf20Sopenharmony_ci return info->is_ready && info->xbdev; 26388c2ecf20Sopenharmony_ci} 26398c2ecf20Sopenharmony_ci 26408c2ecf20Sopenharmony_cistatic int blkif_open(struct block_device *bdev, fmode_t mode) 26418c2ecf20Sopenharmony_ci{ 26428c2ecf20Sopenharmony_ci struct gendisk *disk = bdev->bd_disk; 26438c2ecf20Sopenharmony_ci struct blkfront_info *info; 26448c2ecf20Sopenharmony_ci int err = 0; 26458c2ecf20Sopenharmony_ci 26468c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 26478c2ecf20Sopenharmony_ci 26488c2ecf20Sopenharmony_ci info = disk->private_data; 26498c2ecf20Sopenharmony_ci if (!info) { 26508c2ecf20Sopenharmony_ci /* xbdev gone */ 26518c2ecf20Sopenharmony_ci err = -ERESTARTSYS; 26528c2ecf20Sopenharmony_ci goto out; 26538c2ecf20Sopenharmony_ci } 26548c2ecf20Sopenharmony_ci 26558c2ecf20Sopenharmony_ci mutex_lock(&info->mutex); 26568c2ecf20Sopenharmony_ci 26578c2ecf20Sopenharmony_ci if (!info->gd) 26588c2ecf20Sopenharmony_ci /* xbdev is closed */ 26598c2ecf20Sopenharmony_ci err = -ERESTARTSYS; 26608c2ecf20Sopenharmony_ci 26618c2ecf20Sopenharmony_ci mutex_unlock(&info->mutex); 26628c2ecf20Sopenharmony_ci 26638c2ecf20Sopenharmony_ciout: 26648c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 26658c2ecf20Sopenharmony_ci return err; 26668c2ecf20Sopenharmony_ci} 26678c2ecf20Sopenharmony_ci 26688c2ecf20Sopenharmony_cistatic void blkif_release(struct gendisk *disk, fmode_t mode) 26698c2ecf20Sopenharmony_ci{ 26708c2ecf20Sopenharmony_ci struct blkfront_info *info = disk->private_data; 26718c2ecf20Sopenharmony_ci struct block_device *bdev; 26728c2ecf20Sopenharmony_ci struct xenbus_device *xbdev; 26738c2ecf20Sopenharmony_ci 26748c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 26758c2ecf20Sopenharmony_ci 26768c2ecf20Sopenharmony_ci bdev = bdget_disk(disk, 0); 26778c2ecf20Sopenharmony_ci 26788c2ecf20Sopenharmony_ci if (!bdev) { 26798c2ecf20Sopenharmony_ci WARN(1, "Block device %s yanked out from us!\n", disk->disk_name); 26808c2ecf20Sopenharmony_ci goto out_mutex; 26818c2ecf20Sopenharmony_ci } 26828c2ecf20Sopenharmony_ci if (bdev->bd_openers) 26838c2ecf20Sopenharmony_ci goto out; 26848c2ecf20Sopenharmony_ci 26858c2ecf20Sopenharmony_ci /* 26868c2ecf20Sopenharmony_ci * Check if we have been instructed to close. We will have 26878c2ecf20Sopenharmony_ci * deferred this request, because the bdev was still open. 26888c2ecf20Sopenharmony_ci */ 26898c2ecf20Sopenharmony_ci 26908c2ecf20Sopenharmony_ci mutex_lock(&info->mutex); 26918c2ecf20Sopenharmony_ci xbdev = info->xbdev; 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_ci if (xbdev && xbdev->state == XenbusStateClosing) { 26948c2ecf20Sopenharmony_ci /* pending switch to state closed */ 26958c2ecf20Sopenharmony_ci dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 26968c2ecf20Sopenharmony_ci xlvbd_release_gendisk(info); 26978c2ecf20Sopenharmony_ci xenbus_frontend_closed(info->xbdev); 26988c2ecf20Sopenharmony_ci } 26998c2ecf20Sopenharmony_ci 27008c2ecf20Sopenharmony_ci mutex_unlock(&info->mutex); 27018c2ecf20Sopenharmony_ci 27028c2ecf20Sopenharmony_ci if (!xbdev) { 27038c2ecf20Sopenharmony_ci /* sudden device removal */ 27048c2ecf20Sopenharmony_ci dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 27058c2ecf20Sopenharmony_ci xlvbd_release_gendisk(info); 27068c2ecf20Sopenharmony_ci disk->private_data = NULL; 27078c2ecf20Sopenharmony_ci free_info(info); 27088c2ecf20Sopenharmony_ci } 27098c2ecf20Sopenharmony_ci 27108c2ecf20Sopenharmony_ciout: 27118c2ecf20Sopenharmony_ci bdput(bdev); 27128c2ecf20Sopenharmony_ciout_mutex: 27138c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 27148c2ecf20Sopenharmony_ci} 27158c2ecf20Sopenharmony_ci 27168c2ecf20Sopenharmony_cistatic const struct block_device_operations xlvbd_block_fops = 27178c2ecf20Sopenharmony_ci{ 27188c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 27198c2ecf20Sopenharmony_ci .open = blkif_open, 27208c2ecf20Sopenharmony_ci .release = blkif_release, 27218c2ecf20Sopenharmony_ci .getgeo = blkif_getgeo, 27228c2ecf20Sopenharmony_ci .ioctl = blkif_ioctl, 27238c2ecf20Sopenharmony_ci .compat_ioctl = blkdev_compat_ptr_ioctl, 27248c2ecf20Sopenharmony_ci}; 27258c2ecf20Sopenharmony_ci 27268c2ecf20Sopenharmony_ci 27278c2ecf20Sopenharmony_cistatic const struct xenbus_device_id blkfront_ids[] = { 27288c2ecf20Sopenharmony_ci { "vbd" }, 27298c2ecf20Sopenharmony_ci { "" } 27308c2ecf20Sopenharmony_ci}; 27318c2ecf20Sopenharmony_ci 27328c2ecf20Sopenharmony_cistatic struct xenbus_driver blkfront_driver = { 27338c2ecf20Sopenharmony_ci .ids = blkfront_ids, 27348c2ecf20Sopenharmony_ci .probe = blkfront_probe, 27358c2ecf20Sopenharmony_ci .remove = blkfront_remove, 27368c2ecf20Sopenharmony_ci .resume = blkfront_resume, 27378c2ecf20Sopenharmony_ci .otherend_changed = blkback_changed, 27388c2ecf20Sopenharmony_ci .is_ready = blkfront_is_ready, 27398c2ecf20Sopenharmony_ci}; 27408c2ecf20Sopenharmony_ci 27418c2ecf20Sopenharmony_cistatic void purge_persistent_grants(struct blkfront_info *info) 27428c2ecf20Sopenharmony_ci{ 27438c2ecf20Sopenharmony_ci unsigned int i; 27448c2ecf20Sopenharmony_ci unsigned long flags; 27458c2ecf20Sopenharmony_ci struct blkfront_ring_info *rinfo; 27468c2ecf20Sopenharmony_ci 27478c2ecf20Sopenharmony_ci for_each_rinfo(info, rinfo, i) { 27488c2ecf20Sopenharmony_ci struct grant *gnt_list_entry, *tmp; 27498c2ecf20Sopenharmony_ci 27508c2ecf20Sopenharmony_ci spin_lock_irqsave(&rinfo->ring_lock, flags); 27518c2ecf20Sopenharmony_ci 27528c2ecf20Sopenharmony_ci if (rinfo->persistent_gnts_c == 0) { 27538c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 27548c2ecf20Sopenharmony_ci continue; 27558c2ecf20Sopenharmony_ci } 27568c2ecf20Sopenharmony_ci 27578c2ecf20Sopenharmony_ci list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, 27588c2ecf20Sopenharmony_ci node) { 27598c2ecf20Sopenharmony_ci if (gnt_list_entry->gref == GRANT_INVALID_REF || 27608c2ecf20Sopenharmony_ci !gnttab_try_end_foreign_access(gnt_list_entry->gref)) 27618c2ecf20Sopenharmony_ci continue; 27628c2ecf20Sopenharmony_ci 27638c2ecf20Sopenharmony_ci list_del(&gnt_list_entry->node); 27648c2ecf20Sopenharmony_ci rinfo->persistent_gnts_c--; 27658c2ecf20Sopenharmony_ci gnt_list_entry->gref = GRANT_INVALID_REF; 27668c2ecf20Sopenharmony_ci list_add_tail(&gnt_list_entry->node, &rinfo->grants); 27678c2ecf20Sopenharmony_ci } 27688c2ecf20Sopenharmony_ci 27698c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rinfo->ring_lock, flags); 27708c2ecf20Sopenharmony_ci } 27718c2ecf20Sopenharmony_ci} 27728c2ecf20Sopenharmony_ci 27738c2ecf20Sopenharmony_cistatic void blkfront_delay_work(struct work_struct *work) 27748c2ecf20Sopenharmony_ci{ 27758c2ecf20Sopenharmony_ci struct blkfront_info *info; 27768c2ecf20Sopenharmony_ci bool need_schedule_work = false; 27778c2ecf20Sopenharmony_ci 27788c2ecf20Sopenharmony_ci /* 27798c2ecf20Sopenharmony_ci * Note that when using bounce buffers but not persistent grants 27808c2ecf20Sopenharmony_ci * there's no need to run blkfront_delay_work because grants are 27818c2ecf20Sopenharmony_ci * revoked in blkif_completion or else an error is reported and the 27828c2ecf20Sopenharmony_ci * connection is closed. 27838c2ecf20Sopenharmony_ci */ 27848c2ecf20Sopenharmony_ci 27858c2ecf20Sopenharmony_ci mutex_lock(&blkfront_mutex); 27868c2ecf20Sopenharmony_ci 27878c2ecf20Sopenharmony_ci list_for_each_entry(info, &info_list, info_list) { 27888c2ecf20Sopenharmony_ci if (info->feature_persistent) { 27898c2ecf20Sopenharmony_ci need_schedule_work = true; 27908c2ecf20Sopenharmony_ci mutex_lock(&info->mutex); 27918c2ecf20Sopenharmony_ci purge_persistent_grants(info); 27928c2ecf20Sopenharmony_ci mutex_unlock(&info->mutex); 27938c2ecf20Sopenharmony_ci } 27948c2ecf20Sopenharmony_ci } 27958c2ecf20Sopenharmony_ci 27968c2ecf20Sopenharmony_ci if (need_schedule_work) 27978c2ecf20Sopenharmony_ci schedule_delayed_work(&blkfront_work, HZ * 10); 27988c2ecf20Sopenharmony_ci 27998c2ecf20Sopenharmony_ci mutex_unlock(&blkfront_mutex); 28008c2ecf20Sopenharmony_ci} 28018c2ecf20Sopenharmony_ci 28028c2ecf20Sopenharmony_cistatic int __init xlblk_init(void) 28038c2ecf20Sopenharmony_ci{ 28048c2ecf20Sopenharmony_ci int ret; 28058c2ecf20Sopenharmony_ci int nr_cpus = num_online_cpus(); 28068c2ecf20Sopenharmony_ci 28078c2ecf20Sopenharmony_ci if (!xen_domain()) 28088c2ecf20Sopenharmony_ci return -ENODEV; 28098c2ecf20Sopenharmony_ci 28108c2ecf20Sopenharmony_ci if (!xen_has_pv_disk_devices()) 28118c2ecf20Sopenharmony_ci return -ENODEV; 28128c2ecf20Sopenharmony_ci 28138c2ecf20Sopenharmony_ci if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { 28148c2ecf20Sopenharmony_ci pr_warn("xen_blk: can't get major %d with name %s\n", 28158c2ecf20Sopenharmony_ci XENVBD_MAJOR, DEV_NAME); 28168c2ecf20Sopenharmony_ci return -ENODEV; 28178c2ecf20Sopenharmony_ci } 28188c2ecf20Sopenharmony_ci 28198c2ecf20Sopenharmony_ci if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST) 28208c2ecf20Sopenharmony_ci xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST; 28218c2ecf20Sopenharmony_ci 28228c2ecf20Sopenharmony_ci if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) { 28238c2ecf20Sopenharmony_ci pr_info("Invalid max_ring_order (%d), will use default max: %d.\n", 28248c2ecf20Sopenharmony_ci xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER); 28258c2ecf20Sopenharmony_ci xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER; 28268c2ecf20Sopenharmony_ci } 28278c2ecf20Sopenharmony_ci 28288c2ecf20Sopenharmony_ci if (xen_blkif_max_queues > nr_cpus) { 28298c2ecf20Sopenharmony_ci pr_info("Invalid max_queues (%d), will use default max: %d.\n", 28308c2ecf20Sopenharmony_ci xen_blkif_max_queues, nr_cpus); 28318c2ecf20Sopenharmony_ci xen_blkif_max_queues = nr_cpus; 28328c2ecf20Sopenharmony_ci } 28338c2ecf20Sopenharmony_ci 28348c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work); 28358c2ecf20Sopenharmony_ci 28368c2ecf20Sopenharmony_ci ret = xenbus_register_frontend(&blkfront_driver); 28378c2ecf20Sopenharmony_ci if (ret) { 28388c2ecf20Sopenharmony_ci unregister_blkdev(XENVBD_MAJOR, DEV_NAME); 28398c2ecf20Sopenharmony_ci return ret; 28408c2ecf20Sopenharmony_ci } 28418c2ecf20Sopenharmony_ci 28428c2ecf20Sopenharmony_ci return 0; 28438c2ecf20Sopenharmony_ci} 28448c2ecf20Sopenharmony_cimodule_init(xlblk_init); 28458c2ecf20Sopenharmony_ci 28468c2ecf20Sopenharmony_ci 28478c2ecf20Sopenharmony_cistatic void __exit xlblk_exit(void) 28488c2ecf20Sopenharmony_ci{ 28498c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&blkfront_work); 28508c2ecf20Sopenharmony_ci 28518c2ecf20Sopenharmony_ci xenbus_unregister_driver(&blkfront_driver); 28528c2ecf20Sopenharmony_ci unregister_blkdev(XENVBD_MAJOR, DEV_NAME); 28538c2ecf20Sopenharmony_ci kfree(minors); 28548c2ecf20Sopenharmony_ci} 28558c2ecf20Sopenharmony_cimodule_exit(xlblk_exit); 28568c2ecf20Sopenharmony_ci 28578c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Xen virtual block device frontend"); 28588c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 28598c2ecf20Sopenharmony_ciMODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); 28608c2ecf20Sopenharmony_ciMODULE_ALIAS("xen:vbd"); 28618c2ecf20Sopenharmony_ciMODULE_ALIAS("xenblk"); 2862