18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * blkfront.c
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * XenLinux virtual block device driver.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
78c2ecf20Sopenharmony_ci * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
88c2ecf20Sopenharmony_ci * Copyright (c) 2004, Christian Limpach
98c2ecf20Sopenharmony_ci * Copyright (c) 2004, Andrew Warfield
108c2ecf20Sopenharmony_ci * Copyright (c) 2005, Christopher Clark
118c2ecf20Sopenharmony_ci * Copyright (c) 2005, XenSource Ltd
128c2ecf20Sopenharmony_ci *
138c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or
148c2ecf20Sopenharmony_ci * modify it under the terms of the GNU General Public License version 2
158c2ecf20Sopenharmony_ci * as published by the Free Software Foundation; or, when distributed
168c2ecf20Sopenharmony_ci * separately from the Linux kernel or incorporated into other
178c2ecf20Sopenharmony_ci * software packages, subject to the following license:
188c2ecf20Sopenharmony_ci *
198c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy
208c2ecf20Sopenharmony_ci * of this source file (the "Software"), to deal in the Software without
218c2ecf20Sopenharmony_ci * restriction, including without limitation the rights to use, copy, modify,
228c2ecf20Sopenharmony_ci * merge, publish, distribute, sublicense, and/or sell copies of the Software,
238c2ecf20Sopenharmony_ci * and to permit persons to whom the Software is furnished to do so, subject to
248c2ecf20Sopenharmony_ci * the following conditions:
258c2ecf20Sopenharmony_ci *
268c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
278c2ecf20Sopenharmony_ci * all copies or substantial portions of the Software.
288c2ecf20Sopenharmony_ci *
298c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
308c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
318c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
328c2ecf20Sopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
338c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
348c2ecf20Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
358c2ecf20Sopenharmony_ci * IN THE SOFTWARE.
368c2ecf20Sopenharmony_ci */
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci#include <linux/interrupt.h>
398c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
408c2ecf20Sopenharmony_ci#include <linux/blk-mq.h>
418c2ecf20Sopenharmony_ci#include <linux/hdreg.h>
428c2ecf20Sopenharmony_ci#include <linux/cdrom.h>
438c2ecf20Sopenharmony_ci#include <linux/module.h>
448c2ecf20Sopenharmony_ci#include <linux/slab.h>
458c2ecf20Sopenharmony_ci#include <linux/mutex.h>
468c2ecf20Sopenharmony_ci#include <linux/scatterlist.h>
478c2ecf20Sopenharmony_ci#include <linux/bitmap.h>
488c2ecf20Sopenharmony_ci#include <linux/list.h>
498c2ecf20Sopenharmony_ci#include <linux/workqueue.h>
508c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci#include <xen/xen.h>
538c2ecf20Sopenharmony_ci#include <xen/xenbus.h>
548c2ecf20Sopenharmony_ci#include <xen/grant_table.h>
558c2ecf20Sopenharmony_ci#include <xen/events.h>
568c2ecf20Sopenharmony_ci#include <xen/page.h>
578c2ecf20Sopenharmony_ci#include <xen/platform_pci.h>
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci#include <xen/interface/grant_table.h>
608c2ecf20Sopenharmony_ci#include <xen/interface/io/blkif.h>
618c2ecf20Sopenharmony_ci#include <xen/interface/io/protocols.h>
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci#include <asm/xen/hypervisor.h>
648c2ecf20Sopenharmony_ci
/*
 * The block framework never builds a segment smaller than PAGE_SIZE.  When
 * Linux uses a different page size than Xen, a single segment may not fit
 * into one ring request: if the backend does not support indirect
 * descriptors, a request carries at most
 * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE = 44KB of data.
 *
 * Only one extra request is supported, so the Linux page size should be
 * <= (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) = 88KB.
 */
#define HAS_EXTRA_REQ (XEN_PFN_PER_PAGE > BLKIF_MAX_SEGMENTS_PER_REQUEST)
788c2ecf20Sopenharmony_ci
/* Connection state of a virtual block device (see blkfront_info.connected). */
enum blkif_state {
	BLKIF_STATE_DISCONNECTED = 0,
	BLKIF_STATE_CONNECTED,
	BLKIF_STATE_SUSPENDED,
	BLKIF_STATE_ERROR,
};
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_cistruct grant {
878c2ecf20Sopenharmony_ci	grant_ref_t gref;
888c2ecf20Sopenharmony_ci	struct page *page;
898c2ecf20Sopenharmony_ci	struct list_head node;
908c2ecf20Sopenharmony_ci};
918c2ecf20Sopenharmony_ci
/* Progress of a ring request, recorded in its shadow entry (blk_shadow.status). */
enum blk_req_status {
	REQ_PROCESSING = 0,
	REQ_WAITING,
	REQ_DONE,
	REQ_ERROR,
	REQ_EOPNOTSUPP,
};
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_cistruct blk_shadow {
1018c2ecf20Sopenharmony_ci	struct blkif_request req;
1028c2ecf20Sopenharmony_ci	struct request *request;
1038c2ecf20Sopenharmony_ci	struct grant **grants_used;
1048c2ecf20Sopenharmony_ci	struct grant **indirect_grants;
1058c2ecf20Sopenharmony_ci	struct scatterlist *sg;
1068c2ecf20Sopenharmony_ci	unsigned int num_sg;
1078c2ecf20Sopenharmony_ci	enum blk_req_status status;
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci	#define NO_ASSOCIATED_ID ~0UL
1108c2ecf20Sopenharmony_ci	/*
1118c2ecf20Sopenharmony_ci	 * Id of the sibling if we ever need 2 requests when handling a
1128c2ecf20Sopenharmony_ci	 * block I/O request
1138c2ecf20Sopenharmony_ci	 */
1148c2ecf20Sopenharmony_ci	unsigned long associated_id;
1158c2ecf20Sopenharmony_ci};
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_cistruct blkif_req {
1188c2ecf20Sopenharmony_ci	blk_status_t	error;
1198c2ecf20Sopenharmony_ci};
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_cistatic inline struct blkif_req *blkif_req(struct request *rq)
1228c2ecf20Sopenharmony_ci{
1238c2ecf20Sopenharmony_ci	return blk_mq_rq_to_pdu(rq);
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(blkfront_mutex);
1278c2ecf20Sopenharmony_cistatic const struct block_device_operations xlvbd_block_fops;
1288c2ecf20Sopenharmony_cistatic struct delayed_work blkfront_work;
1298c2ecf20Sopenharmony_cistatic LIST_HEAD(info_list);
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci/*
1328c2ecf20Sopenharmony_ci * Maximum number of segments in indirect requests, the actual value used by
1338c2ecf20Sopenharmony_ci * the frontend driver is the minimum of this value and the value provided
1348c2ecf20Sopenharmony_ci * by the backend driver.
1358c2ecf20Sopenharmony_ci */
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cistatic unsigned int xen_blkif_max_segments = 32;
1388c2ecf20Sopenharmony_cimodule_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444);
1398c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_indirect_segments,
1408c2ecf20Sopenharmony_ci		 "Maximum amount of segments in indirect requests (default is 32)");
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_cistatic unsigned int xen_blkif_max_queues = 4;
1438c2ecf20Sopenharmony_cimodule_param_named(max_queues, xen_blkif_max_queues, uint, 0444);
1448c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci/*
1478c2ecf20Sopenharmony_ci * Maximum order of pages to be used for the shared ring between front and
1488c2ecf20Sopenharmony_ci * backend, 4KB page granularity is used.
1498c2ecf20Sopenharmony_ci */
1508c2ecf20Sopenharmony_cistatic unsigned int xen_blkif_max_ring_order;
1518c2ecf20Sopenharmony_cimodule_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
1528c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_cistatic bool __read_mostly xen_blkif_trusted = true;
1558c2ecf20Sopenharmony_cimodule_param_named(trusted, xen_blkif_trusted, bool, 0644);
1568c2ecf20Sopenharmony_ciMODULE_PARM_DESC(trusted, "Is the backend trusted");
1578c2ecf20Sopenharmony_ci
/* Number of request slots in a ring made of (info)->nr_ring_pages Xen pages. */
#define BLK_RING_SIZE(info)						\
	__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)

/*
 * "ring-ref%u" with i = (-1UL) takes 11 characters and "ring-ref" is 8, so
 * 19 characters are enough.  Defined as 20 to stay consistent with the
 * backend.
 */
#define RINGREF_NAME_LEN (20)

/*
 * "queue-%u" would take 7 + 10 (UINT_MAX) = 17 characters.
 */
#define QUEUE_NAME_LEN (17)
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci/*
1728c2ecf20Sopenharmony_ci *  Per-ring info.
1738c2ecf20Sopenharmony_ci *  Every blkfront device can associate with one or more blkfront_ring_info,
1748c2ecf20Sopenharmony_ci *  depending on how many hardware queues/rings to be used.
1758c2ecf20Sopenharmony_ci */
1768c2ecf20Sopenharmony_cistruct blkfront_ring_info {
1778c2ecf20Sopenharmony_ci	/* Lock to protect data in every ring buffer. */
1788c2ecf20Sopenharmony_ci	spinlock_t ring_lock;
1798c2ecf20Sopenharmony_ci	struct blkif_front_ring ring;
1808c2ecf20Sopenharmony_ci	unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
1818c2ecf20Sopenharmony_ci	unsigned int evtchn, irq;
1828c2ecf20Sopenharmony_ci	struct work_struct work;
1838c2ecf20Sopenharmony_ci	struct gnttab_free_callback callback;
1848c2ecf20Sopenharmony_ci	struct list_head indirect_pages;
1858c2ecf20Sopenharmony_ci	struct list_head grants;
1868c2ecf20Sopenharmony_ci	unsigned int persistent_gnts_c;
1878c2ecf20Sopenharmony_ci	unsigned long shadow_free;
1888c2ecf20Sopenharmony_ci	struct blkfront_info *dev_info;
1898c2ecf20Sopenharmony_ci	struct blk_shadow shadow[];
1908c2ecf20Sopenharmony_ci};
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci/*
1938c2ecf20Sopenharmony_ci * We have one of these per vbd, whether ide, scsi or 'other'.  They
1948c2ecf20Sopenharmony_ci * hang in private_data off the gendisk structure. We may end up
1958c2ecf20Sopenharmony_ci * putting all kinds of interesting stuff here :-)
1968c2ecf20Sopenharmony_ci */
1978c2ecf20Sopenharmony_cistruct blkfront_info
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	struct mutex mutex;
2008c2ecf20Sopenharmony_ci	struct xenbus_device *xbdev;
2018c2ecf20Sopenharmony_ci	struct gendisk *gd;
2028c2ecf20Sopenharmony_ci	u16 sector_size;
2038c2ecf20Sopenharmony_ci	unsigned int physical_sector_size;
2048c2ecf20Sopenharmony_ci	int vdevice;
2058c2ecf20Sopenharmony_ci	blkif_vdev_t handle;
2068c2ecf20Sopenharmony_ci	enum blkif_state connected;
2078c2ecf20Sopenharmony_ci	/* Number of pages per ring buffer. */
2088c2ecf20Sopenharmony_ci	unsigned int nr_ring_pages;
2098c2ecf20Sopenharmony_ci	struct request_queue *rq;
2108c2ecf20Sopenharmony_ci	unsigned int feature_flush:1;
2118c2ecf20Sopenharmony_ci	unsigned int feature_fua:1;
2128c2ecf20Sopenharmony_ci	unsigned int feature_discard:1;
2138c2ecf20Sopenharmony_ci	unsigned int feature_secdiscard:1;
2148c2ecf20Sopenharmony_ci	/* Connect-time cached feature_persistent parameter */
2158c2ecf20Sopenharmony_ci	unsigned int feature_persistent_parm:1;
2168c2ecf20Sopenharmony_ci	/* Persistent grants feature negotiation result */
2178c2ecf20Sopenharmony_ci	unsigned int feature_persistent:1;
2188c2ecf20Sopenharmony_ci	unsigned int bounce:1;
2198c2ecf20Sopenharmony_ci	unsigned int discard_granularity;
2208c2ecf20Sopenharmony_ci	unsigned int discard_alignment;
2218c2ecf20Sopenharmony_ci	/* Number of 4KB segments handled */
2228c2ecf20Sopenharmony_ci	unsigned int max_indirect_segments;
2238c2ecf20Sopenharmony_ci	int is_ready;
2248c2ecf20Sopenharmony_ci	struct blk_mq_tag_set tag_set;
2258c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
2268c2ecf20Sopenharmony_ci	unsigned int nr_rings;
2278c2ecf20Sopenharmony_ci	unsigned int rinfo_size;
2288c2ecf20Sopenharmony_ci	/* Save uncomplete reqs and bios for migration. */
2298c2ecf20Sopenharmony_ci	struct list_head requests;
2308c2ecf20Sopenharmony_ci	struct bio_list bio_list;
2318c2ecf20Sopenharmony_ci	struct list_head info_list;
2328c2ecf20Sopenharmony_ci};
2338c2ecf20Sopenharmony_ci
/*
 * Bitmap of reserved minor numbers and its size in bits.  Updates are made
 * while holding minor_lock.
 */
static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);

/* gref value of a grant that has no reference assigned yet */
#define GRANT_INVALID_REF	0

#define PARTS_PER_DISK		16
#define PARTS_PER_EXT_DISK      256

/* Split a classic virtual device number into major and minor. */
#define BLKIF_MAJOR(dev) ((dev) >> 8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

/* Extended virtual device numbers are flagged by bit EXT_SHIFT. */
#define EXT_SHIFT 28
#define EXTENDED (1 << EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev) & (EXTENDED))
#define BLKIF_MINOR_EXT(dev) ((dev) & (~EXTENDED))
#define EMULATED_HD_DISK_MINOR_OFFSET (0)
#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
#define EMULATED_SD_DISK_MINOR_OFFSET (0)
#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)

#define DEV_NAME	"xvd"	/* name in /dev */
2568c2ecf20Sopenharmony_ci
/*
 * A grant always has the size of a Xen page (i.e. 4KB), while a physical
 * segment always has the size of a Linux page.  This is the number of
 * grants needed per physical segment.
 */
#define GRANTS_PER_PSEG	(PAGE_SIZE / XEN_PAGE_SIZE)

/* Number of segment descriptors that fit into one indirect frame. */
#define GRANTS_PER_INDIRECT_FRAME \
	(XEN_PAGE_SIZE / sizeof(struct blkif_request_segment))

/* Number of indirect frames required to describe _grants grants. */
#define INDIRECT_GREFS(_grants)		\
	DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME)
2698c2ecf20Sopenharmony_ci
/* Forward declarations of helpers defined later in the file. */
static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
static void blkfront_gather_backend_features(struct blkfront_info *info);
static int negotiate_mq(struct blkfront_info *info);

/*
 * Iterate over every ring of @info.  @ptr walks the ring structures, which
 * are laid out (info)->rinfo_size bytes apart, and @idx counts from 0 up to
 * (info)->nr_rings - 1.
 */
#define for_each_rinfo(info, ptr, idx)					\
	for ((ptr) = (info)->rinfo, (idx) = 0;				\
	     (idx) < (info)->nr_rings;					\
	     (idx)++, (ptr) = (void *)(ptr) + (info)->rinfo_size)
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_cistatic inline struct blkfront_ring_info *
2808c2ecf20Sopenharmony_ciget_rinfo(const struct blkfront_info *info, unsigned int i)
2818c2ecf20Sopenharmony_ci{
2828c2ecf20Sopenharmony_ci	BUG_ON(i >= info->nr_rings);
2838c2ecf20Sopenharmony_ci	return (void *)info->rinfo + i * info->rinfo_size;
2848c2ecf20Sopenharmony_ci}
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_cistatic int get_id_from_freelist(struct blkfront_ring_info *rinfo)
2878c2ecf20Sopenharmony_ci{
2888c2ecf20Sopenharmony_ci	unsigned long free = rinfo->shadow_free;
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci	BUG_ON(free >= BLK_RING_SIZE(rinfo->dev_info));
2918c2ecf20Sopenharmony_ci	rinfo->shadow_free = rinfo->shadow[free].req.u.rw.id;
2928c2ecf20Sopenharmony_ci	rinfo->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
2938c2ecf20Sopenharmony_ci	return free;
2948c2ecf20Sopenharmony_ci}
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_cistatic int add_id_to_freelist(struct blkfront_ring_info *rinfo,
2978c2ecf20Sopenharmony_ci			      unsigned long id)
2988c2ecf20Sopenharmony_ci{
2998c2ecf20Sopenharmony_ci	if (rinfo->shadow[id].req.u.rw.id != id)
3008c2ecf20Sopenharmony_ci		return -EINVAL;
3018c2ecf20Sopenharmony_ci	if (rinfo->shadow[id].request == NULL)
3028c2ecf20Sopenharmony_ci		return -EINVAL;
3038c2ecf20Sopenharmony_ci	rinfo->shadow[id].req.u.rw.id  = rinfo->shadow_free;
3048c2ecf20Sopenharmony_ci	rinfo->shadow[id].request = NULL;
3058c2ecf20Sopenharmony_ci	rinfo->shadow_free = id;
3068c2ecf20Sopenharmony_ci	return 0;
3078c2ecf20Sopenharmony_ci}
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_cistatic int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
3108c2ecf20Sopenharmony_ci{
3118c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
3128c2ecf20Sopenharmony_ci	struct page *granted_page;
3138c2ecf20Sopenharmony_ci	struct grant *gnt_list_entry, *n;
3148c2ecf20Sopenharmony_ci	int i = 0;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	while (i < num) {
3178c2ecf20Sopenharmony_ci		gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
3188c2ecf20Sopenharmony_ci		if (!gnt_list_entry)
3198c2ecf20Sopenharmony_ci			goto out_of_memory;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci		if (info->bounce) {
3228c2ecf20Sopenharmony_ci			granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
3238c2ecf20Sopenharmony_ci			if (!granted_page) {
3248c2ecf20Sopenharmony_ci				kfree(gnt_list_entry);
3258c2ecf20Sopenharmony_ci				goto out_of_memory;
3268c2ecf20Sopenharmony_ci			}
3278c2ecf20Sopenharmony_ci			gnt_list_entry->page = granted_page;
3288c2ecf20Sopenharmony_ci		}
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ci		gnt_list_entry->gref = GRANT_INVALID_REF;
3318c2ecf20Sopenharmony_ci		list_add(&gnt_list_entry->node, &rinfo->grants);
3328c2ecf20Sopenharmony_ci		i++;
3338c2ecf20Sopenharmony_ci	}
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	return 0;
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ciout_of_memory:
3388c2ecf20Sopenharmony_ci	list_for_each_entry_safe(gnt_list_entry, n,
3398c2ecf20Sopenharmony_ci	                         &rinfo->grants, node) {
3408c2ecf20Sopenharmony_ci		list_del(&gnt_list_entry->node);
3418c2ecf20Sopenharmony_ci		if (info->bounce)
3428c2ecf20Sopenharmony_ci			__free_page(gnt_list_entry->page);
3438c2ecf20Sopenharmony_ci		kfree(gnt_list_entry);
3448c2ecf20Sopenharmony_ci		i--;
3458c2ecf20Sopenharmony_ci	}
3468c2ecf20Sopenharmony_ci	BUG_ON(i != 0);
3478c2ecf20Sopenharmony_ci	return -ENOMEM;
3488c2ecf20Sopenharmony_ci}
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_cistatic struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
3518c2ecf20Sopenharmony_ci{
3528c2ecf20Sopenharmony_ci	struct grant *gnt_list_entry;
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	BUG_ON(list_empty(&rinfo->grants));
3558c2ecf20Sopenharmony_ci	gnt_list_entry = list_first_entry(&rinfo->grants, struct grant,
3568c2ecf20Sopenharmony_ci					  node);
3578c2ecf20Sopenharmony_ci	list_del(&gnt_list_entry->node);
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci	if (gnt_list_entry->gref != GRANT_INVALID_REF)
3608c2ecf20Sopenharmony_ci		rinfo->persistent_gnts_c--;
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci	return gnt_list_entry;
3638c2ecf20Sopenharmony_ci}
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_cistatic inline void grant_foreign_access(const struct grant *gnt_list_entry,
3668c2ecf20Sopenharmony_ci					const struct blkfront_info *info)
3678c2ecf20Sopenharmony_ci{
3688c2ecf20Sopenharmony_ci	gnttab_page_grant_foreign_access_ref_one(gnt_list_entry->gref,
3698c2ecf20Sopenharmony_ci						 info->xbdev->otherend_id,
3708c2ecf20Sopenharmony_ci						 gnt_list_entry->page,
3718c2ecf20Sopenharmony_ci						 0);
3728c2ecf20Sopenharmony_ci}
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_cistatic struct grant *get_grant(grant_ref_t *gref_head,
3758c2ecf20Sopenharmony_ci			       unsigned long gfn,
3768c2ecf20Sopenharmony_ci			       struct blkfront_ring_info *rinfo)
3778c2ecf20Sopenharmony_ci{
3788c2ecf20Sopenharmony_ci	struct grant *gnt_list_entry = get_free_grant(rinfo);
3798c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci	if (gnt_list_entry->gref != GRANT_INVALID_REF)
3828c2ecf20Sopenharmony_ci		return gnt_list_entry;
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci	/* Assign a gref to this page */
3858c2ecf20Sopenharmony_ci	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
3868c2ecf20Sopenharmony_ci	BUG_ON(gnt_list_entry->gref == -ENOSPC);
3878c2ecf20Sopenharmony_ci	if (info->bounce)
3888c2ecf20Sopenharmony_ci		grant_foreign_access(gnt_list_entry, info);
3898c2ecf20Sopenharmony_ci	else {
3908c2ecf20Sopenharmony_ci		/* Grant access to the GFN passed by the caller */
3918c2ecf20Sopenharmony_ci		gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
3928c2ecf20Sopenharmony_ci						info->xbdev->otherend_id,
3938c2ecf20Sopenharmony_ci						gfn, 0);
3948c2ecf20Sopenharmony_ci	}
3958c2ecf20Sopenharmony_ci
3968c2ecf20Sopenharmony_ci	return gnt_list_entry;
3978c2ecf20Sopenharmony_ci}
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_cistatic struct grant *get_indirect_grant(grant_ref_t *gref_head,
4008c2ecf20Sopenharmony_ci					struct blkfront_ring_info *rinfo)
4018c2ecf20Sopenharmony_ci{
4028c2ecf20Sopenharmony_ci	struct grant *gnt_list_entry = get_free_grant(rinfo);
4038c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
4048c2ecf20Sopenharmony_ci
4058c2ecf20Sopenharmony_ci	if (gnt_list_entry->gref != GRANT_INVALID_REF)
4068c2ecf20Sopenharmony_ci		return gnt_list_entry;
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci	/* Assign a gref to this page */
4098c2ecf20Sopenharmony_ci	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
4108c2ecf20Sopenharmony_ci	BUG_ON(gnt_list_entry->gref == -ENOSPC);
4118c2ecf20Sopenharmony_ci	if (!info->bounce) {
4128c2ecf20Sopenharmony_ci		struct page *indirect_page;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci		/* Fetch a pre-allocated page to use for indirect grefs */
4158c2ecf20Sopenharmony_ci		BUG_ON(list_empty(&rinfo->indirect_pages));
4168c2ecf20Sopenharmony_ci		indirect_page = list_first_entry(&rinfo->indirect_pages,
4178c2ecf20Sopenharmony_ci						 struct page, lru);
4188c2ecf20Sopenharmony_ci		list_del(&indirect_page->lru);
4198c2ecf20Sopenharmony_ci		gnt_list_entry->page = indirect_page;
4208c2ecf20Sopenharmony_ci	}
4218c2ecf20Sopenharmony_ci	grant_foreign_access(gnt_list_entry, info);
4228c2ecf20Sopenharmony_ci
4238c2ecf20Sopenharmony_ci	return gnt_list_entry;
4248c2ecf20Sopenharmony_ci}
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_cistatic const char *op_name(int op)
4278c2ecf20Sopenharmony_ci{
4288c2ecf20Sopenharmony_ci	static const char *const names[] = {
4298c2ecf20Sopenharmony_ci		[BLKIF_OP_READ] = "read",
4308c2ecf20Sopenharmony_ci		[BLKIF_OP_WRITE] = "write",
4318c2ecf20Sopenharmony_ci		[BLKIF_OP_WRITE_BARRIER] = "barrier",
4328c2ecf20Sopenharmony_ci		[BLKIF_OP_FLUSH_DISKCACHE] = "flush",
4338c2ecf20Sopenharmony_ci		[BLKIF_OP_DISCARD] = "discard" };
4348c2ecf20Sopenharmony_ci
4358c2ecf20Sopenharmony_ci	if (op < 0 || op >= ARRAY_SIZE(names))
4368c2ecf20Sopenharmony_ci		return "unknown";
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	if (!names[op])
4398c2ecf20Sopenharmony_ci		return "reserved";
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci	return names[op];
4428c2ecf20Sopenharmony_ci}
4438c2ecf20Sopenharmony_cistatic int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
4448c2ecf20Sopenharmony_ci{
4458c2ecf20Sopenharmony_ci	unsigned int end = minor + nr;
4468c2ecf20Sopenharmony_ci	int rc;
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	if (end > nr_minors) {
4498c2ecf20Sopenharmony_ci		unsigned long *bitmap, *old;
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci		bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
4528c2ecf20Sopenharmony_ci				 GFP_KERNEL);
4538c2ecf20Sopenharmony_ci		if (bitmap == NULL)
4548c2ecf20Sopenharmony_ci			return -ENOMEM;
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci		spin_lock(&minor_lock);
4578c2ecf20Sopenharmony_ci		if (end > nr_minors) {
4588c2ecf20Sopenharmony_ci			old = minors;
4598c2ecf20Sopenharmony_ci			memcpy(bitmap, minors,
4608c2ecf20Sopenharmony_ci			       BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
4618c2ecf20Sopenharmony_ci			minors = bitmap;
4628c2ecf20Sopenharmony_ci			nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
4638c2ecf20Sopenharmony_ci		} else
4648c2ecf20Sopenharmony_ci			old = bitmap;
4658c2ecf20Sopenharmony_ci		spin_unlock(&minor_lock);
4668c2ecf20Sopenharmony_ci		kfree(old);
4678c2ecf20Sopenharmony_ci	}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	spin_lock(&minor_lock);
4708c2ecf20Sopenharmony_ci	if (find_next_bit(minors, end, minor) >= end) {
4718c2ecf20Sopenharmony_ci		bitmap_set(minors, minor, nr);
4728c2ecf20Sopenharmony_ci		rc = 0;
4738c2ecf20Sopenharmony_ci	} else
4748c2ecf20Sopenharmony_ci		rc = -EBUSY;
4758c2ecf20Sopenharmony_ci	spin_unlock(&minor_lock);
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci	return rc;
4788c2ecf20Sopenharmony_ci}
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_cistatic void xlbd_release_minors(unsigned int minor, unsigned int nr)
4818c2ecf20Sopenharmony_ci{
4828c2ecf20Sopenharmony_ci	unsigned int end = minor + nr;
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	BUG_ON(end > nr_minors);
4858c2ecf20Sopenharmony_ci	spin_lock(&minor_lock);
4868c2ecf20Sopenharmony_ci	bitmap_clear(minors,  minor, nr);
4878c2ecf20Sopenharmony_ci	spin_unlock(&minor_lock);
4888c2ecf20Sopenharmony_ci}
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_cistatic void blkif_restart_queue_callback(void *arg)
4918c2ecf20Sopenharmony_ci{
4928c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)arg;
4938c2ecf20Sopenharmony_ci	schedule_work(&rinfo->work);
4948c2ecf20Sopenharmony_ci}
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_cistatic int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
4978c2ecf20Sopenharmony_ci{
4988c2ecf20Sopenharmony_ci	/* We don't have real geometry info, but let's at least return
4998c2ecf20Sopenharmony_ci	   values consistent with the size of the device */
5008c2ecf20Sopenharmony_ci	sector_t nsect = get_capacity(bd->bd_disk);
5018c2ecf20Sopenharmony_ci	sector_t cylinders = nsect;
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	hg->heads = 0xff;
5048c2ecf20Sopenharmony_ci	hg->sectors = 0x3f;
5058c2ecf20Sopenharmony_ci	sector_div(cylinders, hg->heads * hg->sectors);
5068c2ecf20Sopenharmony_ci	hg->cylinders = cylinders;
5078c2ecf20Sopenharmony_ci	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
5088c2ecf20Sopenharmony_ci		hg->cylinders = 0xffff;
5098c2ecf20Sopenharmony_ci	return 0;
5108c2ecf20Sopenharmony_ci}
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_cistatic int blkif_ioctl(struct block_device *bdev, fmode_t mode,
5138c2ecf20Sopenharmony_ci		       unsigned command, unsigned long argument)
5148c2ecf20Sopenharmony_ci{
5158c2ecf20Sopenharmony_ci	struct blkfront_info *info = bdev->bd_disk->private_data;
5168c2ecf20Sopenharmony_ci	int i;
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci	dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
5198c2ecf20Sopenharmony_ci		command, (long)argument);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	switch (command) {
5228c2ecf20Sopenharmony_ci	case CDROMMULTISESSION:
5238c2ecf20Sopenharmony_ci		dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
5248c2ecf20Sopenharmony_ci		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
5258c2ecf20Sopenharmony_ci			if (put_user(0, (char __user *)(argument + i)))
5268c2ecf20Sopenharmony_ci				return -EFAULT;
5278c2ecf20Sopenharmony_ci		return 0;
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci	case CDROM_GET_CAPABILITY: {
5308c2ecf20Sopenharmony_ci		struct gendisk *gd = info->gd;
5318c2ecf20Sopenharmony_ci		if (gd->flags & GENHD_FL_CD)
5328c2ecf20Sopenharmony_ci			return 0;
5338c2ecf20Sopenharmony_ci		return -EINVAL;
5348c2ecf20Sopenharmony_ci	}
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	default:
5378c2ecf20Sopenharmony_ci		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
5388c2ecf20Sopenharmony_ci		  command);*/
5398c2ecf20Sopenharmony_ci		return -EINVAL; /* same return as native Linux */
5408c2ecf20Sopenharmony_ci	}
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci	return 0;
5438c2ecf20Sopenharmony_ci}
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_cistatic unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
5468c2ecf20Sopenharmony_ci					    struct request *req,
5478c2ecf20Sopenharmony_ci					    struct blkif_request **ring_req)
5488c2ecf20Sopenharmony_ci{
5498c2ecf20Sopenharmony_ci	unsigned long id;
5508c2ecf20Sopenharmony_ci
5518c2ecf20Sopenharmony_ci	*ring_req = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt);
5528c2ecf20Sopenharmony_ci	rinfo->ring.req_prod_pvt++;
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci	id = get_id_from_freelist(rinfo);
5558c2ecf20Sopenharmony_ci	rinfo->shadow[id].request = req;
5568c2ecf20Sopenharmony_ci	rinfo->shadow[id].status = REQ_PROCESSING;
5578c2ecf20Sopenharmony_ci	rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID;
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci	rinfo->shadow[id].req.u.rw.id = id;
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	return id;
5628c2ecf20Sopenharmony_ci}
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_cistatic int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
5658c2ecf20Sopenharmony_ci{
5668c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
5678c2ecf20Sopenharmony_ci	struct blkif_request *ring_req, *final_ring_req;
5688c2ecf20Sopenharmony_ci	unsigned long id;
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	/* Fill out a communications ring structure. */
5718c2ecf20Sopenharmony_ci	id = blkif_ring_get_request(rinfo, req, &final_ring_req);
5728c2ecf20Sopenharmony_ci	ring_req = &rinfo->shadow[id].req;
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_ci	ring_req->operation = BLKIF_OP_DISCARD;
5758c2ecf20Sopenharmony_ci	ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
5768c2ecf20Sopenharmony_ci	ring_req->u.discard.id = id;
5778c2ecf20Sopenharmony_ci	ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
5788c2ecf20Sopenharmony_ci	if (req_op(req) == REQ_OP_SECURE_ERASE && info->feature_secdiscard)
5798c2ecf20Sopenharmony_ci		ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
5808c2ecf20Sopenharmony_ci	else
5818c2ecf20Sopenharmony_ci		ring_req->u.discard.flag = 0;
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci	/* Copy the request to the ring page. */
5848c2ecf20Sopenharmony_ci	*final_ring_req = *ring_req;
5858c2ecf20Sopenharmony_ci	rinfo->shadow[id].status = REQ_WAITING;
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci	return 0;
5888c2ecf20Sopenharmony_ci}
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_cistruct setup_rw_req {
5918c2ecf20Sopenharmony_ci	unsigned int grant_idx;
5928c2ecf20Sopenharmony_ci	struct blkif_request_segment *segments;
5938c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
5948c2ecf20Sopenharmony_ci	struct blkif_request *ring_req;
5958c2ecf20Sopenharmony_ci	grant_ref_t gref_head;
5968c2ecf20Sopenharmony_ci	unsigned int id;
5978c2ecf20Sopenharmony_ci	/* Only used when persistent grant is used and it's a read request */
5988c2ecf20Sopenharmony_ci	bool need_copy;
5998c2ecf20Sopenharmony_ci	unsigned int bvec_off;
6008c2ecf20Sopenharmony_ci	char *bvec_data;
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci	bool require_extra_req;
6038c2ecf20Sopenharmony_ci	struct blkif_request *extra_ring_req;
6048c2ecf20Sopenharmony_ci};
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_cistatic void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
6078c2ecf20Sopenharmony_ci				     unsigned int len, void *data)
6088c2ecf20Sopenharmony_ci{
6098c2ecf20Sopenharmony_ci	struct setup_rw_req *setup = data;
6108c2ecf20Sopenharmony_ci	int n, ref;
6118c2ecf20Sopenharmony_ci	struct grant *gnt_list_entry;
6128c2ecf20Sopenharmony_ci	unsigned int fsect, lsect;
6138c2ecf20Sopenharmony_ci	/* Convenient aliases */
6148c2ecf20Sopenharmony_ci	unsigned int grant_idx = setup->grant_idx;
6158c2ecf20Sopenharmony_ci	struct blkif_request *ring_req = setup->ring_req;
6168c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo = setup->rinfo;
6178c2ecf20Sopenharmony_ci	/*
6188c2ecf20Sopenharmony_ci	 * We always use the shadow of the first request to store the list
6198c2ecf20Sopenharmony_ci	 * of grant associated to the block I/O request. This made the
6208c2ecf20Sopenharmony_ci	 * completion more easy to handle even if the block I/O request is
6218c2ecf20Sopenharmony_ci	 * split.
6228c2ecf20Sopenharmony_ci	 */
6238c2ecf20Sopenharmony_ci	struct blk_shadow *shadow = &rinfo->shadow[setup->id];
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_ci	if (unlikely(setup->require_extra_req &&
6268c2ecf20Sopenharmony_ci		     grant_idx >= BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
6278c2ecf20Sopenharmony_ci		/*
6288c2ecf20Sopenharmony_ci		 * We are using the second request, setup grant_idx
6298c2ecf20Sopenharmony_ci		 * to be the index of the segment array.
6308c2ecf20Sopenharmony_ci		 */
6318c2ecf20Sopenharmony_ci		grant_idx -= BLKIF_MAX_SEGMENTS_PER_REQUEST;
6328c2ecf20Sopenharmony_ci		ring_req = setup->extra_ring_req;
6338c2ecf20Sopenharmony_ci	}
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci	if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
6368c2ecf20Sopenharmony_ci	    (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) {
6378c2ecf20Sopenharmony_ci		if (setup->segments)
6388c2ecf20Sopenharmony_ci			kunmap_atomic(setup->segments);
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci		n = grant_idx / GRANTS_PER_INDIRECT_FRAME;
6418c2ecf20Sopenharmony_ci		gnt_list_entry = get_indirect_grant(&setup->gref_head, rinfo);
6428c2ecf20Sopenharmony_ci		shadow->indirect_grants[n] = gnt_list_entry;
6438c2ecf20Sopenharmony_ci		setup->segments = kmap_atomic(gnt_list_entry->page);
6448c2ecf20Sopenharmony_ci		ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
6458c2ecf20Sopenharmony_ci	}
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci	gnt_list_entry = get_grant(&setup->gref_head, gfn, rinfo);
6488c2ecf20Sopenharmony_ci	ref = gnt_list_entry->gref;
6498c2ecf20Sopenharmony_ci	/*
6508c2ecf20Sopenharmony_ci	 * All the grants are stored in the shadow of the first
6518c2ecf20Sopenharmony_ci	 * request. Therefore we have to use the global index.
6528c2ecf20Sopenharmony_ci	 */
6538c2ecf20Sopenharmony_ci	shadow->grants_used[setup->grant_idx] = gnt_list_entry;
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci	if (setup->need_copy) {
6568c2ecf20Sopenharmony_ci		void *shared_data;
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_ci		shared_data = kmap_atomic(gnt_list_entry->page);
6598c2ecf20Sopenharmony_ci		/*
6608c2ecf20Sopenharmony_ci		 * this does not wipe data stored outside the
6618c2ecf20Sopenharmony_ci		 * range sg->offset..sg->offset+sg->length.
6628c2ecf20Sopenharmony_ci		 * Therefore, blkback *could* see data from
6638c2ecf20Sopenharmony_ci		 * previous requests. This is OK as long as
6648c2ecf20Sopenharmony_ci		 * persistent grants are shared with just one
6658c2ecf20Sopenharmony_ci		 * domain. It may need refactoring if this
6668c2ecf20Sopenharmony_ci		 * changes
6678c2ecf20Sopenharmony_ci		 */
6688c2ecf20Sopenharmony_ci		memcpy(shared_data + offset,
6698c2ecf20Sopenharmony_ci		       setup->bvec_data + setup->bvec_off,
6708c2ecf20Sopenharmony_ci		       len);
6718c2ecf20Sopenharmony_ci
6728c2ecf20Sopenharmony_ci		kunmap_atomic(shared_data);
6738c2ecf20Sopenharmony_ci		setup->bvec_off += len;
6748c2ecf20Sopenharmony_ci	}
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	fsect = offset >> 9;
6778c2ecf20Sopenharmony_ci	lsect = fsect + (len >> 9) - 1;
6788c2ecf20Sopenharmony_ci	if (ring_req->operation != BLKIF_OP_INDIRECT) {
6798c2ecf20Sopenharmony_ci		ring_req->u.rw.seg[grant_idx] =
6808c2ecf20Sopenharmony_ci			(struct blkif_request_segment) {
6818c2ecf20Sopenharmony_ci				.gref       = ref,
6828c2ecf20Sopenharmony_ci				.first_sect = fsect,
6838c2ecf20Sopenharmony_ci				.last_sect  = lsect };
6848c2ecf20Sopenharmony_ci	} else {
6858c2ecf20Sopenharmony_ci		setup->segments[grant_idx % GRANTS_PER_INDIRECT_FRAME] =
6868c2ecf20Sopenharmony_ci			(struct blkif_request_segment) {
6878c2ecf20Sopenharmony_ci				.gref       = ref,
6888c2ecf20Sopenharmony_ci				.first_sect = fsect,
6898c2ecf20Sopenharmony_ci				.last_sect  = lsect };
6908c2ecf20Sopenharmony_ci	}
6918c2ecf20Sopenharmony_ci
6928c2ecf20Sopenharmony_ci	(setup->grant_idx)++;
6938c2ecf20Sopenharmony_ci}
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_cistatic void blkif_setup_extra_req(struct blkif_request *first,
6968c2ecf20Sopenharmony_ci				  struct blkif_request *second)
6978c2ecf20Sopenharmony_ci{
6988c2ecf20Sopenharmony_ci	uint16_t nr_segments = first->u.rw.nr_segments;
6998c2ecf20Sopenharmony_ci
7008c2ecf20Sopenharmony_ci	/*
7018c2ecf20Sopenharmony_ci	 * The second request is only present when the first request uses
7028c2ecf20Sopenharmony_ci	 * all its segments. It's always the continuity of the first one.
7038c2ecf20Sopenharmony_ci	 */
7048c2ecf20Sopenharmony_ci	first->u.rw.nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
7058c2ecf20Sopenharmony_ci
7068c2ecf20Sopenharmony_ci	second->u.rw.nr_segments = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST;
7078c2ecf20Sopenharmony_ci	second->u.rw.sector_number = first->u.rw.sector_number +
7088c2ecf20Sopenharmony_ci		(BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512;
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_ci	second->u.rw.handle = first->u.rw.handle;
7118c2ecf20Sopenharmony_ci	second->operation = first->operation;
7128c2ecf20Sopenharmony_ci}
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_cistatic int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo)
7158c2ecf20Sopenharmony_ci{
7168c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
7178c2ecf20Sopenharmony_ci	struct blkif_request *ring_req, *extra_ring_req = NULL;
7188c2ecf20Sopenharmony_ci	struct blkif_request *final_ring_req, *final_extra_ring_req = NULL;
7198c2ecf20Sopenharmony_ci	unsigned long id, extra_id = NO_ASSOCIATED_ID;
7208c2ecf20Sopenharmony_ci	bool require_extra_req = false;
7218c2ecf20Sopenharmony_ci	int i;
7228c2ecf20Sopenharmony_ci	struct setup_rw_req setup = {
7238c2ecf20Sopenharmony_ci		.grant_idx = 0,
7248c2ecf20Sopenharmony_ci		.segments = NULL,
7258c2ecf20Sopenharmony_ci		.rinfo = rinfo,
7268c2ecf20Sopenharmony_ci		.need_copy = rq_data_dir(req) && info->bounce,
7278c2ecf20Sopenharmony_ci	};
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci	/*
7308c2ecf20Sopenharmony_ci	 * Used to store if we are able to queue the request by just using
7318c2ecf20Sopenharmony_ci	 * existing persistent grants, or if we have to get new grants,
7328c2ecf20Sopenharmony_ci	 * as there are not sufficiently many free.
7338c2ecf20Sopenharmony_ci	 */
7348c2ecf20Sopenharmony_ci	bool new_persistent_gnts = false;
7358c2ecf20Sopenharmony_ci	struct scatterlist *sg;
7368c2ecf20Sopenharmony_ci	int num_sg, max_grefs, num_grant;
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_ci	max_grefs = req->nr_phys_segments * GRANTS_PER_PSEG;
7398c2ecf20Sopenharmony_ci	if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
7408c2ecf20Sopenharmony_ci		/*
7418c2ecf20Sopenharmony_ci		 * If we are using indirect segments we need to account
7428c2ecf20Sopenharmony_ci		 * for the indirect grefs used in the request.
7438c2ecf20Sopenharmony_ci		 */
7448c2ecf20Sopenharmony_ci		max_grefs += INDIRECT_GREFS(max_grefs);
7458c2ecf20Sopenharmony_ci
7468c2ecf20Sopenharmony_ci	/* Check if we have enough persistent grants to allocate a requests */
7478c2ecf20Sopenharmony_ci	if (rinfo->persistent_gnts_c < max_grefs) {
7488c2ecf20Sopenharmony_ci		new_persistent_gnts = true;
7498c2ecf20Sopenharmony_ci
7508c2ecf20Sopenharmony_ci		if (gnttab_alloc_grant_references(
7518c2ecf20Sopenharmony_ci		    max_grefs - rinfo->persistent_gnts_c,
7528c2ecf20Sopenharmony_ci		    &setup.gref_head) < 0) {
7538c2ecf20Sopenharmony_ci			gnttab_request_free_callback(
7548c2ecf20Sopenharmony_ci				&rinfo->callback,
7558c2ecf20Sopenharmony_ci				blkif_restart_queue_callback,
7568c2ecf20Sopenharmony_ci				rinfo,
7578c2ecf20Sopenharmony_ci				max_grefs - rinfo->persistent_gnts_c);
7588c2ecf20Sopenharmony_ci			return 1;
7598c2ecf20Sopenharmony_ci		}
7608c2ecf20Sopenharmony_ci	}
7618c2ecf20Sopenharmony_ci
7628c2ecf20Sopenharmony_ci	/* Fill out a communications ring structure. */
7638c2ecf20Sopenharmony_ci	id = blkif_ring_get_request(rinfo, req, &final_ring_req);
7648c2ecf20Sopenharmony_ci	ring_req = &rinfo->shadow[id].req;
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci	num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
7678c2ecf20Sopenharmony_ci	num_grant = 0;
7688c2ecf20Sopenharmony_ci	/* Calculate the number of grant used */
7698c2ecf20Sopenharmony_ci	for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
7708c2ecf20Sopenharmony_ci	       num_grant += gnttab_count_grant(sg->offset, sg->length);
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	require_extra_req = info->max_indirect_segments == 0 &&
7738c2ecf20Sopenharmony_ci		num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST;
7748c2ecf20Sopenharmony_ci	BUG_ON(!HAS_EXTRA_REQ && require_extra_req);
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_ci	rinfo->shadow[id].num_sg = num_sg;
7778c2ecf20Sopenharmony_ci	if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST &&
7788c2ecf20Sopenharmony_ci	    likely(!require_extra_req)) {
7798c2ecf20Sopenharmony_ci		/*
7808c2ecf20Sopenharmony_ci		 * The indirect operation can only be a BLKIF_OP_READ or
7818c2ecf20Sopenharmony_ci		 * BLKIF_OP_WRITE
7828c2ecf20Sopenharmony_ci		 */
7838c2ecf20Sopenharmony_ci		BUG_ON(req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA);
7848c2ecf20Sopenharmony_ci		ring_req->operation = BLKIF_OP_INDIRECT;
7858c2ecf20Sopenharmony_ci		ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
7868c2ecf20Sopenharmony_ci			BLKIF_OP_WRITE : BLKIF_OP_READ;
7878c2ecf20Sopenharmony_ci		ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
7888c2ecf20Sopenharmony_ci		ring_req->u.indirect.handle = info->handle;
7898c2ecf20Sopenharmony_ci		ring_req->u.indirect.nr_segments = num_grant;
7908c2ecf20Sopenharmony_ci	} else {
7918c2ecf20Sopenharmony_ci		ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
7928c2ecf20Sopenharmony_ci		ring_req->u.rw.handle = info->handle;
7938c2ecf20Sopenharmony_ci		ring_req->operation = rq_data_dir(req) ?
7948c2ecf20Sopenharmony_ci			BLKIF_OP_WRITE : BLKIF_OP_READ;
7958c2ecf20Sopenharmony_ci		if (req_op(req) == REQ_OP_FLUSH ||
7968c2ecf20Sopenharmony_ci		    (req_op(req) == REQ_OP_WRITE && (req->cmd_flags & REQ_FUA))) {
7978c2ecf20Sopenharmony_ci			/*
7988c2ecf20Sopenharmony_ci			 * Ideally we can do an unordered flush-to-disk.
7998c2ecf20Sopenharmony_ci			 * In case the backend onlysupports barriers, use that.
8008c2ecf20Sopenharmony_ci			 * A barrier request a superset of FUA, so we can
8018c2ecf20Sopenharmony_ci			 * implement it the same way.  (It's also a FLUSH+FUA,
8028c2ecf20Sopenharmony_ci			 * since it is guaranteed ordered WRT previous writes.)
8038c2ecf20Sopenharmony_ci			 */
8048c2ecf20Sopenharmony_ci			if (info->feature_flush && info->feature_fua)
8058c2ecf20Sopenharmony_ci				ring_req->operation =
8068c2ecf20Sopenharmony_ci					BLKIF_OP_WRITE_BARRIER;
8078c2ecf20Sopenharmony_ci			else if (info->feature_flush)
8088c2ecf20Sopenharmony_ci				ring_req->operation =
8098c2ecf20Sopenharmony_ci					BLKIF_OP_FLUSH_DISKCACHE;
8108c2ecf20Sopenharmony_ci			else
8118c2ecf20Sopenharmony_ci				ring_req->operation = 0;
8128c2ecf20Sopenharmony_ci		}
8138c2ecf20Sopenharmony_ci		ring_req->u.rw.nr_segments = num_grant;
8148c2ecf20Sopenharmony_ci		if (unlikely(require_extra_req)) {
8158c2ecf20Sopenharmony_ci			extra_id = blkif_ring_get_request(rinfo, req,
8168c2ecf20Sopenharmony_ci							  &final_extra_ring_req);
8178c2ecf20Sopenharmony_ci			extra_ring_req = &rinfo->shadow[extra_id].req;
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_ci			/*
8208c2ecf20Sopenharmony_ci			 * Only the first request contains the scatter-gather
8218c2ecf20Sopenharmony_ci			 * list.
8228c2ecf20Sopenharmony_ci			 */
8238c2ecf20Sopenharmony_ci			rinfo->shadow[extra_id].num_sg = 0;
8248c2ecf20Sopenharmony_ci
8258c2ecf20Sopenharmony_ci			blkif_setup_extra_req(ring_req, extra_ring_req);
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_ci			/* Link the 2 requests together */
8288c2ecf20Sopenharmony_ci			rinfo->shadow[extra_id].associated_id = id;
8298c2ecf20Sopenharmony_ci			rinfo->shadow[id].associated_id = extra_id;
8308c2ecf20Sopenharmony_ci		}
8318c2ecf20Sopenharmony_ci	}
8328c2ecf20Sopenharmony_ci
8338c2ecf20Sopenharmony_ci	setup.ring_req = ring_req;
8348c2ecf20Sopenharmony_ci	setup.id = id;
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	setup.require_extra_req = require_extra_req;
8378c2ecf20Sopenharmony_ci	if (unlikely(require_extra_req))
8388c2ecf20Sopenharmony_ci		setup.extra_ring_req = extra_ring_req;
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_ci	for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) {
8418c2ecf20Sopenharmony_ci		BUG_ON(sg->offset + sg->length > PAGE_SIZE);
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_ci		if (setup.need_copy) {
8448c2ecf20Sopenharmony_ci			setup.bvec_off = sg->offset;
8458c2ecf20Sopenharmony_ci			setup.bvec_data = kmap_atomic(sg_page(sg));
8468c2ecf20Sopenharmony_ci		}
8478c2ecf20Sopenharmony_ci
8488c2ecf20Sopenharmony_ci		gnttab_foreach_grant_in_range(sg_page(sg),
8498c2ecf20Sopenharmony_ci					      sg->offset,
8508c2ecf20Sopenharmony_ci					      sg->length,
8518c2ecf20Sopenharmony_ci					      blkif_setup_rw_req_grant,
8528c2ecf20Sopenharmony_ci					      &setup);
8538c2ecf20Sopenharmony_ci
8548c2ecf20Sopenharmony_ci		if (setup.need_copy)
8558c2ecf20Sopenharmony_ci			kunmap_atomic(setup.bvec_data);
8568c2ecf20Sopenharmony_ci	}
8578c2ecf20Sopenharmony_ci	if (setup.segments)
8588c2ecf20Sopenharmony_ci		kunmap_atomic(setup.segments);
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci	/* Copy request(s) to the ring page. */
8618c2ecf20Sopenharmony_ci	*final_ring_req = *ring_req;
8628c2ecf20Sopenharmony_ci	rinfo->shadow[id].status = REQ_WAITING;
8638c2ecf20Sopenharmony_ci	if (unlikely(require_extra_req)) {
8648c2ecf20Sopenharmony_ci		*final_extra_ring_req = *extra_ring_req;
8658c2ecf20Sopenharmony_ci		rinfo->shadow[extra_id].status = REQ_WAITING;
8668c2ecf20Sopenharmony_ci	}
8678c2ecf20Sopenharmony_ci
8688c2ecf20Sopenharmony_ci	if (new_persistent_gnts)
8698c2ecf20Sopenharmony_ci		gnttab_free_grant_references(setup.gref_head);
8708c2ecf20Sopenharmony_ci
8718c2ecf20Sopenharmony_ci	return 0;
8728c2ecf20Sopenharmony_ci}
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ci/*
8758c2ecf20Sopenharmony_ci * Generate a Xen blkfront IO request from a blk layer request.  Reads
8768c2ecf20Sopenharmony_ci * and writes are handled as expected.
8778c2ecf20Sopenharmony_ci *
8788c2ecf20Sopenharmony_ci * @req: a request struct
8798c2ecf20Sopenharmony_ci */
8808c2ecf20Sopenharmony_cistatic int blkif_queue_request(struct request *req, struct blkfront_ring_info *rinfo)
8818c2ecf20Sopenharmony_ci{
8828c2ecf20Sopenharmony_ci	if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED))
8838c2ecf20Sopenharmony_ci		return 1;
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci	if (unlikely(req_op(req) == REQ_OP_DISCARD ||
8868c2ecf20Sopenharmony_ci		     req_op(req) == REQ_OP_SECURE_ERASE))
8878c2ecf20Sopenharmony_ci		return blkif_queue_discard_req(req, rinfo);
8888c2ecf20Sopenharmony_ci	else
8898c2ecf20Sopenharmony_ci		return blkif_queue_rw_req(req, rinfo);
8908c2ecf20Sopenharmony_ci}
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_cistatic inline void flush_requests(struct blkfront_ring_info *rinfo)
8938c2ecf20Sopenharmony_ci{
8948c2ecf20Sopenharmony_ci	int notify;
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rinfo->ring, notify);
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	if (notify)
8998c2ecf20Sopenharmony_ci		notify_remote_via_irq(rinfo->irq);
9008c2ecf20Sopenharmony_ci}
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_cistatic inline bool blkif_request_flush_invalid(struct request *req,
9038c2ecf20Sopenharmony_ci					       struct blkfront_info *info)
9048c2ecf20Sopenharmony_ci{
9058c2ecf20Sopenharmony_ci	return (blk_rq_is_passthrough(req) ||
9068c2ecf20Sopenharmony_ci		((req_op(req) == REQ_OP_FLUSH) &&
9078c2ecf20Sopenharmony_ci		 !info->feature_flush) ||
9088c2ecf20Sopenharmony_ci		((req->cmd_flags & REQ_FUA) &&
9098c2ecf20Sopenharmony_ci		 !info->feature_fua));
9108c2ecf20Sopenharmony_ci}
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_cistatic blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
9138c2ecf20Sopenharmony_ci			  const struct blk_mq_queue_data *qd)
9148c2ecf20Sopenharmony_ci{
9158c2ecf20Sopenharmony_ci	unsigned long flags;
9168c2ecf20Sopenharmony_ci	int qid = hctx->queue_num;
9178c2ecf20Sopenharmony_ci	struct blkfront_info *info = hctx->queue->queuedata;
9188c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo = NULL;
9198c2ecf20Sopenharmony_ci
9208c2ecf20Sopenharmony_ci	rinfo = get_rinfo(info, qid);
9218c2ecf20Sopenharmony_ci	blk_mq_start_request(qd->rq);
9228c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rinfo->ring_lock, flags);
9238c2ecf20Sopenharmony_ci	if (RING_FULL(&rinfo->ring))
9248c2ecf20Sopenharmony_ci		goto out_busy;
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci	if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
9278c2ecf20Sopenharmony_ci		goto out_err;
9288c2ecf20Sopenharmony_ci
9298c2ecf20Sopenharmony_ci	if (blkif_queue_request(qd->rq, rinfo))
9308c2ecf20Sopenharmony_ci		goto out_busy;
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci	flush_requests(rinfo);
9338c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
9348c2ecf20Sopenharmony_ci	return BLK_STS_OK;
9358c2ecf20Sopenharmony_ci
9368c2ecf20Sopenharmony_ciout_err:
9378c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
9388c2ecf20Sopenharmony_ci	return BLK_STS_IOERR;
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_ciout_busy:
9418c2ecf20Sopenharmony_ci	blk_mq_stop_hw_queue(hctx);
9428c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
9438c2ecf20Sopenharmony_ci	return BLK_STS_DEV_RESOURCE;
9448c2ecf20Sopenharmony_ci}
9458c2ecf20Sopenharmony_ci
9468c2ecf20Sopenharmony_cistatic void blkif_complete_rq(struct request *rq)
9478c2ecf20Sopenharmony_ci{
9488c2ecf20Sopenharmony_ci	blk_mq_end_request(rq, blkif_req(rq)->error);
9498c2ecf20Sopenharmony_ci}
9508c2ecf20Sopenharmony_ci
9518c2ecf20Sopenharmony_cistatic const struct blk_mq_ops blkfront_mq_ops = {
9528c2ecf20Sopenharmony_ci	.queue_rq = blkif_queue_rq,
9538c2ecf20Sopenharmony_ci	.complete = blkif_complete_rq,
9548c2ecf20Sopenharmony_ci};
9558c2ecf20Sopenharmony_ci
9568c2ecf20Sopenharmony_cistatic void blkif_set_queue_limits(struct blkfront_info *info)
9578c2ecf20Sopenharmony_ci{
9588c2ecf20Sopenharmony_ci	struct request_queue *rq = info->rq;
9598c2ecf20Sopenharmony_ci	struct gendisk *gd = info->gd;
9608c2ecf20Sopenharmony_ci	unsigned int segments = info->max_indirect_segments ? :
9618c2ecf20Sopenharmony_ci				BLKIF_MAX_SEGMENTS_PER_REQUEST;
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_ci	blk_queue_flag_set(QUEUE_FLAG_VIRT, rq);
9648c2ecf20Sopenharmony_ci
9658c2ecf20Sopenharmony_ci	if (info->feature_discard) {
9668c2ecf20Sopenharmony_ci		blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq);
9678c2ecf20Sopenharmony_ci		blk_queue_max_discard_sectors(rq, get_capacity(gd));
9688c2ecf20Sopenharmony_ci		rq->limits.discard_granularity = info->discard_granularity ?:
9698c2ecf20Sopenharmony_ci						 info->physical_sector_size;
9708c2ecf20Sopenharmony_ci		rq->limits.discard_alignment = info->discard_alignment;
9718c2ecf20Sopenharmony_ci		if (info->feature_secdiscard)
9728c2ecf20Sopenharmony_ci			blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq);
9738c2ecf20Sopenharmony_ci	}
9748c2ecf20Sopenharmony_ci
9758c2ecf20Sopenharmony_ci	/* Hard sector size and max sectors impersonate the equiv. hardware. */
9768c2ecf20Sopenharmony_ci	blk_queue_logical_block_size(rq, info->sector_size);
9778c2ecf20Sopenharmony_ci	blk_queue_physical_block_size(rq, info->physical_sector_size);
9788c2ecf20Sopenharmony_ci	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
9798c2ecf20Sopenharmony_ci
9808c2ecf20Sopenharmony_ci	/* Each segment in a request is up to an aligned page in size. */
9818c2ecf20Sopenharmony_ci	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
9828c2ecf20Sopenharmony_ci	blk_queue_max_segment_size(rq, PAGE_SIZE);
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_ci	/* Ensure a merged request will fit in a single I/O ring slot. */
9858c2ecf20Sopenharmony_ci	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci	/* Make sure buffer addresses are sector-aligned. */
9888c2ecf20Sopenharmony_ci	blk_queue_dma_alignment(rq, 511);
9898c2ecf20Sopenharmony_ci}
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_cistatic int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
9928c2ecf20Sopenharmony_ci				unsigned int physical_sector_size)
9938c2ecf20Sopenharmony_ci{
9948c2ecf20Sopenharmony_ci	struct request_queue *rq;
9958c2ecf20Sopenharmony_ci	struct blkfront_info *info = gd->private_data;
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci	memset(&info->tag_set, 0, sizeof(info->tag_set));
9988c2ecf20Sopenharmony_ci	info->tag_set.ops = &blkfront_mq_ops;
9998c2ecf20Sopenharmony_ci	info->tag_set.nr_hw_queues = info->nr_rings;
10008c2ecf20Sopenharmony_ci	if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
10018c2ecf20Sopenharmony_ci		/*
10028c2ecf20Sopenharmony_ci		 * When indirect descriptior is not supported, the I/O request
10038c2ecf20Sopenharmony_ci		 * will be split between multiple request in the ring.
10048c2ecf20Sopenharmony_ci		 * To avoid problems when sending the request, divide by
10058c2ecf20Sopenharmony_ci		 * 2 the depth of the queue.
10068c2ecf20Sopenharmony_ci		 */
10078c2ecf20Sopenharmony_ci		info->tag_set.queue_depth =  BLK_RING_SIZE(info) / 2;
10088c2ecf20Sopenharmony_ci	} else
10098c2ecf20Sopenharmony_ci		info->tag_set.queue_depth = BLK_RING_SIZE(info);
10108c2ecf20Sopenharmony_ci	info->tag_set.numa_node = NUMA_NO_NODE;
10118c2ecf20Sopenharmony_ci	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
10128c2ecf20Sopenharmony_ci	info->tag_set.cmd_size = sizeof(struct blkif_req);
10138c2ecf20Sopenharmony_ci	info->tag_set.driver_data = info;
10148c2ecf20Sopenharmony_ci
10158c2ecf20Sopenharmony_ci	if (blk_mq_alloc_tag_set(&info->tag_set))
10168c2ecf20Sopenharmony_ci		return -EINVAL;
10178c2ecf20Sopenharmony_ci	rq = blk_mq_init_queue(&info->tag_set);
10188c2ecf20Sopenharmony_ci	if (IS_ERR(rq)) {
10198c2ecf20Sopenharmony_ci		blk_mq_free_tag_set(&info->tag_set);
10208c2ecf20Sopenharmony_ci		return PTR_ERR(rq);
10218c2ecf20Sopenharmony_ci	}
10228c2ecf20Sopenharmony_ci
10238c2ecf20Sopenharmony_ci	rq->queuedata = info;
10248c2ecf20Sopenharmony_ci	info->rq = gd->queue = rq;
10258c2ecf20Sopenharmony_ci	info->gd = gd;
10268c2ecf20Sopenharmony_ci	info->sector_size = sector_size;
10278c2ecf20Sopenharmony_ci	info->physical_sector_size = physical_sector_size;
10288c2ecf20Sopenharmony_ci	blkif_set_queue_limits(info);
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_ci	return 0;
10318c2ecf20Sopenharmony_ci}
10328c2ecf20Sopenharmony_ci
10338c2ecf20Sopenharmony_cistatic const char *flush_info(struct blkfront_info *info)
10348c2ecf20Sopenharmony_ci{
10358c2ecf20Sopenharmony_ci	if (info->feature_flush && info->feature_fua)
10368c2ecf20Sopenharmony_ci		return "barrier: enabled;";
10378c2ecf20Sopenharmony_ci	else if (info->feature_flush)
10388c2ecf20Sopenharmony_ci		return "flush diskcache: enabled;";
10398c2ecf20Sopenharmony_ci	else
10408c2ecf20Sopenharmony_ci		return "barrier or flush: disabled;";
10418c2ecf20Sopenharmony_ci}
10428c2ecf20Sopenharmony_ci
10438c2ecf20Sopenharmony_cistatic void xlvbd_flush(struct blkfront_info *info)
10448c2ecf20Sopenharmony_ci{
10458c2ecf20Sopenharmony_ci	blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
10468c2ecf20Sopenharmony_ci			      info->feature_fua ? true : false);
10478c2ecf20Sopenharmony_ci	pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
10488c2ecf20Sopenharmony_ci		info->gd->disk_name, flush_info(info),
10498c2ecf20Sopenharmony_ci		"persistent grants:", info->feature_persistent ?
10508c2ecf20Sopenharmony_ci		"enabled;" : "disabled;", "indirect descriptors:",
10518c2ecf20Sopenharmony_ci		info->max_indirect_segments ? "enabled;" : "disabled;",
10528c2ecf20Sopenharmony_ci		"bounce buffer:", info->bounce ? "enabled" : "disabled;");
10538c2ecf20Sopenharmony_ci}
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_cistatic int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
10568c2ecf20Sopenharmony_ci{
10578c2ecf20Sopenharmony_ci	int major;
10588c2ecf20Sopenharmony_ci	major = BLKIF_MAJOR(vdevice);
10598c2ecf20Sopenharmony_ci	*minor = BLKIF_MINOR(vdevice);
10608c2ecf20Sopenharmony_ci	switch (major) {
10618c2ecf20Sopenharmony_ci		case XEN_IDE0_MAJOR:
10628c2ecf20Sopenharmony_ci			*offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
10638c2ecf20Sopenharmony_ci			*minor = ((*minor / 64) * PARTS_PER_DISK) +
10648c2ecf20Sopenharmony_ci				EMULATED_HD_DISK_MINOR_OFFSET;
10658c2ecf20Sopenharmony_ci			break;
10668c2ecf20Sopenharmony_ci		case XEN_IDE1_MAJOR:
10678c2ecf20Sopenharmony_ci			*offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
10688c2ecf20Sopenharmony_ci			*minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
10698c2ecf20Sopenharmony_ci				EMULATED_HD_DISK_MINOR_OFFSET;
10708c2ecf20Sopenharmony_ci			break;
10718c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK0_MAJOR:
10728c2ecf20Sopenharmony_ci			*offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
10738c2ecf20Sopenharmony_ci			*minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
10748c2ecf20Sopenharmony_ci			break;
10758c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK1_MAJOR:
10768c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK2_MAJOR:
10778c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK3_MAJOR:
10788c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK4_MAJOR:
10798c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK5_MAJOR:
10808c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK6_MAJOR:
10818c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK7_MAJOR:
10828c2ecf20Sopenharmony_ci			*offset = (*minor / PARTS_PER_DISK) +
10838c2ecf20Sopenharmony_ci				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
10848c2ecf20Sopenharmony_ci				EMULATED_SD_DISK_NAME_OFFSET;
10858c2ecf20Sopenharmony_ci			*minor = *minor +
10868c2ecf20Sopenharmony_ci				((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
10878c2ecf20Sopenharmony_ci				EMULATED_SD_DISK_MINOR_OFFSET;
10888c2ecf20Sopenharmony_ci			break;
10898c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK8_MAJOR:
10908c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK9_MAJOR:
10918c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK10_MAJOR:
10928c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK11_MAJOR:
10938c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK12_MAJOR:
10948c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK13_MAJOR:
10958c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK14_MAJOR:
10968c2ecf20Sopenharmony_ci		case XEN_SCSI_DISK15_MAJOR:
10978c2ecf20Sopenharmony_ci			*offset = (*minor / PARTS_PER_DISK) +
10988c2ecf20Sopenharmony_ci				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
10998c2ecf20Sopenharmony_ci				EMULATED_SD_DISK_NAME_OFFSET;
11008c2ecf20Sopenharmony_ci			*minor = *minor +
11018c2ecf20Sopenharmony_ci				((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
11028c2ecf20Sopenharmony_ci				EMULATED_SD_DISK_MINOR_OFFSET;
11038c2ecf20Sopenharmony_ci			break;
11048c2ecf20Sopenharmony_ci		case XENVBD_MAJOR:
11058c2ecf20Sopenharmony_ci			*offset = *minor / PARTS_PER_DISK;
11068c2ecf20Sopenharmony_ci			break;
11078c2ecf20Sopenharmony_ci		default:
11088c2ecf20Sopenharmony_ci			printk(KERN_WARNING "blkfront: your disk configuration is "
11098c2ecf20Sopenharmony_ci					"incorrect, please use an xvd device instead\n");
11108c2ecf20Sopenharmony_ci			return -ENODEV;
11118c2ecf20Sopenharmony_ci	}
11128c2ecf20Sopenharmony_ci	return 0;
11138c2ecf20Sopenharmony_ci}
11148c2ecf20Sopenharmony_ci
/*
 * Write the letter suffix for disk index @n at @ptr.
 *
 * The encoding is the usual disk naming scheme: 0 -> "a", 25 -> "z",
 * 26 -> "aa", 701 -> "zz", 702 -> "aaa", and so on. No terminating NUL
 * is written.
 *
 * Returns a pointer to the byte following the last letter written.
 */
static char *encode_disk_name(char *ptr, unsigned int n)
{
	unsigned int rest = n;
	unsigned int letters = 1;
	char *end;
	char *pos;

	/* First pass: count how many letters the name needs. */
	while (rest >= 26) {
		rest = rest / 26 - 1;
		letters++;
	}

	end = ptr + letters;

	/* Second pass: emit the letters, least significant one first. */
	pos = end;
	rest = n;
	for (;;) {
		*--pos = 'a' + rest % 26;
		if (rest < 26)
			break;
		rest = rest / 26 - 1;
	}

	return end;
}
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_cistatic int xlvbd_alloc_gendisk(blkif_sector_t capacity,
11248c2ecf20Sopenharmony_ci			       struct blkfront_info *info,
11258c2ecf20Sopenharmony_ci			       u16 vdisk_info, u16 sector_size,
11268c2ecf20Sopenharmony_ci			       unsigned int physical_sector_size)
11278c2ecf20Sopenharmony_ci{
11288c2ecf20Sopenharmony_ci	struct gendisk *gd;
11298c2ecf20Sopenharmony_ci	int nr_minors = 1;
11308c2ecf20Sopenharmony_ci	int err;
11318c2ecf20Sopenharmony_ci	unsigned int offset;
11328c2ecf20Sopenharmony_ci	int minor;
11338c2ecf20Sopenharmony_ci	int nr_parts;
11348c2ecf20Sopenharmony_ci	char *ptr;
11358c2ecf20Sopenharmony_ci
11368c2ecf20Sopenharmony_ci	BUG_ON(info->gd != NULL);
11378c2ecf20Sopenharmony_ci	BUG_ON(info->rq != NULL);
11388c2ecf20Sopenharmony_ci
11398c2ecf20Sopenharmony_ci	if ((info->vdevice>>EXT_SHIFT) > 1) {
11408c2ecf20Sopenharmony_ci		/* this is above the extended range; something is wrong */
11418c2ecf20Sopenharmony_ci		printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
11428c2ecf20Sopenharmony_ci		return -ENODEV;
11438c2ecf20Sopenharmony_ci	}
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_ci	if (!VDEV_IS_EXTENDED(info->vdevice)) {
11468c2ecf20Sopenharmony_ci		err = xen_translate_vdev(info->vdevice, &minor, &offset);
11478c2ecf20Sopenharmony_ci		if (err)
11488c2ecf20Sopenharmony_ci			return err;
11498c2ecf20Sopenharmony_ci		nr_parts = PARTS_PER_DISK;
11508c2ecf20Sopenharmony_ci	} else {
11518c2ecf20Sopenharmony_ci		minor = BLKIF_MINOR_EXT(info->vdevice);
11528c2ecf20Sopenharmony_ci		nr_parts = PARTS_PER_EXT_DISK;
11538c2ecf20Sopenharmony_ci		offset = minor / nr_parts;
11548c2ecf20Sopenharmony_ci		if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
11558c2ecf20Sopenharmony_ci			printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
11568c2ecf20Sopenharmony_ci					"emulated IDE disks,\n\t choose an xvd device name"
11578c2ecf20Sopenharmony_ci					"from xvde on\n", info->vdevice);
11588c2ecf20Sopenharmony_ci	}
11598c2ecf20Sopenharmony_ci	if (minor >> MINORBITS) {
11608c2ecf20Sopenharmony_ci		pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n",
11618c2ecf20Sopenharmony_ci			info->vdevice, minor);
11628c2ecf20Sopenharmony_ci		return -ENODEV;
11638c2ecf20Sopenharmony_ci	}
11648c2ecf20Sopenharmony_ci
11658c2ecf20Sopenharmony_ci	if ((minor % nr_parts) == 0)
11668c2ecf20Sopenharmony_ci		nr_minors = nr_parts;
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_ci	err = xlbd_reserve_minors(minor, nr_minors);
11698c2ecf20Sopenharmony_ci	if (err)
11708c2ecf20Sopenharmony_ci		goto out;
11718c2ecf20Sopenharmony_ci	err = -ENODEV;
11728c2ecf20Sopenharmony_ci
11738c2ecf20Sopenharmony_ci	gd = alloc_disk(nr_minors);
11748c2ecf20Sopenharmony_ci	if (gd == NULL)
11758c2ecf20Sopenharmony_ci		goto release;
11768c2ecf20Sopenharmony_ci
11778c2ecf20Sopenharmony_ci	strcpy(gd->disk_name, DEV_NAME);
11788c2ecf20Sopenharmony_ci	ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
11798c2ecf20Sopenharmony_ci	BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN);
11808c2ecf20Sopenharmony_ci	if (nr_minors > 1)
11818c2ecf20Sopenharmony_ci		*ptr = 0;
11828c2ecf20Sopenharmony_ci	else
11838c2ecf20Sopenharmony_ci		snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr,
11848c2ecf20Sopenharmony_ci			 "%d", minor & (nr_parts - 1));
11858c2ecf20Sopenharmony_ci
11868c2ecf20Sopenharmony_ci	gd->major = XENVBD_MAJOR;
11878c2ecf20Sopenharmony_ci	gd->first_minor = minor;
11888c2ecf20Sopenharmony_ci	gd->fops = &xlvbd_block_fops;
11898c2ecf20Sopenharmony_ci	gd->private_data = info;
11908c2ecf20Sopenharmony_ci	set_capacity(gd, capacity);
11918c2ecf20Sopenharmony_ci
11928c2ecf20Sopenharmony_ci	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
11938c2ecf20Sopenharmony_ci		del_gendisk(gd);
11948c2ecf20Sopenharmony_ci		goto release;
11958c2ecf20Sopenharmony_ci	}
11968c2ecf20Sopenharmony_ci
11978c2ecf20Sopenharmony_ci	xlvbd_flush(info);
11988c2ecf20Sopenharmony_ci
11998c2ecf20Sopenharmony_ci	if (vdisk_info & VDISK_READONLY)
12008c2ecf20Sopenharmony_ci		set_disk_ro(gd, 1);
12018c2ecf20Sopenharmony_ci
12028c2ecf20Sopenharmony_ci	if (vdisk_info & VDISK_REMOVABLE)
12038c2ecf20Sopenharmony_ci		gd->flags |= GENHD_FL_REMOVABLE;
12048c2ecf20Sopenharmony_ci
12058c2ecf20Sopenharmony_ci	if (vdisk_info & VDISK_CDROM)
12068c2ecf20Sopenharmony_ci		gd->flags |= GENHD_FL_CD;
12078c2ecf20Sopenharmony_ci
12088c2ecf20Sopenharmony_ci	return 0;
12098c2ecf20Sopenharmony_ci
12108c2ecf20Sopenharmony_ci release:
12118c2ecf20Sopenharmony_ci	xlbd_release_minors(minor, nr_minors);
12128c2ecf20Sopenharmony_ci out:
12138c2ecf20Sopenharmony_ci	return err;
12148c2ecf20Sopenharmony_ci}
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_cistatic void xlvbd_release_gendisk(struct blkfront_info *info)
12178c2ecf20Sopenharmony_ci{
12188c2ecf20Sopenharmony_ci	unsigned int minor, nr_minors, i;
12198c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
12208c2ecf20Sopenharmony_ci
12218c2ecf20Sopenharmony_ci	if (info->rq == NULL)
12228c2ecf20Sopenharmony_ci		return;
12238c2ecf20Sopenharmony_ci
12248c2ecf20Sopenharmony_ci	/* No more blkif_request(). */
12258c2ecf20Sopenharmony_ci	blk_mq_stop_hw_queues(info->rq);
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i) {
12288c2ecf20Sopenharmony_ci		/* No more gnttab callback work. */
12298c2ecf20Sopenharmony_ci		gnttab_cancel_free_callback(&rinfo->callback);
12308c2ecf20Sopenharmony_ci
12318c2ecf20Sopenharmony_ci		/* Flush gnttab callback work. Must be done with no locks held. */
12328c2ecf20Sopenharmony_ci		flush_work(&rinfo->work);
12338c2ecf20Sopenharmony_ci	}
12348c2ecf20Sopenharmony_ci
12358c2ecf20Sopenharmony_ci	del_gendisk(info->gd);
12368c2ecf20Sopenharmony_ci
12378c2ecf20Sopenharmony_ci	minor = info->gd->first_minor;
12388c2ecf20Sopenharmony_ci	nr_minors = info->gd->minors;
12398c2ecf20Sopenharmony_ci	xlbd_release_minors(minor, nr_minors);
12408c2ecf20Sopenharmony_ci
12418c2ecf20Sopenharmony_ci	blk_cleanup_queue(info->rq);
12428c2ecf20Sopenharmony_ci	blk_mq_free_tag_set(&info->tag_set);
12438c2ecf20Sopenharmony_ci	info->rq = NULL;
12448c2ecf20Sopenharmony_ci
12458c2ecf20Sopenharmony_ci	put_disk(info->gd);
12468c2ecf20Sopenharmony_ci	info->gd = NULL;
12478c2ecf20Sopenharmony_ci}
12488c2ecf20Sopenharmony_ci
12498c2ecf20Sopenharmony_ci/* Already hold rinfo->ring_lock. */
12508c2ecf20Sopenharmony_cistatic inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
12518c2ecf20Sopenharmony_ci{
12528c2ecf20Sopenharmony_ci	if (!RING_FULL(&rinfo->ring))
12538c2ecf20Sopenharmony_ci		blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true);
12548c2ecf20Sopenharmony_ci}
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_cistatic void kick_pending_request_queues(struct blkfront_ring_info *rinfo)
12578c2ecf20Sopenharmony_ci{
12588c2ecf20Sopenharmony_ci	unsigned long flags;
12598c2ecf20Sopenharmony_ci
12608c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rinfo->ring_lock, flags);
12618c2ecf20Sopenharmony_ci	kick_pending_request_queues_locked(rinfo);
12628c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
12638c2ecf20Sopenharmony_ci}
12648c2ecf20Sopenharmony_ci
12658c2ecf20Sopenharmony_cistatic void blkif_restart_queue(struct work_struct *work)
12668c2ecf20Sopenharmony_ci{
12678c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work);
12688c2ecf20Sopenharmony_ci
12698c2ecf20Sopenharmony_ci	if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED)
12708c2ecf20Sopenharmony_ci		kick_pending_request_queues(rinfo);
12718c2ecf20Sopenharmony_ci}
12728c2ecf20Sopenharmony_ci
12738c2ecf20Sopenharmony_cistatic void blkif_free_ring(struct blkfront_ring_info *rinfo)
12748c2ecf20Sopenharmony_ci{
12758c2ecf20Sopenharmony_ci	struct grant *persistent_gnt, *n;
12768c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
12778c2ecf20Sopenharmony_ci	int i, j, segs;
12788c2ecf20Sopenharmony_ci
12798c2ecf20Sopenharmony_ci	/*
12808c2ecf20Sopenharmony_ci	 * Remove indirect pages, this only happens when using indirect
12818c2ecf20Sopenharmony_ci	 * descriptors but not persistent grants
12828c2ecf20Sopenharmony_ci	 */
12838c2ecf20Sopenharmony_ci	if (!list_empty(&rinfo->indirect_pages)) {
12848c2ecf20Sopenharmony_ci		struct page *indirect_page, *n;
12858c2ecf20Sopenharmony_ci
12868c2ecf20Sopenharmony_ci		BUG_ON(info->bounce);
12878c2ecf20Sopenharmony_ci		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
12888c2ecf20Sopenharmony_ci			list_del(&indirect_page->lru);
12898c2ecf20Sopenharmony_ci			__free_page(indirect_page);
12908c2ecf20Sopenharmony_ci		}
12918c2ecf20Sopenharmony_ci	}
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci	/* Remove all persistent grants. */
12948c2ecf20Sopenharmony_ci	if (!list_empty(&rinfo->grants)) {
12958c2ecf20Sopenharmony_ci		list_for_each_entry_safe(persistent_gnt, n,
12968c2ecf20Sopenharmony_ci					 &rinfo->grants, node) {
12978c2ecf20Sopenharmony_ci			list_del(&persistent_gnt->node);
12988c2ecf20Sopenharmony_ci			if (persistent_gnt->gref != GRANT_INVALID_REF) {
12998c2ecf20Sopenharmony_ci				gnttab_end_foreign_access(persistent_gnt->gref,
13008c2ecf20Sopenharmony_ci							  0, 0UL);
13018c2ecf20Sopenharmony_ci				rinfo->persistent_gnts_c--;
13028c2ecf20Sopenharmony_ci			}
13038c2ecf20Sopenharmony_ci			if (info->bounce)
13048c2ecf20Sopenharmony_ci				__free_page(persistent_gnt->page);
13058c2ecf20Sopenharmony_ci			kfree(persistent_gnt);
13068c2ecf20Sopenharmony_ci		}
13078c2ecf20Sopenharmony_ci	}
13088c2ecf20Sopenharmony_ci	BUG_ON(rinfo->persistent_gnts_c != 0);
13098c2ecf20Sopenharmony_ci
13108c2ecf20Sopenharmony_ci	for (i = 0; i < BLK_RING_SIZE(info); i++) {
13118c2ecf20Sopenharmony_ci		/*
13128c2ecf20Sopenharmony_ci		 * Clear persistent grants present in requests already
13138c2ecf20Sopenharmony_ci		 * on the shared ring
13148c2ecf20Sopenharmony_ci		 */
13158c2ecf20Sopenharmony_ci		if (!rinfo->shadow[i].request)
13168c2ecf20Sopenharmony_ci			goto free_shadow;
13178c2ecf20Sopenharmony_ci
13188c2ecf20Sopenharmony_ci		segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
13198c2ecf20Sopenharmony_ci		       rinfo->shadow[i].req.u.indirect.nr_segments :
13208c2ecf20Sopenharmony_ci		       rinfo->shadow[i].req.u.rw.nr_segments;
13218c2ecf20Sopenharmony_ci		for (j = 0; j < segs; j++) {
13228c2ecf20Sopenharmony_ci			persistent_gnt = rinfo->shadow[i].grants_used[j];
13238c2ecf20Sopenharmony_ci			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
13248c2ecf20Sopenharmony_ci			if (info->bounce)
13258c2ecf20Sopenharmony_ci				__free_page(persistent_gnt->page);
13268c2ecf20Sopenharmony_ci			kfree(persistent_gnt);
13278c2ecf20Sopenharmony_ci		}
13288c2ecf20Sopenharmony_ci
13298c2ecf20Sopenharmony_ci		if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT)
13308c2ecf20Sopenharmony_ci			/*
13318c2ecf20Sopenharmony_ci			 * If this is not an indirect operation don't try to
13328c2ecf20Sopenharmony_ci			 * free indirect segments
13338c2ecf20Sopenharmony_ci			 */
13348c2ecf20Sopenharmony_ci			goto free_shadow;
13358c2ecf20Sopenharmony_ci
13368c2ecf20Sopenharmony_ci		for (j = 0; j < INDIRECT_GREFS(segs); j++) {
13378c2ecf20Sopenharmony_ci			persistent_gnt = rinfo->shadow[i].indirect_grants[j];
13388c2ecf20Sopenharmony_ci			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
13398c2ecf20Sopenharmony_ci			__free_page(persistent_gnt->page);
13408c2ecf20Sopenharmony_ci			kfree(persistent_gnt);
13418c2ecf20Sopenharmony_ci		}
13428c2ecf20Sopenharmony_ci
13438c2ecf20Sopenharmony_cifree_shadow:
13448c2ecf20Sopenharmony_ci		kvfree(rinfo->shadow[i].grants_used);
13458c2ecf20Sopenharmony_ci		rinfo->shadow[i].grants_used = NULL;
13468c2ecf20Sopenharmony_ci		kvfree(rinfo->shadow[i].indirect_grants);
13478c2ecf20Sopenharmony_ci		rinfo->shadow[i].indirect_grants = NULL;
13488c2ecf20Sopenharmony_ci		kvfree(rinfo->shadow[i].sg);
13498c2ecf20Sopenharmony_ci		rinfo->shadow[i].sg = NULL;
13508c2ecf20Sopenharmony_ci	}
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	/* No more gnttab callback work. */
13538c2ecf20Sopenharmony_ci	gnttab_cancel_free_callback(&rinfo->callback);
13548c2ecf20Sopenharmony_ci
13558c2ecf20Sopenharmony_ci	/* Flush gnttab callback work. Must be done with no locks held. */
13568c2ecf20Sopenharmony_ci	flush_work(&rinfo->work);
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_ci	/* Free resources associated with old device channel. */
13598c2ecf20Sopenharmony_ci	for (i = 0; i < info->nr_ring_pages; i++) {
13608c2ecf20Sopenharmony_ci		if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
13618c2ecf20Sopenharmony_ci			gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
13628c2ecf20Sopenharmony_ci			rinfo->ring_ref[i] = GRANT_INVALID_REF;
13638c2ecf20Sopenharmony_ci		}
13648c2ecf20Sopenharmony_ci	}
13658c2ecf20Sopenharmony_ci	free_pages_exact(rinfo->ring.sring,
13668c2ecf20Sopenharmony_ci			 info->nr_ring_pages * XEN_PAGE_SIZE);
13678c2ecf20Sopenharmony_ci	rinfo->ring.sring = NULL;
13688c2ecf20Sopenharmony_ci
13698c2ecf20Sopenharmony_ci	if (rinfo->irq)
13708c2ecf20Sopenharmony_ci		unbind_from_irqhandler(rinfo->irq, rinfo);
13718c2ecf20Sopenharmony_ci	rinfo->evtchn = rinfo->irq = 0;
13728c2ecf20Sopenharmony_ci}
13738c2ecf20Sopenharmony_ci
13748c2ecf20Sopenharmony_cistatic void blkif_free(struct blkfront_info *info, int suspend)
13758c2ecf20Sopenharmony_ci{
13768c2ecf20Sopenharmony_ci	unsigned int i;
13778c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
13788c2ecf20Sopenharmony_ci
13798c2ecf20Sopenharmony_ci	/* Prevent new requests being issued until we fix things up. */
13808c2ecf20Sopenharmony_ci	info->connected = suspend ?
13818c2ecf20Sopenharmony_ci		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
13828c2ecf20Sopenharmony_ci	/* No more blkif_request(). */
13838c2ecf20Sopenharmony_ci	if (info->rq)
13848c2ecf20Sopenharmony_ci		blk_mq_stop_hw_queues(info->rq);
13858c2ecf20Sopenharmony_ci
13868c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i)
13878c2ecf20Sopenharmony_ci		blkif_free_ring(rinfo);
13888c2ecf20Sopenharmony_ci
13898c2ecf20Sopenharmony_ci	kvfree(info->rinfo);
13908c2ecf20Sopenharmony_ci	info->rinfo = NULL;
13918c2ecf20Sopenharmony_ci	info->nr_rings = 0;
13928c2ecf20Sopenharmony_ci}
13938c2ecf20Sopenharmony_ci
/*
 * Cursor handed to blkif_copy_from_grant() while data is copied out of the
 * pages of a shadow entry's grants.
 */
struct copy_from_grant {
	/* Shadow entry whose grants_used[] pages are the copy source. */
	const struct blk_shadow *s;
	/* Index into s->grants_used[] of the next grant to copy from. */
	unsigned int grant_idx;
	/* Byte offset into bvec_data at which the next chunk is written. */
	unsigned int bvec_offset;
	/* Destination buffer. */
	char *bvec_data;
};
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_cistatic void blkif_copy_from_grant(unsigned long gfn, unsigned int offset,
14028c2ecf20Sopenharmony_ci				  unsigned int len, void *data)
14038c2ecf20Sopenharmony_ci{
14048c2ecf20Sopenharmony_ci	struct copy_from_grant *info = data;
14058c2ecf20Sopenharmony_ci	char *shared_data;
14068c2ecf20Sopenharmony_ci	/* Convenient aliases */
14078c2ecf20Sopenharmony_ci	const struct blk_shadow *s = info->s;
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_ci	shared_data = kmap_atomic(s->grants_used[info->grant_idx]->page);
14108c2ecf20Sopenharmony_ci
14118c2ecf20Sopenharmony_ci	memcpy(info->bvec_data + info->bvec_offset,
14128c2ecf20Sopenharmony_ci	       shared_data + offset, len);
14138c2ecf20Sopenharmony_ci
14148c2ecf20Sopenharmony_ci	info->bvec_offset += len;
14158c2ecf20Sopenharmony_ci	info->grant_idx++;
14168c2ecf20Sopenharmony_ci
14178c2ecf20Sopenharmony_ci	kunmap_atomic(shared_data);
14188c2ecf20Sopenharmony_ci}
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_cistatic enum blk_req_status blkif_rsp_to_req_status(int rsp)
14218c2ecf20Sopenharmony_ci{
14228c2ecf20Sopenharmony_ci	switch (rsp)
14238c2ecf20Sopenharmony_ci	{
14248c2ecf20Sopenharmony_ci	case BLKIF_RSP_OKAY:
14258c2ecf20Sopenharmony_ci		return REQ_DONE;
14268c2ecf20Sopenharmony_ci	case BLKIF_RSP_EOPNOTSUPP:
14278c2ecf20Sopenharmony_ci		return REQ_EOPNOTSUPP;
14288c2ecf20Sopenharmony_ci	case BLKIF_RSP_ERROR:
14298c2ecf20Sopenharmony_ci	default:
14308c2ecf20Sopenharmony_ci		return REQ_ERROR;
14318c2ecf20Sopenharmony_ci	}
14328c2ecf20Sopenharmony_ci}
14338c2ecf20Sopenharmony_ci
14348c2ecf20Sopenharmony_ci/*
14358c2ecf20Sopenharmony_ci * Get the final status of the block request based on two ring response
14368c2ecf20Sopenharmony_ci */
14378c2ecf20Sopenharmony_cistatic int blkif_get_final_status(enum blk_req_status s1,
14388c2ecf20Sopenharmony_ci				  enum blk_req_status s2)
14398c2ecf20Sopenharmony_ci{
14408c2ecf20Sopenharmony_ci	BUG_ON(s1 < REQ_DONE);
14418c2ecf20Sopenharmony_ci	BUG_ON(s2 < REQ_DONE);
14428c2ecf20Sopenharmony_ci
14438c2ecf20Sopenharmony_ci	if (s1 == REQ_ERROR || s2 == REQ_ERROR)
14448c2ecf20Sopenharmony_ci		return BLKIF_RSP_ERROR;
14458c2ecf20Sopenharmony_ci	else if (s1 == REQ_EOPNOTSUPP || s2 == REQ_EOPNOTSUPP)
14468c2ecf20Sopenharmony_ci		return BLKIF_RSP_EOPNOTSUPP;
14478c2ecf20Sopenharmony_ci	return BLKIF_RSP_OKAY;
14488c2ecf20Sopenharmony_ci}
14498c2ecf20Sopenharmony_ci
14508c2ecf20Sopenharmony_ci/*
14518c2ecf20Sopenharmony_ci * Return values:
14528c2ecf20Sopenharmony_ci *  1 response processed.
14538c2ecf20Sopenharmony_ci *  0 missing further responses.
14548c2ecf20Sopenharmony_ci * -1 error while processing.
14558c2ecf20Sopenharmony_ci */
14568c2ecf20Sopenharmony_cistatic int blkif_completion(unsigned long *id,
14578c2ecf20Sopenharmony_ci			    struct blkfront_ring_info *rinfo,
14588c2ecf20Sopenharmony_ci			    struct blkif_response *bret)
14598c2ecf20Sopenharmony_ci{
14608c2ecf20Sopenharmony_ci	int i = 0;
14618c2ecf20Sopenharmony_ci	struct scatterlist *sg;
14628c2ecf20Sopenharmony_ci	int num_sg, num_grant;
14638c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
14648c2ecf20Sopenharmony_ci	struct blk_shadow *s = &rinfo->shadow[*id];
14658c2ecf20Sopenharmony_ci	struct copy_from_grant data = {
14668c2ecf20Sopenharmony_ci		.grant_idx = 0,
14678c2ecf20Sopenharmony_ci	};
14688c2ecf20Sopenharmony_ci
14698c2ecf20Sopenharmony_ci	num_grant = s->req.operation == BLKIF_OP_INDIRECT ?
14708c2ecf20Sopenharmony_ci		s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
14718c2ecf20Sopenharmony_ci
14728c2ecf20Sopenharmony_ci	/* The I/O request may be split in two. */
14738c2ecf20Sopenharmony_ci	if (unlikely(s->associated_id != NO_ASSOCIATED_ID)) {
14748c2ecf20Sopenharmony_ci		struct blk_shadow *s2 = &rinfo->shadow[s->associated_id];
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_ci		/* Keep the status of the current response in shadow. */
14778c2ecf20Sopenharmony_ci		s->status = blkif_rsp_to_req_status(bret->status);
14788c2ecf20Sopenharmony_ci
14798c2ecf20Sopenharmony_ci		/* Wait the second response if not yet here. */
14808c2ecf20Sopenharmony_ci		if (s2->status < REQ_DONE)
14818c2ecf20Sopenharmony_ci			return 0;
14828c2ecf20Sopenharmony_ci
14838c2ecf20Sopenharmony_ci		bret->status = blkif_get_final_status(s->status,
14848c2ecf20Sopenharmony_ci						      s2->status);
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci		/*
14878c2ecf20Sopenharmony_ci		 * All the grants is stored in the first shadow in order
14888c2ecf20Sopenharmony_ci		 * to make the completion code simpler.
14898c2ecf20Sopenharmony_ci		 */
14908c2ecf20Sopenharmony_ci		num_grant += s2->req.u.rw.nr_segments;
14918c2ecf20Sopenharmony_ci
14928c2ecf20Sopenharmony_ci		/*
14938c2ecf20Sopenharmony_ci		 * The two responses may not come in order. Only the
14948c2ecf20Sopenharmony_ci		 * first request will store the scatter-gather list.
14958c2ecf20Sopenharmony_ci		 */
14968c2ecf20Sopenharmony_ci		if (s2->num_sg != 0) {
14978c2ecf20Sopenharmony_ci			/* Update "id" with the ID of the first response. */
14988c2ecf20Sopenharmony_ci			*id = s->associated_id;
14998c2ecf20Sopenharmony_ci			s = s2;
15008c2ecf20Sopenharmony_ci		}
15018c2ecf20Sopenharmony_ci
15028c2ecf20Sopenharmony_ci		/*
15038c2ecf20Sopenharmony_ci		 * We don't need anymore the second request, so recycling
15048c2ecf20Sopenharmony_ci		 * it now.
15058c2ecf20Sopenharmony_ci		 */
15068c2ecf20Sopenharmony_ci		if (add_id_to_freelist(rinfo, s->associated_id))
15078c2ecf20Sopenharmony_ci			WARN(1, "%s: can't recycle the second part (id = %ld) of the request\n",
15088c2ecf20Sopenharmony_ci			     info->gd->disk_name, s->associated_id);
15098c2ecf20Sopenharmony_ci	}
15108c2ecf20Sopenharmony_ci
15118c2ecf20Sopenharmony_ci	data.s = s;
15128c2ecf20Sopenharmony_ci	num_sg = s->num_sg;
15138c2ecf20Sopenharmony_ci
15148c2ecf20Sopenharmony_ci	if (bret->operation == BLKIF_OP_READ && info->bounce) {
15158c2ecf20Sopenharmony_ci		for_each_sg(s->sg, sg, num_sg, i) {
15168c2ecf20Sopenharmony_ci			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
15178c2ecf20Sopenharmony_ci
15188c2ecf20Sopenharmony_ci			data.bvec_offset = sg->offset;
15198c2ecf20Sopenharmony_ci			data.bvec_data = kmap_atomic(sg_page(sg));
15208c2ecf20Sopenharmony_ci
15218c2ecf20Sopenharmony_ci			gnttab_foreach_grant_in_range(sg_page(sg),
15228c2ecf20Sopenharmony_ci						      sg->offset,
15238c2ecf20Sopenharmony_ci						      sg->length,
15248c2ecf20Sopenharmony_ci						      blkif_copy_from_grant,
15258c2ecf20Sopenharmony_ci						      &data);
15268c2ecf20Sopenharmony_ci
15278c2ecf20Sopenharmony_ci			kunmap_atomic(data.bvec_data);
15288c2ecf20Sopenharmony_ci		}
15298c2ecf20Sopenharmony_ci	}
15308c2ecf20Sopenharmony_ci	/* Add the persistent grant into the list of free grants */
15318c2ecf20Sopenharmony_ci	for (i = 0; i < num_grant; i++) {
15328c2ecf20Sopenharmony_ci		if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) {
15338c2ecf20Sopenharmony_ci			/*
15348c2ecf20Sopenharmony_ci			 * If the grant is still mapped by the backend (the
15358c2ecf20Sopenharmony_ci			 * backend has chosen to make this grant persistent)
15368c2ecf20Sopenharmony_ci			 * we add it at the head of the list, so it will be
15378c2ecf20Sopenharmony_ci			 * reused first.
15388c2ecf20Sopenharmony_ci			 */
15398c2ecf20Sopenharmony_ci			if (!info->feature_persistent) {
15408c2ecf20Sopenharmony_ci				pr_alert("backed has not unmapped grant: %u\n",
15418c2ecf20Sopenharmony_ci					 s->grants_used[i]->gref);
15428c2ecf20Sopenharmony_ci				return -1;
15438c2ecf20Sopenharmony_ci			}
15448c2ecf20Sopenharmony_ci			list_add(&s->grants_used[i]->node, &rinfo->grants);
15458c2ecf20Sopenharmony_ci			rinfo->persistent_gnts_c++;
15468c2ecf20Sopenharmony_ci		} else {
15478c2ecf20Sopenharmony_ci			/*
15488c2ecf20Sopenharmony_ci			 * If the grant is not mapped by the backend we add it
15498c2ecf20Sopenharmony_ci			 * to the tail of the list, so it will not be picked
15508c2ecf20Sopenharmony_ci			 * again unless we run out of persistent grants.
15518c2ecf20Sopenharmony_ci			 */
15528c2ecf20Sopenharmony_ci			s->grants_used[i]->gref = GRANT_INVALID_REF;
15538c2ecf20Sopenharmony_ci			list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
15548c2ecf20Sopenharmony_ci		}
15558c2ecf20Sopenharmony_ci	}
15568c2ecf20Sopenharmony_ci	if (s->req.operation == BLKIF_OP_INDIRECT) {
15578c2ecf20Sopenharmony_ci		for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
15588c2ecf20Sopenharmony_ci			if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) {
15598c2ecf20Sopenharmony_ci				if (!info->feature_persistent) {
15608c2ecf20Sopenharmony_ci					pr_alert("backed has not unmapped grant: %u\n",
15618c2ecf20Sopenharmony_ci						 s->indirect_grants[i]->gref);
15628c2ecf20Sopenharmony_ci					return -1;
15638c2ecf20Sopenharmony_ci				}
15648c2ecf20Sopenharmony_ci				list_add(&s->indirect_grants[i]->node, &rinfo->grants);
15658c2ecf20Sopenharmony_ci				rinfo->persistent_gnts_c++;
15668c2ecf20Sopenharmony_ci			} else {
15678c2ecf20Sopenharmony_ci				struct page *indirect_page;
15688c2ecf20Sopenharmony_ci
15698c2ecf20Sopenharmony_ci				/*
15708c2ecf20Sopenharmony_ci				 * Add the used indirect page back to the list of
15718c2ecf20Sopenharmony_ci				 * available pages for indirect grefs.
15728c2ecf20Sopenharmony_ci				 */
15738c2ecf20Sopenharmony_ci				if (!info->bounce) {
15748c2ecf20Sopenharmony_ci					indirect_page = s->indirect_grants[i]->page;
15758c2ecf20Sopenharmony_ci					list_add(&indirect_page->lru, &rinfo->indirect_pages);
15768c2ecf20Sopenharmony_ci				}
15778c2ecf20Sopenharmony_ci				s->indirect_grants[i]->gref = GRANT_INVALID_REF;
15788c2ecf20Sopenharmony_ci				list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
15798c2ecf20Sopenharmony_ci			}
15808c2ecf20Sopenharmony_ci		}
15818c2ecf20Sopenharmony_ci	}
15828c2ecf20Sopenharmony_ci
15838c2ecf20Sopenharmony_ci	return 1;
15848c2ecf20Sopenharmony_ci}
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_cistatic irqreturn_t blkif_interrupt(int irq, void *dev_id)
15878c2ecf20Sopenharmony_ci{
15888c2ecf20Sopenharmony_ci	struct request *req;
15898c2ecf20Sopenharmony_ci	struct blkif_response bret;
15908c2ecf20Sopenharmony_ci	RING_IDX i, rp;
15918c2ecf20Sopenharmony_ci	unsigned long flags;
15928c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
15938c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
15948c2ecf20Sopenharmony_ci	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
15958c2ecf20Sopenharmony_ci
15968c2ecf20Sopenharmony_ci	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
15978c2ecf20Sopenharmony_ci		xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
15988c2ecf20Sopenharmony_ci		return IRQ_HANDLED;
15998c2ecf20Sopenharmony_ci	}
16008c2ecf20Sopenharmony_ci
16018c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rinfo->ring_lock, flags);
16028c2ecf20Sopenharmony_ci again:
16038c2ecf20Sopenharmony_ci	rp = READ_ONCE(rinfo->ring.sring->rsp_prod);
16048c2ecf20Sopenharmony_ci	virt_rmb(); /* Ensure we see queued responses up to 'rp'. */
16058c2ecf20Sopenharmony_ci	if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) {
16068c2ecf20Sopenharmony_ci		pr_alert("%s: illegal number of responses %u\n",
16078c2ecf20Sopenharmony_ci			 info->gd->disk_name, rp - rinfo->ring.rsp_cons);
16088c2ecf20Sopenharmony_ci		goto err;
16098c2ecf20Sopenharmony_ci	}
16108c2ecf20Sopenharmony_ci
16118c2ecf20Sopenharmony_ci	for (i = rinfo->ring.rsp_cons; i != rp; i++) {
16128c2ecf20Sopenharmony_ci		unsigned long id;
16138c2ecf20Sopenharmony_ci		unsigned int op;
16148c2ecf20Sopenharmony_ci
16158c2ecf20Sopenharmony_ci		eoiflag = 0;
16168c2ecf20Sopenharmony_ci
16178c2ecf20Sopenharmony_ci		RING_COPY_RESPONSE(&rinfo->ring, i, &bret);
16188c2ecf20Sopenharmony_ci		id = bret.id;
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci		/*
16218c2ecf20Sopenharmony_ci		 * The backend has messed up and given us an id that we would
16228c2ecf20Sopenharmony_ci		 * never have given to it (we stamp it up to BLK_RING_SIZE -
16238c2ecf20Sopenharmony_ci		 * look in get_id_from_freelist.
16248c2ecf20Sopenharmony_ci		 */
16258c2ecf20Sopenharmony_ci		if (id >= BLK_RING_SIZE(info)) {
16268c2ecf20Sopenharmony_ci			pr_alert("%s: response has incorrect id (%ld)\n",
16278c2ecf20Sopenharmony_ci				 info->gd->disk_name, id);
16288c2ecf20Sopenharmony_ci			goto err;
16298c2ecf20Sopenharmony_ci		}
16308c2ecf20Sopenharmony_ci		if (rinfo->shadow[id].status != REQ_WAITING) {
16318c2ecf20Sopenharmony_ci			pr_alert("%s: response references no pending request\n",
16328c2ecf20Sopenharmony_ci				 info->gd->disk_name);
16338c2ecf20Sopenharmony_ci			goto err;
16348c2ecf20Sopenharmony_ci		}
16358c2ecf20Sopenharmony_ci
16368c2ecf20Sopenharmony_ci		rinfo->shadow[id].status = REQ_PROCESSING;
16378c2ecf20Sopenharmony_ci		req  = rinfo->shadow[id].request;
16388c2ecf20Sopenharmony_ci
16398c2ecf20Sopenharmony_ci		op = rinfo->shadow[id].req.operation;
16408c2ecf20Sopenharmony_ci		if (op == BLKIF_OP_INDIRECT)
16418c2ecf20Sopenharmony_ci			op = rinfo->shadow[id].req.u.indirect.indirect_op;
16428c2ecf20Sopenharmony_ci		if (bret.operation != op) {
16438c2ecf20Sopenharmony_ci			pr_alert("%s: response has wrong operation (%u instead of %u)\n",
16448c2ecf20Sopenharmony_ci				 info->gd->disk_name, bret.operation, op);
16458c2ecf20Sopenharmony_ci			goto err;
16468c2ecf20Sopenharmony_ci		}
16478c2ecf20Sopenharmony_ci
16488c2ecf20Sopenharmony_ci		if (bret.operation != BLKIF_OP_DISCARD) {
16498c2ecf20Sopenharmony_ci			int ret;
16508c2ecf20Sopenharmony_ci
16518c2ecf20Sopenharmony_ci			/*
16528c2ecf20Sopenharmony_ci			 * We may need to wait for an extra response if the
16538c2ecf20Sopenharmony_ci			 * I/O request is split in 2
16548c2ecf20Sopenharmony_ci			 */
16558c2ecf20Sopenharmony_ci			ret = blkif_completion(&id, rinfo, &bret);
16568c2ecf20Sopenharmony_ci			if (!ret)
16578c2ecf20Sopenharmony_ci				continue;
16588c2ecf20Sopenharmony_ci			if (unlikely(ret < 0))
16598c2ecf20Sopenharmony_ci				goto err;
16608c2ecf20Sopenharmony_ci		}
16618c2ecf20Sopenharmony_ci
16628c2ecf20Sopenharmony_ci		if (add_id_to_freelist(rinfo, id)) {
16638c2ecf20Sopenharmony_ci			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
16648c2ecf20Sopenharmony_ci			     info->gd->disk_name, op_name(bret.operation), id);
16658c2ecf20Sopenharmony_ci			continue;
16668c2ecf20Sopenharmony_ci		}
16678c2ecf20Sopenharmony_ci
16688c2ecf20Sopenharmony_ci		if (bret.status == BLKIF_RSP_OKAY)
16698c2ecf20Sopenharmony_ci			blkif_req(req)->error = BLK_STS_OK;
16708c2ecf20Sopenharmony_ci		else
16718c2ecf20Sopenharmony_ci			blkif_req(req)->error = BLK_STS_IOERR;
16728c2ecf20Sopenharmony_ci
16738c2ecf20Sopenharmony_ci		switch (bret.operation) {
16748c2ecf20Sopenharmony_ci		case BLKIF_OP_DISCARD:
16758c2ecf20Sopenharmony_ci			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
16768c2ecf20Sopenharmony_ci				struct request_queue *rq = info->rq;
16778c2ecf20Sopenharmony_ci
16788c2ecf20Sopenharmony_ci				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
16798c2ecf20Sopenharmony_ci					   info->gd->disk_name, op_name(bret.operation));
16808c2ecf20Sopenharmony_ci				blkif_req(req)->error = BLK_STS_NOTSUPP;
16818c2ecf20Sopenharmony_ci				info->feature_discard = 0;
16828c2ecf20Sopenharmony_ci				info->feature_secdiscard = 0;
16838c2ecf20Sopenharmony_ci				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
16848c2ecf20Sopenharmony_ci				blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq);
16858c2ecf20Sopenharmony_ci			}
16868c2ecf20Sopenharmony_ci			break;
16878c2ecf20Sopenharmony_ci		case BLKIF_OP_FLUSH_DISKCACHE:
16888c2ecf20Sopenharmony_ci		case BLKIF_OP_WRITE_BARRIER:
16898c2ecf20Sopenharmony_ci			if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) {
16908c2ecf20Sopenharmony_ci				pr_warn_ratelimited("blkfront: %s: %s op failed\n",
16918c2ecf20Sopenharmony_ci				       info->gd->disk_name, op_name(bret.operation));
16928c2ecf20Sopenharmony_ci				blkif_req(req)->error = BLK_STS_NOTSUPP;
16938c2ecf20Sopenharmony_ci			}
16948c2ecf20Sopenharmony_ci			if (unlikely(bret.status == BLKIF_RSP_ERROR &&
16958c2ecf20Sopenharmony_ci				     rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
16968c2ecf20Sopenharmony_ci				pr_warn_ratelimited("blkfront: %s: empty %s op failed\n",
16978c2ecf20Sopenharmony_ci				       info->gd->disk_name, op_name(bret.operation));
16988c2ecf20Sopenharmony_ci				blkif_req(req)->error = BLK_STS_NOTSUPP;
16998c2ecf20Sopenharmony_ci			}
17008c2ecf20Sopenharmony_ci			if (unlikely(blkif_req(req)->error)) {
17018c2ecf20Sopenharmony_ci				if (blkif_req(req)->error == BLK_STS_NOTSUPP)
17028c2ecf20Sopenharmony_ci					blkif_req(req)->error = BLK_STS_OK;
17038c2ecf20Sopenharmony_ci				info->feature_fua = 0;
17048c2ecf20Sopenharmony_ci				info->feature_flush = 0;
17058c2ecf20Sopenharmony_ci				xlvbd_flush(info);
17068c2ecf20Sopenharmony_ci			}
17078c2ecf20Sopenharmony_ci			fallthrough;
17088c2ecf20Sopenharmony_ci		case BLKIF_OP_READ:
17098c2ecf20Sopenharmony_ci		case BLKIF_OP_WRITE:
17108c2ecf20Sopenharmony_ci			if (unlikely(bret.status != BLKIF_RSP_OKAY))
17118c2ecf20Sopenharmony_ci				dev_dbg_ratelimited(&info->xbdev->dev,
17128c2ecf20Sopenharmony_ci					"Bad return from blkdev data request: %#x\n",
17138c2ecf20Sopenharmony_ci					bret.status);
17148c2ecf20Sopenharmony_ci
17158c2ecf20Sopenharmony_ci			break;
17168c2ecf20Sopenharmony_ci		default:
17178c2ecf20Sopenharmony_ci			BUG();
17188c2ecf20Sopenharmony_ci		}
17198c2ecf20Sopenharmony_ci
17208c2ecf20Sopenharmony_ci		if (likely(!blk_should_fake_timeout(req->q)))
17218c2ecf20Sopenharmony_ci			blk_mq_complete_request(req);
17228c2ecf20Sopenharmony_ci	}
17238c2ecf20Sopenharmony_ci
17248c2ecf20Sopenharmony_ci	rinfo->ring.rsp_cons = i;
17258c2ecf20Sopenharmony_ci
17268c2ecf20Sopenharmony_ci	if (i != rinfo->ring.req_prod_pvt) {
17278c2ecf20Sopenharmony_ci		int more_to_do;
17288c2ecf20Sopenharmony_ci		RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do);
17298c2ecf20Sopenharmony_ci		if (more_to_do)
17308c2ecf20Sopenharmony_ci			goto again;
17318c2ecf20Sopenharmony_ci	} else
17328c2ecf20Sopenharmony_ci		rinfo->ring.sring->rsp_event = i + 1;
17338c2ecf20Sopenharmony_ci
17348c2ecf20Sopenharmony_ci	kick_pending_request_queues_locked(rinfo);
17358c2ecf20Sopenharmony_ci
17368c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
17378c2ecf20Sopenharmony_ci
17388c2ecf20Sopenharmony_ci	xen_irq_lateeoi(irq, eoiflag);
17398c2ecf20Sopenharmony_ci
17408c2ecf20Sopenharmony_ci	return IRQ_HANDLED;
17418c2ecf20Sopenharmony_ci
17428c2ecf20Sopenharmony_ci err:
17438c2ecf20Sopenharmony_ci	info->connected = BLKIF_STATE_ERROR;
17448c2ecf20Sopenharmony_ci
17458c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
17468c2ecf20Sopenharmony_ci
17478c2ecf20Sopenharmony_ci	/* No EOI in order to avoid further interrupts. */
17488c2ecf20Sopenharmony_ci
17498c2ecf20Sopenharmony_ci	pr_alert("%s disabled for further use\n", info->gd->disk_name);
17508c2ecf20Sopenharmony_ci	return IRQ_HANDLED;
17518c2ecf20Sopenharmony_ci}
17528c2ecf20Sopenharmony_ci
17538c2ecf20Sopenharmony_ci
17548c2ecf20Sopenharmony_cistatic int setup_blkring(struct xenbus_device *dev,
17558c2ecf20Sopenharmony_ci			 struct blkfront_ring_info *rinfo)
17568c2ecf20Sopenharmony_ci{
17578c2ecf20Sopenharmony_ci	struct blkif_sring *sring;
17588c2ecf20Sopenharmony_ci	int err, i;
17598c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
17608c2ecf20Sopenharmony_ci	unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
17618c2ecf20Sopenharmony_ci	grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
17628c2ecf20Sopenharmony_ci
17638c2ecf20Sopenharmony_ci	for (i = 0; i < info->nr_ring_pages; i++)
17648c2ecf20Sopenharmony_ci		rinfo->ring_ref[i] = GRANT_INVALID_REF;
17658c2ecf20Sopenharmony_ci
17668c2ecf20Sopenharmony_ci	sring = alloc_pages_exact(ring_size, GFP_NOIO | __GFP_ZERO);
17678c2ecf20Sopenharmony_ci	if (!sring) {
17688c2ecf20Sopenharmony_ci		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
17698c2ecf20Sopenharmony_ci		return -ENOMEM;
17708c2ecf20Sopenharmony_ci	}
17718c2ecf20Sopenharmony_ci	SHARED_RING_INIT(sring);
17728c2ecf20Sopenharmony_ci	FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
17738c2ecf20Sopenharmony_ci
17748c2ecf20Sopenharmony_ci	err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
17758c2ecf20Sopenharmony_ci	if (err < 0) {
17768c2ecf20Sopenharmony_ci		free_pages_exact(sring, ring_size);
17778c2ecf20Sopenharmony_ci		rinfo->ring.sring = NULL;
17788c2ecf20Sopenharmony_ci		goto fail;
17798c2ecf20Sopenharmony_ci	}
17808c2ecf20Sopenharmony_ci	for (i = 0; i < info->nr_ring_pages; i++)
17818c2ecf20Sopenharmony_ci		rinfo->ring_ref[i] = gref[i];
17828c2ecf20Sopenharmony_ci
17838c2ecf20Sopenharmony_ci	err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
17848c2ecf20Sopenharmony_ci	if (err)
17858c2ecf20Sopenharmony_ci		goto fail;
17868c2ecf20Sopenharmony_ci
17878c2ecf20Sopenharmony_ci	err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt,
17888c2ecf20Sopenharmony_ci						0, "blkif", rinfo);
17898c2ecf20Sopenharmony_ci	if (err <= 0) {
17908c2ecf20Sopenharmony_ci		xenbus_dev_fatal(dev, err,
17918c2ecf20Sopenharmony_ci				 "bind_evtchn_to_irqhandler failed");
17928c2ecf20Sopenharmony_ci		goto fail;
17938c2ecf20Sopenharmony_ci	}
17948c2ecf20Sopenharmony_ci	rinfo->irq = err;
17958c2ecf20Sopenharmony_ci
17968c2ecf20Sopenharmony_ci	return 0;
17978c2ecf20Sopenharmony_cifail:
17988c2ecf20Sopenharmony_ci	blkif_free(info, 0);
17998c2ecf20Sopenharmony_ci	return err;
18008c2ecf20Sopenharmony_ci}
18018c2ecf20Sopenharmony_ci
18028c2ecf20Sopenharmony_ci/*
18038c2ecf20Sopenharmony_ci * Write out per-ring/queue nodes including ring-ref and event-channel, and each
18048c2ecf20Sopenharmony_ci * ring buffer may have multi pages depending on ->nr_ring_pages.
18058c2ecf20Sopenharmony_ci */
18068c2ecf20Sopenharmony_cistatic int write_per_ring_nodes(struct xenbus_transaction xbt,
18078c2ecf20Sopenharmony_ci				struct blkfront_ring_info *rinfo, const char *dir)
18088c2ecf20Sopenharmony_ci{
18098c2ecf20Sopenharmony_ci	int err;
18108c2ecf20Sopenharmony_ci	unsigned int i;
18118c2ecf20Sopenharmony_ci	const char *message = NULL;
18128c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
18138c2ecf20Sopenharmony_ci
18148c2ecf20Sopenharmony_ci	if (info->nr_ring_pages == 1) {
18158c2ecf20Sopenharmony_ci		err = xenbus_printf(xbt, dir, "ring-ref", "%u", rinfo->ring_ref[0]);
18168c2ecf20Sopenharmony_ci		if (err) {
18178c2ecf20Sopenharmony_ci			message = "writing ring-ref";
18188c2ecf20Sopenharmony_ci			goto abort_transaction;
18198c2ecf20Sopenharmony_ci		}
18208c2ecf20Sopenharmony_ci	} else {
18218c2ecf20Sopenharmony_ci		for (i = 0; i < info->nr_ring_pages; i++) {
18228c2ecf20Sopenharmony_ci			char ring_ref_name[RINGREF_NAME_LEN];
18238c2ecf20Sopenharmony_ci
18248c2ecf20Sopenharmony_ci			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
18258c2ecf20Sopenharmony_ci			err = xenbus_printf(xbt, dir, ring_ref_name,
18268c2ecf20Sopenharmony_ci					    "%u", rinfo->ring_ref[i]);
18278c2ecf20Sopenharmony_ci			if (err) {
18288c2ecf20Sopenharmony_ci				message = "writing ring-ref";
18298c2ecf20Sopenharmony_ci				goto abort_transaction;
18308c2ecf20Sopenharmony_ci			}
18318c2ecf20Sopenharmony_ci		}
18328c2ecf20Sopenharmony_ci	}
18338c2ecf20Sopenharmony_ci
18348c2ecf20Sopenharmony_ci	err = xenbus_printf(xbt, dir, "event-channel", "%u", rinfo->evtchn);
18358c2ecf20Sopenharmony_ci	if (err) {
18368c2ecf20Sopenharmony_ci		message = "writing event-channel";
18378c2ecf20Sopenharmony_ci		goto abort_transaction;
18388c2ecf20Sopenharmony_ci	}
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_ci	return 0;
18418c2ecf20Sopenharmony_ci
18428c2ecf20Sopenharmony_ciabort_transaction:
18438c2ecf20Sopenharmony_ci	xenbus_transaction_end(xbt, 1);
18448c2ecf20Sopenharmony_ci	if (message)
18458c2ecf20Sopenharmony_ci		xenbus_dev_fatal(info->xbdev, err, "%s", message);
18468c2ecf20Sopenharmony_ci
18478c2ecf20Sopenharmony_ci	return err;
18488c2ecf20Sopenharmony_ci}
18498c2ecf20Sopenharmony_ci
18508c2ecf20Sopenharmony_cistatic void free_info(struct blkfront_info *info)
18518c2ecf20Sopenharmony_ci{
18528c2ecf20Sopenharmony_ci	list_del(&info->info_list);
18538c2ecf20Sopenharmony_ci	kfree(info);
18548c2ecf20Sopenharmony_ci}
18558c2ecf20Sopenharmony_ci
18568c2ecf20Sopenharmony_ci/* Enable the persistent grants feature. */
18578c2ecf20Sopenharmony_cistatic bool feature_persistent = true;
18588c2ecf20Sopenharmony_cimodule_param(feature_persistent, bool, 0644);
18598c2ecf20Sopenharmony_ciMODULE_PARM_DESC(feature_persistent,
18608c2ecf20Sopenharmony_ci		"Enables the persistent grants feature");
18618c2ecf20Sopenharmony_ci
18628c2ecf20Sopenharmony_ci/* Common code used when first setting up, and when resuming. */
18638c2ecf20Sopenharmony_cistatic int talk_to_blkback(struct xenbus_device *dev,
18648c2ecf20Sopenharmony_ci			   struct blkfront_info *info)
18658c2ecf20Sopenharmony_ci{
18668c2ecf20Sopenharmony_ci	const char *message = NULL;
18678c2ecf20Sopenharmony_ci	struct xenbus_transaction xbt;
18688c2ecf20Sopenharmony_ci	int err;
18698c2ecf20Sopenharmony_ci	unsigned int i, max_page_order;
18708c2ecf20Sopenharmony_ci	unsigned int ring_page_order;
18718c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
18728c2ecf20Sopenharmony_ci
18738c2ecf20Sopenharmony_ci	if (!info)
18748c2ecf20Sopenharmony_ci		return -ENODEV;
18758c2ecf20Sopenharmony_ci
18768c2ecf20Sopenharmony_ci	/* Check if backend is trusted. */
18778c2ecf20Sopenharmony_ci	info->bounce = !xen_blkif_trusted ||
18788c2ecf20Sopenharmony_ci		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
18798c2ecf20Sopenharmony_ci
18808c2ecf20Sopenharmony_ci	max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
18818c2ecf20Sopenharmony_ci					      "max-ring-page-order", 0);
18828c2ecf20Sopenharmony_ci	ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
18838c2ecf20Sopenharmony_ci	info->nr_ring_pages = 1 << ring_page_order;
18848c2ecf20Sopenharmony_ci
18858c2ecf20Sopenharmony_ci	err = negotiate_mq(info);
18868c2ecf20Sopenharmony_ci	if (err)
18878c2ecf20Sopenharmony_ci		goto destroy_blkring;
18888c2ecf20Sopenharmony_ci
18898c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i) {
18908c2ecf20Sopenharmony_ci		/* Create shared ring, alloc event channel. */
18918c2ecf20Sopenharmony_ci		err = setup_blkring(dev, rinfo);
18928c2ecf20Sopenharmony_ci		if (err)
18938c2ecf20Sopenharmony_ci			goto destroy_blkring;
18948c2ecf20Sopenharmony_ci	}
18958c2ecf20Sopenharmony_ci
18968c2ecf20Sopenharmony_ciagain:
18978c2ecf20Sopenharmony_ci	err = xenbus_transaction_start(&xbt);
18988c2ecf20Sopenharmony_ci	if (err) {
18998c2ecf20Sopenharmony_ci		xenbus_dev_fatal(dev, err, "starting transaction");
19008c2ecf20Sopenharmony_ci		goto destroy_blkring;
19018c2ecf20Sopenharmony_ci	}
19028c2ecf20Sopenharmony_ci
19038c2ecf20Sopenharmony_ci	if (info->nr_ring_pages > 1) {
19048c2ecf20Sopenharmony_ci		err = xenbus_printf(xbt, dev->nodename, "ring-page-order", "%u",
19058c2ecf20Sopenharmony_ci				    ring_page_order);
19068c2ecf20Sopenharmony_ci		if (err) {
19078c2ecf20Sopenharmony_ci			message = "writing ring-page-order";
19088c2ecf20Sopenharmony_ci			goto abort_transaction;
19098c2ecf20Sopenharmony_ci		}
19108c2ecf20Sopenharmony_ci	}
19118c2ecf20Sopenharmony_ci
19128c2ecf20Sopenharmony_ci	/* We already got the number of queues/rings in _probe */
19138c2ecf20Sopenharmony_ci	if (info->nr_rings == 1) {
19148c2ecf20Sopenharmony_ci		err = write_per_ring_nodes(xbt, info->rinfo, dev->nodename);
19158c2ecf20Sopenharmony_ci		if (err)
19168c2ecf20Sopenharmony_ci			goto destroy_blkring;
19178c2ecf20Sopenharmony_ci	} else {
19188c2ecf20Sopenharmony_ci		char *path;
19198c2ecf20Sopenharmony_ci		size_t pathsize;
19208c2ecf20Sopenharmony_ci
19218c2ecf20Sopenharmony_ci		err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues", "%u",
19228c2ecf20Sopenharmony_ci				    info->nr_rings);
19238c2ecf20Sopenharmony_ci		if (err) {
19248c2ecf20Sopenharmony_ci			message = "writing multi-queue-num-queues";
19258c2ecf20Sopenharmony_ci			goto abort_transaction;
19268c2ecf20Sopenharmony_ci		}
19278c2ecf20Sopenharmony_ci
19288c2ecf20Sopenharmony_ci		pathsize = strlen(dev->nodename) + QUEUE_NAME_LEN;
19298c2ecf20Sopenharmony_ci		path = kmalloc(pathsize, GFP_KERNEL);
19308c2ecf20Sopenharmony_ci		if (!path) {
19318c2ecf20Sopenharmony_ci			err = -ENOMEM;
19328c2ecf20Sopenharmony_ci			message = "ENOMEM while writing ring references";
19338c2ecf20Sopenharmony_ci			goto abort_transaction;
19348c2ecf20Sopenharmony_ci		}
19358c2ecf20Sopenharmony_ci
19368c2ecf20Sopenharmony_ci		for_each_rinfo(info, rinfo, i) {
19378c2ecf20Sopenharmony_ci			memset(path, 0, pathsize);
19388c2ecf20Sopenharmony_ci			snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i);
19398c2ecf20Sopenharmony_ci			err = write_per_ring_nodes(xbt, rinfo, path);
19408c2ecf20Sopenharmony_ci			if (err) {
19418c2ecf20Sopenharmony_ci				kfree(path);
19428c2ecf20Sopenharmony_ci				goto destroy_blkring;
19438c2ecf20Sopenharmony_ci			}
19448c2ecf20Sopenharmony_ci		}
19458c2ecf20Sopenharmony_ci		kfree(path);
19468c2ecf20Sopenharmony_ci	}
19478c2ecf20Sopenharmony_ci	err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
19488c2ecf20Sopenharmony_ci			    XEN_IO_PROTO_ABI_NATIVE);
19498c2ecf20Sopenharmony_ci	if (err) {
19508c2ecf20Sopenharmony_ci		message = "writing protocol";
19518c2ecf20Sopenharmony_ci		goto abort_transaction;
19528c2ecf20Sopenharmony_ci	}
19538c2ecf20Sopenharmony_ci	info->feature_persistent_parm = feature_persistent;
19548c2ecf20Sopenharmony_ci	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
19558c2ecf20Sopenharmony_ci			info->feature_persistent_parm);
19568c2ecf20Sopenharmony_ci	if (err)
19578c2ecf20Sopenharmony_ci		dev_warn(&dev->dev,
19588c2ecf20Sopenharmony_ci			 "writing persistent grants feature to xenbus");
19598c2ecf20Sopenharmony_ci
19608c2ecf20Sopenharmony_ci	err = xenbus_transaction_end(xbt, 0);
19618c2ecf20Sopenharmony_ci	if (err) {
19628c2ecf20Sopenharmony_ci		if (err == -EAGAIN)
19638c2ecf20Sopenharmony_ci			goto again;
19648c2ecf20Sopenharmony_ci		xenbus_dev_fatal(dev, err, "completing transaction");
19658c2ecf20Sopenharmony_ci		goto destroy_blkring;
19668c2ecf20Sopenharmony_ci	}
19678c2ecf20Sopenharmony_ci
19688c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i) {
19698c2ecf20Sopenharmony_ci		unsigned int j;
19708c2ecf20Sopenharmony_ci
19718c2ecf20Sopenharmony_ci		for (j = 0; j < BLK_RING_SIZE(info); j++)
19728c2ecf20Sopenharmony_ci			rinfo->shadow[j].req.u.rw.id = j + 1;
19738c2ecf20Sopenharmony_ci		rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
19748c2ecf20Sopenharmony_ci	}
19758c2ecf20Sopenharmony_ci	xenbus_switch_state(dev, XenbusStateInitialised);
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci	return 0;
19788c2ecf20Sopenharmony_ci
19798c2ecf20Sopenharmony_ci abort_transaction:
19808c2ecf20Sopenharmony_ci	xenbus_transaction_end(xbt, 1);
19818c2ecf20Sopenharmony_ci	if (message)
19828c2ecf20Sopenharmony_ci		xenbus_dev_fatal(dev, err, "%s", message);
19838c2ecf20Sopenharmony_ci destroy_blkring:
19848c2ecf20Sopenharmony_ci	blkif_free(info, 0);
19858c2ecf20Sopenharmony_ci
19868c2ecf20Sopenharmony_ci	mutex_lock(&blkfront_mutex);
19878c2ecf20Sopenharmony_ci	free_info(info);
19888c2ecf20Sopenharmony_ci	mutex_unlock(&blkfront_mutex);
19898c2ecf20Sopenharmony_ci
19908c2ecf20Sopenharmony_ci	dev_set_drvdata(&dev->dev, NULL);
19918c2ecf20Sopenharmony_ci
19928c2ecf20Sopenharmony_ci	return err;
19938c2ecf20Sopenharmony_ci}
19948c2ecf20Sopenharmony_ci
19958c2ecf20Sopenharmony_cistatic int negotiate_mq(struct blkfront_info *info)
19968c2ecf20Sopenharmony_ci{
19978c2ecf20Sopenharmony_ci	unsigned int backend_max_queues;
19988c2ecf20Sopenharmony_ci	unsigned int i;
19998c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
20008c2ecf20Sopenharmony_ci
20018c2ecf20Sopenharmony_ci	BUG_ON(info->nr_rings);
20028c2ecf20Sopenharmony_ci
20038c2ecf20Sopenharmony_ci	/* Check if backend supports multiple queues. */
20048c2ecf20Sopenharmony_ci	backend_max_queues = xenbus_read_unsigned(info->xbdev->otherend,
20058c2ecf20Sopenharmony_ci						  "multi-queue-max-queues", 1);
20068c2ecf20Sopenharmony_ci	info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
20078c2ecf20Sopenharmony_ci	/* We need at least one ring. */
20088c2ecf20Sopenharmony_ci	if (!info->nr_rings)
20098c2ecf20Sopenharmony_ci		info->nr_rings = 1;
20108c2ecf20Sopenharmony_ci
20118c2ecf20Sopenharmony_ci	info->rinfo_size = struct_size(info->rinfo, shadow,
20128c2ecf20Sopenharmony_ci				       BLK_RING_SIZE(info));
20138c2ecf20Sopenharmony_ci	info->rinfo = kvcalloc(info->nr_rings, info->rinfo_size, GFP_KERNEL);
20148c2ecf20Sopenharmony_ci	if (!info->rinfo) {
20158c2ecf20Sopenharmony_ci		xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
20168c2ecf20Sopenharmony_ci		info->nr_rings = 0;
20178c2ecf20Sopenharmony_ci		return -ENOMEM;
20188c2ecf20Sopenharmony_ci	}
20198c2ecf20Sopenharmony_ci
20208c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i) {
20218c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&rinfo->indirect_pages);
20228c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&rinfo->grants);
20238c2ecf20Sopenharmony_ci		rinfo->dev_info = info;
20248c2ecf20Sopenharmony_ci		INIT_WORK(&rinfo->work, blkif_restart_queue);
20258c2ecf20Sopenharmony_ci		spin_lock_init(&rinfo->ring_lock);
20268c2ecf20Sopenharmony_ci	}
20278c2ecf20Sopenharmony_ci	return 0;
20288c2ecf20Sopenharmony_ci}
20298c2ecf20Sopenharmony_ci
20308c2ecf20Sopenharmony_ci/**
20318c2ecf20Sopenharmony_ci * Entry point to this code when a new device is created.  Allocate the basic
20328c2ecf20Sopenharmony_ci * structures and the ring buffer for communication with the backend, and
20338c2ecf20Sopenharmony_ci * inform the backend of the appropriate details for those.  Switch to
20348c2ecf20Sopenharmony_ci * Initialised state.
20358c2ecf20Sopenharmony_ci */
20368c2ecf20Sopenharmony_cistatic int blkfront_probe(struct xenbus_device *dev,
20378c2ecf20Sopenharmony_ci			  const struct xenbus_device_id *id)
20388c2ecf20Sopenharmony_ci{
20398c2ecf20Sopenharmony_ci	int err, vdevice;
20408c2ecf20Sopenharmony_ci	struct blkfront_info *info;
20418c2ecf20Sopenharmony_ci
20428c2ecf20Sopenharmony_ci	/* FIXME: Use dynamic device id if this is not set. */
20438c2ecf20Sopenharmony_ci	err = xenbus_scanf(XBT_NIL, dev->nodename,
20448c2ecf20Sopenharmony_ci			   "virtual-device", "%i", &vdevice);
20458c2ecf20Sopenharmony_ci	if (err != 1) {
20468c2ecf20Sopenharmony_ci		/* go looking in the extended area instead */
20478c2ecf20Sopenharmony_ci		err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
20488c2ecf20Sopenharmony_ci				   "%i", &vdevice);
20498c2ecf20Sopenharmony_ci		if (err != 1) {
20508c2ecf20Sopenharmony_ci			xenbus_dev_fatal(dev, err, "reading virtual-device");
20518c2ecf20Sopenharmony_ci			return err;
20528c2ecf20Sopenharmony_ci		}
20538c2ecf20Sopenharmony_ci	}
20548c2ecf20Sopenharmony_ci
20558c2ecf20Sopenharmony_ci	if (xen_hvm_domain()) {
20568c2ecf20Sopenharmony_ci		char *type;
20578c2ecf20Sopenharmony_ci		int len;
20588c2ecf20Sopenharmony_ci		/* no unplug has been done: do not hook devices != xen vbds */
20598c2ecf20Sopenharmony_ci		if (xen_has_pv_and_legacy_disk_devices()) {
20608c2ecf20Sopenharmony_ci			int major;
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci			if (!VDEV_IS_EXTENDED(vdevice))
20638c2ecf20Sopenharmony_ci				major = BLKIF_MAJOR(vdevice);
20648c2ecf20Sopenharmony_ci			else
20658c2ecf20Sopenharmony_ci				major = XENVBD_MAJOR;
20668c2ecf20Sopenharmony_ci
20678c2ecf20Sopenharmony_ci			if (major != XENVBD_MAJOR) {
20688c2ecf20Sopenharmony_ci				printk(KERN_INFO
20698c2ecf20Sopenharmony_ci						"%s: HVM does not support vbd %d as xen block device\n",
20708c2ecf20Sopenharmony_ci						__func__, vdevice);
20718c2ecf20Sopenharmony_ci				return -ENODEV;
20728c2ecf20Sopenharmony_ci			}
20738c2ecf20Sopenharmony_ci		}
20748c2ecf20Sopenharmony_ci		/* do not create a PV cdrom device if we are an HVM guest */
20758c2ecf20Sopenharmony_ci		type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
20768c2ecf20Sopenharmony_ci		if (IS_ERR(type))
20778c2ecf20Sopenharmony_ci			return -ENODEV;
20788c2ecf20Sopenharmony_ci		if (strncmp(type, "cdrom", 5) == 0) {
20798c2ecf20Sopenharmony_ci			kfree(type);
20808c2ecf20Sopenharmony_ci			return -ENODEV;
20818c2ecf20Sopenharmony_ci		}
20828c2ecf20Sopenharmony_ci		kfree(type);
20838c2ecf20Sopenharmony_ci	}
20848c2ecf20Sopenharmony_ci	info = kzalloc(sizeof(*info), GFP_KERNEL);
20858c2ecf20Sopenharmony_ci	if (!info) {
20868c2ecf20Sopenharmony_ci		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
20878c2ecf20Sopenharmony_ci		return -ENOMEM;
20888c2ecf20Sopenharmony_ci	}
20898c2ecf20Sopenharmony_ci
20908c2ecf20Sopenharmony_ci	info->xbdev = dev;
20918c2ecf20Sopenharmony_ci
20928c2ecf20Sopenharmony_ci	mutex_init(&info->mutex);
20938c2ecf20Sopenharmony_ci	info->vdevice = vdevice;
20948c2ecf20Sopenharmony_ci	info->connected = BLKIF_STATE_DISCONNECTED;
20958c2ecf20Sopenharmony_ci
20968c2ecf20Sopenharmony_ci	/* Front end dir is a number, which is used as the id. */
20978c2ecf20Sopenharmony_ci	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
20988c2ecf20Sopenharmony_ci	dev_set_drvdata(&dev->dev, info);
20998c2ecf20Sopenharmony_ci
21008c2ecf20Sopenharmony_ci	mutex_lock(&blkfront_mutex);
21018c2ecf20Sopenharmony_ci	list_add(&info->info_list, &info_list);
21028c2ecf20Sopenharmony_ci	mutex_unlock(&blkfront_mutex);
21038c2ecf20Sopenharmony_ci
21048c2ecf20Sopenharmony_ci	return 0;
21058c2ecf20Sopenharmony_ci}
21068c2ecf20Sopenharmony_ci
21078c2ecf20Sopenharmony_cistatic int blkif_recover(struct blkfront_info *info)
21088c2ecf20Sopenharmony_ci{
21098c2ecf20Sopenharmony_ci	unsigned int r_index;
21108c2ecf20Sopenharmony_ci	struct request *req, *n;
21118c2ecf20Sopenharmony_ci	int rc;
21128c2ecf20Sopenharmony_ci	struct bio *bio;
21138c2ecf20Sopenharmony_ci	unsigned int segs;
21148c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
21158c2ecf20Sopenharmony_ci
21168c2ecf20Sopenharmony_ci	blkfront_gather_backend_features(info);
21178c2ecf20Sopenharmony_ci	/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
21188c2ecf20Sopenharmony_ci	blkif_set_queue_limits(info);
21198c2ecf20Sopenharmony_ci	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
21208c2ecf20Sopenharmony_ci	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
21218c2ecf20Sopenharmony_ci
21228c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, r_index) {
21238c2ecf20Sopenharmony_ci		rc = blkfront_setup_indirect(rinfo);
21248c2ecf20Sopenharmony_ci		if (rc)
21258c2ecf20Sopenharmony_ci			return rc;
21268c2ecf20Sopenharmony_ci	}
21278c2ecf20Sopenharmony_ci	xenbus_switch_state(info->xbdev, XenbusStateConnected);
21288c2ecf20Sopenharmony_ci
21298c2ecf20Sopenharmony_ci	/* Now safe for us to use the shared ring */
21308c2ecf20Sopenharmony_ci	info->connected = BLKIF_STATE_CONNECTED;
21318c2ecf20Sopenharmony_ci
21328c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, r_index) {
21338c2ecf20Sopenharmony_ci		/* Kick any other new requests queued since we resumed */
21348c2ecf20Sopenharmony_ci		kick_pending_request_queues(rinfo);
21358c2ecf20Sopenharmony_ci	}
21368c2ecf20Sopenharmony_ci
21378c2ecf20Sopenharmony_ci	list_for_each_entry_safe(req, n, &info->requests, queuelist) {
21388c2ecf20Sopenharmony_ci		/* Requeue pending requests (flush or discard) */
21398c2ecf20Sopenharmony_ci		list_del_init(&req->queuelist);
21408c2ecf20Sopenharmony_ci		BUG_ON(req->nr_phys_segments > segs);
21418c2ecf20Sopenharmony_ci		blk_mq_requeue_request(req, false);
21428c2ecf20Sopenharmony_ci	}
21438c2ecf20Sopenharmony_ci	blk_mq_start_stopped_hw_queues(info->rq, true);
21448c2ecf20Sopenharmony_ci	blk_mq_kick_requeue_list(info->rq);
21458c2ecf20Sopenharmony_ci
21468c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&info->bio_list)) != NULL) {
21478c2ecf20Sopenharmony_ci		/* Traverse the list of pending bios and re-queue them */
21488c2ecf20Sopenharmony_ci		submit_bio(bio);
21498c2ecf20Sopenharmony_ci	}
21508c2ecf20Sopenharmony_ci
21518c2ecf20Sopenharmony_ci	return 0;
21528c2ecf20Sopenharmony_ci}
21538c2ecf20Sopenharmony_ci
21548c2ecf20Sopenharmony_ci/**
21558c2ecf20Sopenharmony_ci * We are reconnecting to the backend, due to a suspend/resume, or a backend
21568c2ecf20Sopenharmony_ci * driver restart.  We tear down our blkif structure and recreate it, but
21578c2ecf20Sopenharmony_ci * leave the device-layer structures intact so that this is transparent to the
21588c2ecf20Sopenharmony_ci * rest of the kernel.
21598c2ecf20Sopenharmony_ci */
21608c2ecf20Sopenharmony_cistatic int blkfront_resume(struct xenbus_device *dev)
21618c2ecf20Sopenharmony_ci{
21628c2ecf20Sopenharmony_ci	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
21638c2ecf20Sopenharmony_ci	int err = 0;
21648c2ecf20Sopenharmony_ci	unsigned int i, j;
21658c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
21668c2ecf20Sopenharmony_ci
21678c2ecf20Sopenharmony_ci	dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
21688c2ecf20Sopenharmony_ci
21698c2ecf20Sopenharmony_ci	bio_list_init(&info->bio_list);
21708c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&info->requests);
21718c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i) {
21728c2ecf20Sopenharmony_ci		struct bio_list merge_bio;
21738c2ecf20Sopenharmony_ci		struct blk_shadow *shadow = rinfo->shadow;
21748c2ecf20Sopenharmony_ci
21758c2ecf20Sopenharmony_ci		for (j = 0; j < BLK_RING_SIZE(info); j++) {
21768c2ecf20Sopenharmony_ci			/* Not in use? */
21778c2ecf20Sopenharmony_ci			if (!shadow[j].request)
21788c2ecf20Sopenharmony_ci				continue;
21798c2ecf20Sopenharmony_ci
21808c2ecf20Sopenharmony_ci			/*
21818c2ecf20Sopenharmony_ci			 * Get the bios in the request so we can re-queue them.
21828c2ecf20Sopenharmony_ci			 */
21838c2ecf20Sopenharmony_ci			if (req_op(shadow[j].request) == REQ_OP_FLUSH ||
21848c2ecf20Sopenharmony_ci			    req_op(shadow[j].request) == REQ_OP_DISCARD ||
21858c2ecf20Sopenharmony_ci			    req_op(shadow[j].request) == REQ_OP_SECURE_ERASE ||
21868c2ecf20Sopenharmony_ci			    shadow[j].request->cmd_flags & REQ_FUA) {
21878c2ecf20Sopenharmony_ci				/*
21888c2ecf20Sopenharmony_ci				 * Flush operations don't contain bios, so
21898c2ecf20Sopenharmony_ci				 * we need to requeue the whole request
21908c2ecf20Sopenharmony_ci				 *
21918c2ecf20Sopenharmony_ci				 * XXX: but this doesn't make any sense for a
21928c2ecf20Sopenharmony_ci				 * write with the FUA flag set..
21938c2ecf20Sopenharmony_ci				 */
21948c2ecf20Sopenharmony_ci				list_add(&shadow[j].request->queuelist, &info->requests);
21958c2ecf20Sopenharmony_ci				continue;
21968c2ecf20Sopenharmony_ci			}
21978c2ecf20Sopenharmony_ci			merge_bio.head = shadow[j].request->bio;
21988c2ecf20Sopenharmony_ci			merge_bio.tail = shadow[j].request->biotail;
21998c2ecf20Sopenharmony_ci			bio_list_merge(&info->bio_list, &merge_bio);
22008c2ecf20Sopenharmony_ci			shadow[j].request->bio = NULL;
22018c2ecf20Sopenharmony_ci			blk_mq_end_request(shadow[j].request, BLK_STS_OK);
22028c2ecf20Sopenharmony_ci		}
22038c2ecf20Sopenharmony_ci	}
22048c2ecf20Sopenharmony_ci
22058c2ecf20Sopenharmony_ci	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_ci	err = talk_to_blkback(dev, info);
22088c2ecf20Sopenharmony_ci	if (!err)
22098c2ecf20Sopenharmony_ci		blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
22108c2ecf20Sopenharmony_ci
22118c2ecf20Sopenharmony_ci	/*
22128c2ecf20Sopenharmony_ci	 * We have to wait for the backend to switch to
22138c2ecf20Sopenharmony_ci	 * connected state, since we want to read which
22148c2ecf20Sopenharmony_ci	 * features it supports.
22158c2ecf20Sopenharmony_ci	 */
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ci	return err;
22188c2ecf20Sopenharmony_ci}
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_cistatic void blkfront_closing(struct blkfront_info *info)
22218c2ecf20Sopenharmony_ci{
22228c2ecf20Sopenharmony_ci	struct xenbus_device *xbdev = info->xbdev;
22238c2ecf20Sopenharmony_ci	struct block_device *bdev = NULL;
22248c2ecf20Sopenharmony_ci
22258c2ecf20Sopenharmony_ci	mutex_lock(&info->mutex);
22268c2ecf20Sopenharmony_ci
22278c2ecf20Sopenharmony_ci	if (xbdev->state == XenbusStateClosing) {
22288c2ecf20Sopenharmony_ci		mutex_unlock(&info->mutex);
22298c2ecf20Sopenharmony_ci		return;
22308c2ecf20Sopenharmony_ci	}
22318c2ecf20Sopenharmony_ci
22328c2ecf20Sopenharmony_ci	if (info->gd)
22338c2ecf20Sopenharmony_ci		bdev = bdget_disk(info->gd, 0);
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	mutex_unlock(&info->mutex);
22368c2ecf20Sopenharmony_ci
22378c2ecf20Sopenharmony_ci	if (!bdev) {
22388c2ecf20Sopenharmony_ci		xenbus_frontend_closed(xbdev);
22398c2ecf20Sopenharmony_ci		return;
22408c2ecf20Sopenharmony_ci	}
22418c2ecf20Sopenharmony_ci
22428c2ecf20Sopenharmony_ci	mutex_lock(&bdev->bd_mutex);
22438c2ecf20Sopenharmony_ci
22448c2ecf20Sopenharmony_ci	if (bdev->bd_openers) {
22458c2ecf20Sopenharmony_ci		xenbus_dev_error(xbdev, -EBUSY,
22468c2ecf20Sopenharmony_ci				 "Device in use; refusing to close");
22478c2ecf20Sopenharmony_ci		xenbus_switch_state(xbdev, XenbusStateClosing);
22488c2ecf20Sopenharmony_ci	} else {
22498c2ecf20Sopenharmony_ci		xlvbd_release_gendisk(info);
22508c2ecf20Sopenharmony_ci		xenbus_frontend_closed(xbdev);
22518c2ecf20Sopenharmony_ci	}
22528c2ecf20Sopenharmony_ci
22538c2ecf20Sopenharmony_ci	mutex_unlock(&bdev->bd_mutex);
22548c2ecf20Sopenharmony_ci	bdput(bdev);
22558c2ecf20Sopenharmony_ci}
22568c2ecf20Sopenharmony_ci
22578c2ecf20Sopenharmony_cistatic void blkfront_setup_discard(struct blkfront_info *info)
22588c2ecf20Sopenharmony_ci{
22598c2ecf20Sopenharmony_ci	info->feature_discard = 1;
22608c2ecf20Sopenharmony_ci	info->discard_granularity = xenbus_read_unsigned(info->xbdev->otherend,
22618c2ecf20Sopenharmony_ci							 "discard-granularity",
22628c2ecf20Sopenharmony_ci							 0);
22638c2ecf20Sopenharmony_ci	info->discard_alignment = xenbus_read_unsigned(info->xbdev->otherend,
22648c2ecf20Sopenharmony_ci						       "discard-alignment", 0);
22658c2ecf20Sopenharmony_ci	info->feature_secdiscard =
22668c2ecf20Sopenharmony_ci		!!xenbus_read_unsigned(info->xbdev->otherend, "discard-secure",
22678c2ecf20Sopenharmony_ci				       0);
22688c2ecf20Sopenharmony_ci}
22698c2ecf20Sopenharmony_ci
22708c2ecf20Sopenharmony_cistatic int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
22718c2ecf20Sopenharmony_ci{
22728c2ecf20Sopenharmony_ci	unsigned int psegs, grants, memflags;
22738c2ecf20Sopenharmony_ci	int err, i;
22748c2ecf20Sopenharmony_ci	struct blkfront_info *info = rinfo->dev_info;
22758c2ecf20Sopenharmony_ci
22768c2ecf20Sopenharmony_ci	memflags = memalloc_noio_save();
22778c2ecf20Sopenharmony_ci
22788c2ecf20Sopenharmony_ci	if (info->max_indirect_segments == 0) {
22798c2ecf20Sopenharmony_ci		if (!HAS_EXTRA_REQ)
22808c2ecf20Sopenharmony_ci			grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
22818c2ecf20Sopenharmony_ci		else {
22828c2ecf20Sopenharmony_ci			/*
22838c2ecf20Sopenharmony_ci			 * When an extra req is required, the maximum
22848c2ecf20Sopenharmony_ci			 * grants supported is related to the size of the
22858c2ecf20Sopenharmony_ci			 * Linux block segment.
22868c2ecf20Sopenharmony_ci			 */
22878c2ecf20Sopenharmony_ci			grants = GRANTS_PER_PSEG;
22888c2ecf20Sopenharmony_ci		}
22898c2ecf20Sopenharmony_ci	}
22908c2ecf20Sopenharmony_ci	else
22918c2ecf20Sopenharmony_ci		grants = info->max_indirect_segments;
22928c2ecf20Sopenharmony_ci	psegs = DIV_ROUND_UP(grants, GRANTS_PER_PSEG);
22938c2ecf20Sopenharmony_ci
22948c2ecf20Sopenharmony_ci	err = fill_grant_buffer(rinfo,
22958c2ecf20Sopenharmony_ci				(grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info));
22968c2ecf20Sopenharmony_ci	if (err)
22978c2ecf20Sopenharmony_ci		goto out_of_memory;
22988c2ecf20Sopenharmony_ci
22998c2ecf20Sopenharmony_ci	if (!info->bounce && info->max_indirect_segments) {
23008c2ecf20Sopenharmony_ci		/*
23018c2ecf20Sopenharmony_ci		 * We are using indirect descriptors but don't have a bounce
23028c2ecf20Sopenharmony_ci		 * buffer, we need to allocate a set of pages that can be
23038c2ecf20Sopenharmony_ci		 * used for mapping indirect grefs
23048c2ecf20Sopenharmony_ci		 */
23058c2ecf20Sopenharmony_ci		int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
23068c2ecf20Sopenharmony_ci
23078c2ecf20Sopenharmony_ci		BUG_ON(!list_empty(&rinfo->indirect_pages));
23088c2ecf20Sopenharmony_ci		for (i = 0; i < num; i++) {
23098c2ecf20Sopenharmony_ci			struct page *indirect_page = alloc_page(GFP_KERNEL |
23108c2ecf20Sopenharmony_ci			                                        __GFP_ZERO);
23118c2ecf20Sopenharmony_ci			if (!indirect_page)
23128c2ecf20Sopenharmony_ci				goto out_of_memory;
23138c2ecf20Sopenharmony_ci			list_add(&indirect_page->lru, &rinfo->indirect_pages);
23148c2ecf20Sopenharmony_ci		}
23158c2ecf20Sopenharmony_ci	}
23168c2ecf20Sopenharmony_ci
23178c2ecf20Sopenharmony_ci	for (i = 0; i < BLK_RING_SIZE(info); i++) {
23188c2ecf20Sopenharmony_ci		rinfo->shadow[i].grants_used =
23198c2ecf20Sopenharmony_ci			kvcalloc(grants,
23208c2ecf20Sopenharmony_ci				 sizeof(rinfo->shadow[i].grants_used[0]),
23218c2ecf20Sopenharmony_ci				 GFP_KERNEL);
23228c2ecf20Sopenharmony_ci		rinfo->shadow[i].sg = kvcalloc(psegs,
23238c2ecf20Sopenharmony_ci					       sizeof(rinfo->shadow[i].sg[0]),
23248c2ecf20Sopenharmony_ci					       GFP_KERNEL);
23258c2ecf20Sopenharmony_ci		if (info->max_indirect_segments)
23268c2ecf20Sopenharmony_ci			rinfo->shadow[i].indirect_grants =
23278c2ecf20Sopenharmony_ci				kvcalloc(INDIRECT_GREFS(grants),
23288c2ecf20Sopenharmony_ci					 sizeof(rinfo->shadow[i].indirect_grants[0]),
23298c2ecf20Sopenharmony_ci					 GFP_KERNEL);
23308c2ecf20Sopenharmony_ci		if ((rinfo->shadow[i].grants_used == NULL) ||
23318c2ecf20Sopenharmony_ci			(rinfo->shadow[i].sg == NULL) ||
23328c2ecf20Sopenharmony_ci		     (info->max_indirect_segments &&
23338c2ecf20Sopenharmony_ci		     (rinfo->shadow[i].indirect_grants == NULL)))
23348c2ecf20Sopenharmony_ci			goto out_of_memory;
23358c2ecf20Sopenharmony_ci		sg_init_table(rinfo->shadow[i].sg, psegs);
23368c2ecf20Sopenharmony_ci	}
23378c2ecf20Sopenharmony_ci
23388c2ecf20Sopenharmony_ci	memalloc_noio_restore(memflags);
23398c2ecf20Sopenharmony_ci
23408c2ecf20Sopenharmony_ci	return 0;
23418c2ecf20Sopenharmony_ci
23428c2ecf20Sopenharmony_ciout_of_memory:
23438c2ecf20Sopenharmony_ci	for (i = 0; i < BLK_RING_SIZE(info); i++) {
23448c2ecf20Sopenharmony_ci		kvfree(rinfo->shadow[i].grants_used);
23458c2ecf20Sopenharmony_ci		rinfo->shadow[i].grants_used = NULL;
23468c2ecf20Sopenharmony_ci		kvfree(rinfo->shadow[i].sg);
23478c2ecf20Sopenharmony_ci		rinfo->shadow[i].sg = NULL;
23488c2ecf20Sopenharmony_ci		kvfree(rinfo->shadow[i].indirect_grants);
23498c2ecf20Sopenharmony_ci		rinfo->shadow[i].indirect_grants = NULL;
23508c2ecf20Sopenharmony_ci	}
23518c2ecf20Sopenharmony_ci	if (!list_empty(&rinfo->indirect_pages)) {
23528c2ecf20Sopenharmony_ci		struct page *indirect_page, *n;
23538c2ecf20Sopenharmony_ci		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
23548c2ecf20Sopenharmony_ci			list_del(&indirect_page->lru);
23558c2ecf20Sopenharmony_ci			__free_page(indirect_page);
23568c2ecf20Sopenharmony_ci		}
23578c2ecf20Sopenharmony_ci	}
23588c2ecf20Sopenharmony_ci
23598c2ecf20Sopenharmony_ci	memalloc_noio_restore(memflags);
23608c2ecf20Sopenharmony_ci
23618c2ecf20Sopenharmony_ci	return -ENOMEM;
23628c2ecf20Sopenharmony_ci}
23638c2ecf20Sopenharmony_ci
23648c2ecf20Sopenharmony_ci/*
23658c2ecf20Sopenharmony_ci * Gather all backend feature-*
23668c2ecf20Sopenharmony_ci */
23678c2ecf20Sopenharmony_cistatic void blkfront_gather_backend_features(struct blkfront_info *info)
23688c2ecf20Sopenharmony_ci{
23698c2ecf20Sopenharmony_ci	unsigned int indirect_segments;
23708c2ecf20Sopenharmony_ci
23718c2ecf20Sopenharmony_ci	info->feature_flush = 0;
23728c2ecf20Sopenharmony_ci	info->feature_fua = 0;
23738c2ecf20Sopenharmony_ci
23748c2ecf20Sopenharmony_ci	/*
23758c2ecf20Sopenharmony_ci	 * If there's no "feature-barrier" defined, then it means
23768c2ecf20Sopenharmony_ci	 * we're dealing with a very old backend which writes
23778c2ecf20Sopenharmony_ci	 * synchronously; nothing to do.
23788c2ecf20Sopenharmony_ci	 *
23798c2ecf20Sopenharmony_ci	 * If there are barriers, then we use flush.
23808c2ecf20Sopenharmony_ci	 */
23818c2ecf20Sopenharmony_ci	if (xenbus_read_unsigned(info->xbdev->otherend, "feature-barrier", 0)) {
23828c2ecf20Sopenharmony_ci		info->feature_flush = 1;
23838c2ecf20Sopenharmony_ci		info->feature_fua = 1;
23848c2ecf20Sopenharmony_ci	}
23858c2ecf20Sopenharmony_ci
23868c2ecf20Sopenharmony_ci	/*
23878c2ecf20Sopenharmony_ci	 * And if there is "feature-flush-cache" use that above
23888c2ecf20Sopenharmony_ci	 * barriers.
23898c2ecf20Sopenharmony_ci	 */
23908c2ecf20Sopenharmony_ci	if (xenbus_read_unsigned(info->xbdev->otherend, "feature-flush-cache",
23918c2ecf20Sopenharmony_ci				 0)) {
23928c2ecf20Sopenharmony_ci		info->feature_flush = 1;
23938c2ecf20Sopenharmony_ci		info->feature_fua = 0;
23948c2ecf20Sopenharmony_ci	}
23958c2ecf20Sopenharmony_ci
23968c2ecf20Sopenharmony_ci	if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0))
23978c2ecf20Sopenharmony_ci		blkfront_setup_discard(info);
23988c2ecf20Sopenharmony_ci
23998c2ecf20Sopenharmony_ci	if (info->feature_persistent_parm)
24008c2ecf20Sopenharmony_ci		info->feature_persistent =
24018c2ecf20Sopenharmony_ci			!!xenbus_read_unsigned(info->xbdev->otherend,
24028c2ecf20Sopenharmony_ci					       "feature-persistent", 0);
24038c2ecf20Sopenharmony_ci	if (info->feature_persistent)
24048c2ecf20Sopenharmony_ci		info->bounce = true;
24058c2ecf20Sopenharmony_ci
24068c2ecf20Sopenharmony_ci	indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
24078c2ecf20Sopenharmony_ci					"feature-max-indirect-segments", 0);
24088c2ecf20Sopenharmony_ci	if (indirect_segments > xen_blkif_max_segments)
24098c2ecf20Sopenharmony_ci		indirect_segments = xen_blkif_max_segments;
24108c2ecf20Sopenharmony_ci	if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST)
24118c2ecf20Sopenharmony_ci		indirect_segments = 0;
24128c2ecf20Sopenharmony_ci	info->max_indirect_segments = indirect_segments;
24138c2ecf20Sopenharmony_ci
24148c2ecf20Sopenharmony_ci	if (info->feature_persistent) {
24158c2ecf20Sopenharmony_ci		mutex_lock(&blkfront_mutex);
24168c2ecf20Sopenharmony_ci		schedule_delayed_work(&blkfront_work, HZ * 10);
24178c2ecf20Sopenharmony_ci		mutex_unlock(&blkfront_mutex);
24188c2ecf20Sopenharmony_ci	}
24198c2ecf20Sopenharmony_ci}
24208c2ecf20Sopenharmony_ci
24218c2ecf20Sopenharmony_ci/*
24228c2ecf20Sopenharmony_ci * Invoked when the backend is finally 'ready' (and has told produced
24238c2ecf20Sopenharmony_ci * the details about the physical device - #sectors, size, etc).
24248c2ecf20Sopenharmony_ci */
24258c2ecf20Sopenharmony_cistatic void blkfront_connect(struct blkfront_info *info)
24268c2ecf20Sopenharmony_ci{
24278c2ecf20Sopenharmony_ci	unsigned long long sectors;
24288c2ecf20Sopenharmony_ci	unsigned long sector_size;
24298c2ecf20Sopenharmony_ci	unsigned int physical_sector_size;
24308c2ecf20Sopenharmony_ci	unsigned int binfo;
24318c2ecf20Sopenharmony_ci	int err, i;
24328c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
24338c2ecf20Sopenharmony_ci
24348c2ecf20Sopenharmony_ci	switch (info->connected) {
24358c2ecf20Sopenharmony_ci	case BLKIF_STATE_CONNECTED:
24368c2ecf20Sopenharmony_ci		/*
24378c2ecf20Sopenharmony_ci		 * Potentially, the back-end may be signalling
24388c2ecf20Sopenharmony_ci		 * a capacity change; update the capacity.
24398c2ecf20Sopenharmony_ci		 */
24408c2ecf20Sopenharmony_ci		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
24418c2ecf20Sopenharmony_ci				   "sectors", "%Lu", &sectors);
24428c2ecf20Sopenharmony_ci		if (XENBUS_EXIST_ERR(err))
24438c2ecf20Sopenharmony_ci			return;
24448c2ecf20Sopenharmony_ci		printk(KERN_INFO "Setting capacity to %Lu\n",
24458c2ecf20Sopenharmony_ci		       sectors);
24468c2ecf20Sopenharmony_ci		set_capacity_revalidate_and_notify(info->gd, sectors, true);
24478c2ecf20Sopenharmony_ci
24488c2ecf20Sopenharmony_ci		return;
24498c2ecf20Sopenharmony_ci	case BLKIF_STATE_SUSPENDED:
24508c2ecf20Sopenharmony_ci		/*
24518c2ecf20Sopenharmony_ci		 * If we are recovering from suspension, we need to wait
24528c2ecf20Sopenharmony_ci		 * for the backend to announce it's features before
24538c2ecf20Sopenharmony_ci		 * reconnecting, at least we need to know if the backend
24548c2ecf20Sopenharmony_ci		 * supports indirect descriptors, and how many.
24558c2ecf20Sopenharmony_ci		 */
24568c2ecf20Sopenharmony_ci		blkif_recover(info);
24578c2ecf20Sopenharmony_ci		return;
24588c2ecf20Sopenharmony_ci
24598c2ecf20Sopenharmony_ci	default:
24608c2ecf20Sopenharmony_ci		break;
24618c2ecf20Sopenharmony_ci	}
24628c2ecf20Sopenharmony_ci
24638c2ecf20Sopenharmony_ci	dev_dbg(&info->xbdev->dev, "%s:%s.\n",
24648c2ecf20Sopenharmony_ci		__func__, info->xbdev->otherend);
24658c2ecf20Sopenharmony_ci
24668c2ecf20Sopenharmony_ci	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
24678c2ecf20Sopenharmony_ci			    "sectors", "%llu", &sectors,
24688c2ecf20Sopenharmony_ci			    "info", "%u", &binfo,
24698c2ecf20Sopenharmony_ci			    "sector-size", "%lu", &sector_size,
24708c2ecf20Sopenharmony_ci			    NULL);
24718c2ecf20Sopenharmony_ci	if (err) {
24728c2ecf20Sopenharmony_ci		xenbus_dev_fatal(info->xbdev, err,
24738c2ecf20Sopenharmony_ci				 "reading backend fields at %s",
24748c2ecf20Sopenharmony_ci				 info->xbdev->otherend);
24758c2ecf20Sopenharmony_ci		return;
24768c2ecf20Sopenharmony_ci	}
24778c2ecf20Sopenharmony_ci
24788c2ecf20Sopenharmony_ci	/*
24798c2ecf20Sopenharmony_ci	 * physcial-sector-size is a newer field, so old backends may not
24808c2ecf20Sopenharmony_ci	 * provide this. Assume physical sector size to be the same as
24818c2ecf20Sopenharmony_ci	 * sector_size in that case.
24828c2ecf20Sopenharmony_ci	 */
24838c2ecf20Sopenharmony_ci	physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
24848c2ecf20Sopenharmony_ci						    "physical-sector-size",
24858c2ecf20Sopenharmony_ci						    sector_size);
24868c2ecf20Sopenharmony_ci	blkfront_gather_backend_features(info);
24878c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i) {
24888c2ecf20Sopenharmony_ci		err = blkfront_setup_indirect(rinfo);
24898c2ecf20Sopenharmony_ci		if (err) {
24908c2ecf20Sopenharmony_ci			xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
24918c2ecf20Sopenharmony_ci					 info->xbdev->otherend);
24928c2ecf20Sopenharmony_ci			blkif_free(info, 0);
24938c2ecf20Sopenharmony_ci			break;
24948c2ecf20Sopenharmony_ci		}
24958c2ecf20Sopenharmony_ci	}
24968c2ecf20Sopenharmony_ci
24978c2ecf20Sopenharmony_ci	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
24988c2ecf20Sopenharmony_ci				  physical_sector_size);
24998c2ecf20Sopenharmony_ci	if (err) {
25008c2ecf20Sopenharmony_ci		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
25018c2ecf20Sopenharmony_ci				 info->xbdev->otherend);
25028c2ecf20Sopenharmony_ci		goto fail;
25038c2ecf20Sopenharmony_ci	}
25048c2ecf20Sopenharmony_ci
25058c2ecf20Sopenharmony_ci	xenbus_switch_state(info->xbdev, XenbusStateConnected);
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_ci	/* Kick pending requests. */
25088c2ecf20Sopenharmony_ci	info->connected = BLKIF_STATE_CONNECTED;
25098c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i)
25108c2ecf20Sopenharmony_ci		kick_pending_request_queues(rinfo);
25118c2ecf20Sopenharmony_ci
25128c2ecf20Sopenharmony_ci	device_add_disk(&info->xbdev->dev, info->gd, NULL);
25138c2ecf20Sopenharmony_ci
25148c2ecf20Sopenharmony_ci	info->is_ready = 1;
25158c2ecf20Sopenharmony_ci	return;
25168c2ecf20Sopenharmony_ci
25178c2ecf20Sopenharmony_cifail:
25188c2ecf20Sopenharmony_ci	blkif_free(info, 0);
25198c2ecf20Sopenharmony_ci	return;
25208c2ecf20Sopenharmony_ci}
25218c2ecf20Sopenharmony_ci
25228c2ecf20Sopenharmony_ci/**
25238c2ecf20Sopenharmony_ci * Callback received when the backend's state changes.
25248c2ecf20Sopenharmony_ci */
25258c2ecf20Sopenharmony_cistatic void blkback_changed(struct xenbus_device *dev,
25268c2ecf20Sopenharmony_ci			    enum xenbus_state backend_state)
25278c2ecf20Sopenharmony_ci{
25288c2ecf20Sopenharmony_ci	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
25298c2ecf20Sopenharmony_ci
25308c2ecf20Sopenharmony_ci	dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
25318c2ecf20Sopenharmony_ci
25328c2ecf20Sopenharmony_ci	switch (backend_state) {
25338c2ecf20Sopenharmony_ci	case XenbusStateInitWait:
25348c2ecf20Sopenharmony_ci		if (dev->state != XenbusStateInitialising)
25358c2ecf20Sopenharmony_ci			break;
25368c2ecf20Sopenharmony_ci		if (talk_to_blkback(dev, info))
25378c2ecf20Sopenharmony_ci			break;
25388c2ecf20Sopenharmony_ci	case XenbusStateInitialising:
25398c2ecf20Sopenharmony_ci	case XenbusStateInitialised:
25408c2ecf20Sopenharmony_ci	case XenbusStateReconfiguring:
25418c2ecf20Sopenharmony_ci	case XenbusStateReconfigured:
25428c2ecf20Sopenharmony_ci	case XenbusStateUnknown:
25438c2ecf20Sopenharmony_ci		break;
25448c2ecf20Sopenharmony_ci
25458c2ecf20Sopenharmony_ci	case XenbusStateConnected:
25468c2ecf20Sopenharmony_ci		/*
25478c2ecf20Sopenharmony_ci		 * talk_to_blkback sets state to XenbusStateInitialised
25488c2ecf20Sopenharmony_ci		 * and blkfront_connect sets it to XenbusStateConnected
25498c2ecf20Sopenharmony_ci		 * (if connection went OK).
25508c2ecf20Sopenharmony_ci		 *
25518c2ecf20Sopenharmony_ci		 * If the backend (or toolstack) decides to poke at backend
25528c2ecf20Sopenharmony_ci		 * state (and re-trigger the watch by setting the state repeatedly
25538c2ecf20Sopenharmony_ci		 * to XenbusStateConnected (4)) we need to deal with this.
25548c2ecf20Sopenharmony_ci		 * This is allowed as this is used to communicate to the guest
25558c2ecf20Sopenharmony_ci		 * that the size of disk has changed!
25568c2ecf20Sopenharmony_ci		 */
25578c2ecf20Sopenharmony_ci		if ((dev->state != XenbusStateInitialised) &&
25588c2ecf20Sopenharmony_ci		    (dev->state != XenbusStateConnected)) {
25598c2ecf20Sopenharmony_ci			if (talk_to_blkback(dev, info))
25608c2ecf20Sopenharmony_ci				break;
25618c2ecf20Sopenharmony_ci		}
25628c2ecf20Sopenharmony_ci
25638c2ecf20Sopenharmony_ci		blkfront_connect(info);
25648c2ecf20Sopenharmony_ci		break;
25658c2ecf20Sopenharmony_ci
25668c2ecf20Sopenharmony_ci	case XenbusStateClosed:
25678c2ecf20Sopenharmony_ci		if (dev->state == XenbusStateClosed)
25688c2ecf20Sopenharmony_ci			break;
25698c2ecf20Sopenharmony_ci		fallthrough;
25708c2ecf20Sopenharmony_ci	case XenbusStateClosing:
25718c2ecf20Sopenharmony_ci		if (info)
25728c2ecf20Sopenharmony_ci			blkfront_closing(info);
25738c2ecf20Sopenharmony_ci		break;
25748c2ecf20Sopenharmony_ci	}
25758c2ecf20Sopenharmony_ci}
25768c2ecf20Sopenharmony_ci
25778c2ecf20Sopenharmony_cistatic int blkfront_remove(struct xenbus_device *xbdev)
25788c2ecf20Sopenharmony_ci{
25798c2ecf20Sopenharmony_ci	struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
25808c2ecf20Sopenharmony_ci	struct block_device *bdev = NULL;
25818c2ecf20Sopenharmony_ci	struct gendisk *disk;
25828c2ecf20Sopenharmony_ci
25838c2ecf20Sopenharmony_ci	dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
25848c2ecf20Sopenharmony_ci
25858c2ecf20Sopenharmony_ci	if (!info)
25868c2ecf20Sopenharmony_ci		return 0;
25878c2ecf20Sopenharmony_ci
25888c2ecf20Sopenharmony_ci	blkif_free(info, 0);
25898c2ecf20Sopenharmony_ci
25908c2ecf20Sopenharmony_ci	mutex_lock(&info->mutex);
25918c2ecf20Sopenharmony_ci
25928c2ecf20Sopenharmony_ci	disk = info->gd;
25938c2ecf20Sopenharmony_ci	if (disk)
25948c2ecf20Sopenharmony_ci		bdev = bdget_disk(disk, 0);
25958c2ecf20Sopenharmony_ci
25968c2ecf20Sopenharmony_ci	info->xbdev = NULL;
25978c2ecf20Sopenharmony_ci	mutex_unlock(&info->mutex);
25988c2ecf20Sopenharmony_ci
25998c2ecf20Sopenharmony_ci	if (!bdev) {
26008c2ecf20Sopenharmony_ci		mutex_lock(&blkfront_mutex);
26018c2ecf20Sopenharmony_ci		free_info(info);
26028c2ecf20Sopenharmony_ci		mutex_unlock(&blkfront_mutex);
26038c2ecf20Sopenharmony_ci		return 0;
26048c2ecf20Sopenharmony_ci	}
26058c2ecf20Sopenharmony_ci
26068c2ecf20Sopenharmony_ci	/*
26078c2ecf20Sopenharmony_ci	 * The xbdev was removed before we reached the Closed
26088c2ecf20Sopenharmony_ci	 * state. See if it's safe to remove the disk. If the bdev
26098c2ecf20Sopenharmony_ci	 * isn't closed yet, we let release take care of it.
26108c2ecf20Sopenharmony_ci	 */
26118c2ecf20Sopenharmony_ci
26128c2ecf20Sopenharmony_ci	mutex_lock(&bdev->bd_mutex);
26138c2ecf20Sopenharmony_ci	info = disk->private_data;
26148c2ecf20Sopenharmony_ci
26158c2ecf20Sopenharmony_ci	dev_warn(disk_to_dev(disk),
26168c2ecf20Sopenharmony_ci		 "%s was hot-unplugged, %d stale handles\n",
26178c2ecf20Sopenharmony_ci		 xbdev->nodename, bdev->bd_openers);
26188c2ecf20Sopenharmony_ci
26198c2ecf20Sopenharmony_ci	if (info && !bdev->bd_openers) {
26208c2ecf20Sopenharmony_ci		xlvbd_release_gendisk(info);
26218c2ecf20Sopenharmony_ci		disk->private_data = NULL;
26228c2ecf20Sopenharmony_ci		mutex_lock(&blkfront_mutex);
26238c2ecf20Sopenharmony_ci		free_info(info);
26248c2ecf20Sopenharmony_ci		mutex_unlock(&blkfront_mutex);
26258c2ecf20Sopenharmony_ci	}
26268c2ecf20Sopenharmony_ci
26278c2ecf20Sopenharmony_ci	mutex_unlock(&bdev->bd_mutex);
26288c2ecf20Sopenharmony_ci	bdput(bdev);
26298c2ecf20Sopenharmony_ci
26308c2ecf20Sopenharmony_ci	return 0;
26318c2ecf20Sopenharmony_ci}
26328c2ecf20Sopenharmony_ci
26338c2ecf20Sopenharmony_cistatic int blkfront_is_ready(struct xenbus_device *dev)
26348c2ecf20Sopenharmony_ci{
26358c2ecf20Sopenharmony_ci	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
26368c2ecf20Sopenharmony_ci
26378c2ecf20Sopenharmony_ci	return info->is_ready && info->xbdev;
26388c2ecf20Sopenharmony_ci}
26398c2ecf20Sopenharmony_ci
26408c2ecf20Sopenharmony_cistatic int blkif_open(struct block_device *bdev, fmode_t mode)
26418c2ecf20Sopenharmony_ci{
26428c2ecf20Sopenharmony_ci	struct gendisk *disk = bdev->bd_disk;
26438c2ecf20Sopenharmony_ci	struct blkfront_info *info;
26448c2ecf20Sopenharmony_ci	int err = 0;
26458c2ecf20Sopenharmony_ci
26468c2ecf20Sopenharmony_ci	mutex_lock(&blkfront_mutex);
26478c2ecf20Sopenharmony_ci
26488c2ecf20Sopenharmony_ci	info = disk->private_data;
26498c2ecf20Sopenharmony_ci	if (!info) {
26508c2ecf20Sopenharmony_ci		/* xbdev gone */
26518c2ecf20Sopenharmony_ci		err = -ERESTARTSYS;
26528c2ecf20Sopenharmony_ci		goto out;
26538c2ecf20Sopenharmony_ci	}
26548c2ecf20Sopenharmony_ci
26558c2ecf20Sopenharmony_ci	mutex_lock(&info->mutex);
26568c2ecf20Sopenharmony_ci
26578c2ecf20Sopenharmony_ci	if (!info->gd)
26588c2ecf20Sopenharmony_ci		/* xbdev is closed */
26598c2ecf20Sopenharmony_ci		err = -ERESTARTSYS;
26608c2ecf20Sopenharmony_ci
26618c2ecf20Sopenharmony_ci	mutex_unlock(&info->mutex);
26628c2ecf20Sopenharmony_ci
26638c2ecf20Sopenharmony_ciout:
26648c2ecf20Sopenharmony_ci	mutex_unlock(&blkfront_mutex);
26658c2ecf20Sopenharmony_ci	return err;
26668c2ecf20Sopenharmony_ci}
26678c2ecf20Sopenharmony_ci
26688c2ecf20Sopenharmony_cistatic void blkif_release(struct gendisk *disk, fmode_t mode)
26698c2ecf20Sopenharmony_ci{
26708c2ecf20Sopenharmony_ci	struct blkfront_info *info = disk->private_data;
26718c2ecf20Sopenharmony_ci	struct block_device *bdev;
26728c2ecf20Sopenharmony_ci	struct xenbus_device *xbdev;
26738c2ecf20Sopenharmony_ci
26748c2ecf20Sopenharmony_ci	mutex_lock(&blkfront_mutex);
26758c2ecf20Sopenharmony_ci
26768c2ecf20Sopenharmony_ci	bdev = bdget_disk(disk, 0);
26778c2ecf20Sopenharmony_ci
26788c2ecf20Sopenharmony_ci	if (!bdev) {
26798c2ecf20Sopenharmony_ci		WARN(1, "Block device %s yanked out from us!\n", disk->disk_name);
26808c2ecf20Sopenharmony_ci		goto out_mutex;
26818c2ecf20Sopenharmony_ci	}
26828c2ecf20Sopenharmony_ci	if (bdev->bd_openers)
26838c2ecf20Sopenharmony_ci		goto out;
26848c2ecf20Sopenharmony_ci
26858c2ecf20Sopenharmony_ci	/*
26868c2ecf20Sopenharmony_ci	 * Check if we have been instructed to close. We will have
26878c2ecf20Sopenharmony_ci	 * deferred this request, because the bdev was still open.
26888c2ecf20Sopenharmony_ci	 */
26898c2ecf20Sopenharmony_ci
26908c2ecf20Sopenharmony_ci	mutex_lock(&info->mutex);
26918c2ecf20Sopenharmony_ci	xbdev = info->xbdev;
26928c2ecf20Sopenharmony_ci
26938c2ecf20Sopenharmony_ci	if (xbdev && xbdev->state == XenbusStateClosing) {
26948c2ecf20Sopenharmony_ci		/* pending switch to state closed */
26958c2ecf20Sopenharmony_ci		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
26968c2ecf20Sopenharmony_ci		xlvbd_release_gendisk(info);
26978c2ecf20Sopenharmony_ci		xenbus_frontend_closed(info->xbdev);
26988c2ecf20Sopenharmony_ci 	}
26998c2ecf20Sopenharmony_ci
27008c2ecf20Sopenharmony_ci	mutex_unlock(&info->mutex);
27018c2ecf20Sopenharmony_ci
27028c2ecf20Sopenharmony_ci	if (!xbdev) {
27038c2ecf20Sopenharmony_ci		/* sudden device removal */
27048c2ecf20Sopenharmony_ci		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
27058c2ecf20Sopenharmony_ci		xlvbd_release_gendisk(info);
27068c2ecf20Sopenharmony_ci		disk->private_data = NULL;
27078c2ecf20Sopenharmony_ci		free_info(info);
27088c2ecf20Sopenharmony_ci	}
27098c2ecf20Sopenharmony_ci
27108c2ecf20Sopenharmony_ciout:
27118c2ecf20Sopenharmony_ci	bdput(bdev);
27128c2ecf20Sopenharmony_ciout_mutex:
27138c2ecf20Sopenharmony_ci	mutex_unlock(&blkfront_mutex);
27148c2ecf20Sopenharmony_ci}
27158c2ecf20Sopenharmony_ci
27168c2ecf20Sopenharmony_cistatic const struct block_device_operations xlvbd_block_fops =
27178c2ecf20Sopenharmony_ci{
27188c2ecf20Sopenharmony_ci	.owner = THIS_MODULE,
27198c2ecf20Sopenharmony_ci	.open = blkif_open,
27208c2ecf20Sopenharmony_ci	.release = blkif_release,
27218c2ecf20Sopenharmony_ci	.getgeo = blkif_getgeo,
27228c2ecf20Sopenharmony_ci	.ioctl = blkif_ioctl,
27238c2ecf20Sopenharmony_ci	.compat_ioctl = blkdev_compat_ptr_ioctl,
27248c2ecf20Sopenharmony_ci};
27258c2ecf20Sopenharmony_ci
27268c2ecf20Sopenharmony_ci
27278c2ecf20Sopenharmony_cistatic const struct xenbus_device_id blkfront_ids[] = {
27288c2ecf20Sopenharmony_ci	{ "vbd" },
27298c2ecf20Sopenharmony_ci	{ "" }
27308c2ecf20Sopenharmony_ci};
27318c2ecf20Sopenharmony_ci
27328c2ecf20Sopenharmony_cistatic struct xenbus_driver blkfront_driver = {
27338c2ecf20Sopenharmony_ci	.ids  = blkfront_ids,
27348c2ecf20Sopenharmony_ci	.probe = blkfront_probe,
27358c2ecf20Sopenharmony_ci	.remove = blkfront_remove,
27368c2ecf20Sopenharmony_ci	.resume = blkfront_resume,
27378c2ecf20Sopenharmony_ci	.otherend_changed = blkback_changed,
27388c2ecf20Sopenharmony_ci	.is_ready = blkfront_is_ready,
27398c2ecf20Sopenharmony_ci};
27408c2ecf20Sopenharmony_ci
27418c2ecf20Sopenharmony_cistatic void purge_persistent_grants(struct blkfront_info *info)
27428c2ecf20Sopenharmony_ci{
27438c2ecf20Sopenharmony_ci	unsigned int i;
27448c2ecf20Sopenharmony_ci	unsigned long flags;
27458c2ecf20Sopenharmony_ci	struct blkfront_ring_info *rinfo;
27468c2ecf20Sopenharmony_ci
27478c2ecf20Sopenharmony_ci	for_each_rinfo(info, rinfo, i) {
27488c2ecf20Sopenharmony_ci		struct grant *gnt_list_entry, *tmp;
27498c2ecf20Sopenharmony_ci
27508c2ecf20Sopenharmony_ci		spin_lock_irqsave(&rinfo->ring_lock, flags);
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ci		if (rinfo->persistent_gnts_c == 0) {
27538c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&rinfo->ring_lock, flags);
27548c2ecf20Sopenharmony_ci			continue;
27558c2ecf20Sopenharmony_ci		}
27568c2ecf20Sopenharmony_ci
27578c2ecf20Sopenharmony_ci		list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
27588c2ecf20Sopenharmony_ci					 node) {
27598c2ecf20Sopenharmony_ci			if (gnt_list_entry->gref == GRANT_INVALID_REF ||
27608c2ecf20Sopenharmony_ci			    !gnttab_try_end_foreign_access(gnt_list_entry->gref))
27618c2ecf20Sopenharmony_ci				continue;
27628c2ecf20Sopenharmony_ci
27638c2ecf20Sopenharmony_ci			list_del(&gnt_list_entry->node);
27648c2ecf20Sopenharmony_ci			rinfo->persistent_gnts_c--;
27658c2ecf20Sopenharmony_ci			gnt_list_entry->gref = GRANT_INVALID_REF;
27668c2ecf20Sopenharmony_ci			list_add_tail(&gnt_list_entry->node, &rinfo->grants);
27678c2ecf20Sopenharmony_ci		}
27688c2ecf20Sopenharmony_ci
27698c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&rinfo->ring_lock, flags);
27708c2ecf20Sopenharmony_ci	}
27718c2ecf20Sopenharmony_ci}
27728c2ecf20Sopenharmony_ci
27738c2ecf20Sopenharmony_cistatic void blkfront_delay_work(struct work_struct *work)
27748c2ecf20Sopenharmony_ci{
27758c2ecf20Sopenharmony_ci	struct blkfront_info *info;
27768c2ecf20Sopenharmony_ci	bool need_schedule_work = false;
27778c2ecf20Sopenharmony_ci
27788c2ecf20Sopenharmony_ci	/*
27798c2ecf20Sopenharmony_ci	 * Note that when using bounce buffers but not persistent grants
27808c2ecf20Sopenharmony_ci	 * there's no need to run blkfront_delay_work because grants are
27818c2ecf20Sopenharmony_ci	 * revoked in blkif_completion or else an error is reported and the
27828c2ecf20Sopenharmony_ci	 * connection is closed.
27838c2ecf20Sopenharmony_ci	 */
27848c2ecf20Sopenharmony_ci
27858c2ecf20Sopenharmony_ci	mutex_lock(&blkfront_mutex);
27868c2ecf20Sopenharmony_ci
27878c2ecf20Sopenharmony_ci	list_for_each_entry(info, &info_list, info_list) {
27888c2ecf20Sopenharmony_ci		if (info->feature_persistent) {
27898c2ecf20Sopenharmony_ci			need_schedule_work = true;
27908c2ecf20Sopenharmony_ci			mutex_lock(&info->mutex);
27918c2ecf20Sopenharmony_ci			purge_persistent_grants(info);
27928c2ecf20Sopenharmony_ci			mutex_unlock(&info->mutex);
27938c2ecf20Sopenharmony_ci		}
27948c2ecf20Sopenharmony_ci	}
27958c2ecf20Sopenharmony_ci
27968c2ecf20Sopenharmony_ci	if (need_schedule_work)
27978c2ecf20Sopenharmony_ci		schedule_delayed_work(&blkfront_work, HZ * 10);
27988c2ecf20Sopenharmony_ci
27998c2ecf20Sopenharmony_ci	mutex_unlock(&blkfront_mutex);
28008c2ecf20Sopenharmony_ci}
28018c2ecf20Sopenharmony_ci
28028c2ecf20Sopenharmony_cistatic int __init xlblk_init(void)
28038c2ecf20Sopenharmony_ci{
28048c2ecf20Sopenharmony_ci	int ret;
28058c2ecf20Sopenharmony_ci	int nr_cpus = num_online_cpus();
28068c2ecf20Sopenharmony_ci
28078c2ecf20Sopenharmony_ci	if (!xen_domain())
28088c2ecf20Sopenharmony_ci		return -ENODEV;
28098c2ecf20Sopenharmony_ci
28108c2ecf20Sopenharmony_ci	if (!xen_has_pv_disk_devices())
28118c2ecf20Sopenharmony_ci		return -ENODEV;
28128c2ecf20Sopenharmony_ci
28138c2ecf20Sopenharmony_ci	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
28148c2ecf20Sopenharmony_ci		pr_warn("xen_blk: can't get major %d with name %s\n",
28158c2ecf20Sopenharmony_ci			XENVBD_MAJOR, DEV_NAME);
28168c2ecf20Sopenharmony_ci		return -ENODEV;
28178c2ecf20Sopenharmony_ci	}
28188c2ecf20Sopenharmony_ci
28198c2ecf20Sopenharmony_ci	if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
28208c2ecf20Sopenharmony_ci		xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
28218c2ecf20Sopenharmony_ci
28228c2ecf20Sopenharmony_ci	if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
28238c2ecf20Sopenharmony_ci		pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
28248c2ecf20Sopenharmony_ci			xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
28258c2ecf20Sopenharmony_ci		xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
28268c2ecf20Sopenharmony_ci	}
28278c2ecf20Sopenharmony_ci
28288c2ecf20Sopenharmony_ci	if (xen_blkif_max_queues > nr_cpus) {
28298c2ecf20Sopenharmony_ci		pr_info("Invalid max_queues (%d), will use default max: %d.\n",
28308c2ecf20Sopenharmony_ci			xen_blkif_max_queues, nr_cpus);
28318c2ecf20Sopenharmony_ci		xen_blkif_max_queues = nr_cpus;
28328c2ecf20Sopenharmony_ci	}
28338c2ecf20Sopenharmony_ci
28348c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work);
28358c2ecf20Sopenharmony_ci
28368c2ecf20Sopenharmony_ci	ret = xenbus_register_frontend(&blkfront_driver);
28378c2ecf20Sopenharmony_ci	if (ret) {
28388c2ecf20Sopenharmony_ci		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
28398c2ecf20Sopenharmony_ci		return ret;
28408c2ecf20Sopenharmony_ci	}
28418c2ecf20Sopenharmony_ci
28428c2ecf20Sopenharmony_ci	return 0;
28438c2ecf20Sopenharmony_ci}
28448c2ecf20Sopenharmony_cimodule_init(xlblk_init);
28458c2ecf20Sopenharmony_ci
28468c2ecf20Sopenharmony_ci
28478c2ecf20Sopenharmony_cistatic void __exit xlblk_exit(void)
28488c2ecf20Sopenharmony_ci{
28498c2ecf20Sopenharmony_ci	cancel_delayed_work_sync(&blkfront_work);
28508c2ecf20Sopenharmony_ci
28518c2ecf20Sopenharmony_ci	xenbus_unregister_driver(&blkfront_driver);
28528c2ecf20Sopenharmony_ci	unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
28538c2ecf20Sopenharmony_ci	kfree(minors);
28548c2ecf20Sopenharmony_ci}
28558c2ecf20Sopenharmony_cimodule_exit(xlblk_exit);
28568c2ecf20Sopenharmony_ci
28578c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Xen virtual block device frontend");
28588c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
28598c2ecf20Sopenharmony_ciMODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
28608c2ecf20Sopenharmony_ciMODULE_ALIAS("xen:vbd");
28618c2ecf20Sopenharmony_ciMODULE_ALIAS("xenblk");
2862