// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 *
 */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/init.h>
1162306a36Sopenharmony_ci#include <linux/module.h>
1262306a36Sopenharmony_ci#include <linux/device.h>
1362306a36Sopenharmony_ci#include <linux/kernel.h>
1462306a36Sopenharmony_ci#include <linux/blkdev.h>
1562306a36Sopenharmony_ci#include <linux/vringh.h>
1662306a36Sopenharmony_ci#include <linux/vdpa.h>
1762306a36Sopenharmony_ci#include <uapi/linux/virtio_blk.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "vdpa_sim.h"
2062306a36Sopenharmony_ci
/* Module metadata strings, consumed by the MODULE_*() macros at the end of this file. */
#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"
2562306a36Sopenharmony_ci
/*
 * Feature bits offered by the simulated block device: the common simulator
 * features plus the virtio-blk specific bits listed below. The mask is handed
 * to the simulator core as 'supported_features' when a device is created.
 */
#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH)    | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ)       | \
				 (1ULL << VIRTIO_BLK_F_DISCARD)  | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))
3562306a36Sopenharmony_ci
/* Device size in sectors; the backing buffer is (capacity << SECTOR_SHIFT) bytes. */
#define VDPASIM_BLK_CAPACITY	0x40000
/* Value reported to the driver in the 'size_max' config field. */
#define VDPASIM_BLK_SIZE_MAX	0x1000
/* Value reported to the driver in the 'seg_max' config field. */
#define VDPASIM_BLK_SEG_MAX	32
/*
 * Per-request sector limit for discard and write-zeroes; also reported in the
 * 'max_discard_sectors' and 'max_write_zeroes_sectors' config fields.
 */
#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX
4062306a36Sopenharmony_ci
/*
 * 1 virtqueue, 1 address space, 1 virtqueue group.
 * These counts are passed to the simulator core through the device attributes
 * when a device is created; the virtqueue count is also the number of queues
 * serviced by the work function and reported in the config space.
 */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1
4562306a36Sopenharmony_ci
/*
 * Per-device state of the block simulator.
 *
 * The generic simulator state is embedded so that sim_to_blk() can recover
 * this structure from a 'struct vdpasim' pointer via container_of().
 */
struct vdpasim_blk {
	struct vdpasim vdpasim;
	/* Backing store: a private allocation, or the module-wide shared_buffer */
	void *buffer;
	/* Copy of the shared_backend module parameter taken when the device is added */
	bool shared_backend;
};
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_cistatic struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
5362306a36Sopenharmony_ci{
5462306a36Sopenharmony_ci	return container_of(vdpasim, struct vdpasim_blk, vdpasim);
5562306a36Sopenharmony_ci}
5662306a36Sopenharmony_ci
/* Device ID string returned for VIRTIO_BLK_T_GET_ID requests; unused bytes are zero. */
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
5862306a36Sopenharmony_ci
/*
 * Module parameter (mode 0444): when set, every device created by this module
 * uses the single module-wide shared_buffer instead of a private buffer.
 */
static bool shared_backend;
module_param(shared_backend, bool, 0444);
MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");
6262306a36Sopenharmony_ci
/* Backing store used by all devices when shared_backend is set; allocated at module init. */
static void *shared_buffer;
/* mutex to synchronize shared_buffer access */
static DEFINE_MUTEX(shared_buffer_mutex);
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cistatic void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	if (blk->shared_backend)
7062306a36Sopenharmony_ci		mutex_lock(&shared_buffer_mutex);
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_cistatic void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
7462306a36Sopenharmony_ci{
7562306a36Sopenharmony_ci	if (blk->shared_backend)
7662306a36Sopenharmony_ci		mutex_unlock(&shared_buffer_mutex);
7762306a36Sopenharmony_ci}
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_cistatic bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
8062306a36Sopenharmony_ci				    u64 num_sectors, u64 max_sectors)
8162306a36Sopenharmony_ci{
8262306a36Sopenharmony_ci	if (start_sector > VDPASIM_BLK_CAPACITY) {
8362306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev,
8462306a36Sopenharmony_ci			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
8562306a36Sopenharmony_ci			start_sector, VDPASIM_BLK_CAPACITY);
8662306a36Sopenharmony_ci	}
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	if (num_sectors > max_sectors) {
8962306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev,
9062306a36Sopenharmony_ci			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
9162306a36Sopenharmony_ci			num_sectors, max_sectors);
9262306a36Sopenharmony_ci		return false;
9362306a36Sopenharmony_ci	}
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
9662306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev,
9762306a36Sopenharmony_ci			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
9862306a36Sopenharmony_ci			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
9962306a36Sopenharmony_ci		return false;
10062306a36Sopenharmony_ci	}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	return true;
10362306a36Sopenharmony_ci}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci/* Returns 'true' if the request is handled (with or without an I/O error)
10662306a36Sopenharmony_ci * and the status is correctly written in the last byte of the 'in iov',
10762306a36Sopenharmony_ci * 'false' otherwise.
10862306a36Sopenharmony_ci */
10962306a36Sopenharmony_cistatic bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
11062306a36Sopenharmony_ci				   struct vdpasim_virtqueue *vq)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
11362306a36Sopenharmony_ci	size_t pushed = 0, to_pull, to_push;
11462306a36Sopenharmony_ci	struct virtio_blk_outhdr hdr;
11562306a36Sopenharmony_ci	bool handled = false;
11662306a36Sopenharmony_ci	ssize_t bytes;
11762306a36Sopenharmony_ci	loff_t offset;
11862306a36Sopenharmony_ci	u64 sector;
11962306a36Sopenharmony_ci	u8 status;
12062306a36Sopenharmony_ci	u32 type;
12162306a36Sopenharmony_ci	int ret;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
12462306a36Sopenharmony_ci				   &vq->head, GFP_ATOMIC);
12562306a36Sopenharmony_ci	if (ret != 1)
12662306a36Sopenharmony_ci		return false;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
12962306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
13062306a36Sopenharmony_ci			vq->out_iov.used, vq->in_iov.used);
13162306a36Sopenharmony_ci		goto err;
13262306a36Sopenharmony_ci	}
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
13562306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
13662306a36Sopenharmony_ci		goto err;
13762306a36Sopenharmony_ci	}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	/* The last byte is the status and we checked if the last iov has
14062306a36Sopenharmony_ci	 * enough room for it.
14162306a36Sopenharmony_ci	 */
14262306a36Sopenharmony_ci	to_push = vringh_kiov_length(&vq->in_iov) - 1;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	to_pull = vringh_kiov_length(&vq->out_iov);
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
14762306a36Sopenharmony_ci				      sizeof(hdr));
14862306a36Sopenharmony_ci	if (bytes != sizeof(hdr)) {
14962306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
15062306a36Sopenharmony_ci		goto err;
15162306a36Sopenharmony_ci	}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	to_pull -= bytes;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	type = vdpasim32_to_cpu(vdpasim, hdr.type);
15662306a36Sopenharmony_ci	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
15762306a36Sopenharmony_ci	offset = sector << SECTOR_SHIFT;
15862306a36Sopenharmony_ci	status = VIRTIO_BLK_S_OK;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
16162306a36Sopenharmony_ci	    sector != 0) {
16262306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev,
16362306a36Sopenharmony_ci			"sector must be 0 for %u request - sector: 0x%llx\n",
16462306a36Sopenharmony_ci			type, sector);
16562306a36Sopenharmony_ci		status = VIRTIO_BLK_S_IOERR;
16662306a36Sopenharmony_ci		goto err_status;
16762306a36Sopenharmony_ci	}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	switch (type) {
17062306a36Sopenharmony_ci	case VIRTIO_BLK_T_IN:
17162306a36Sopenharmony_ci		if (!vdpasim_blk_check_range(vdpasim, sector,
17262306a36Sopenharmony_ci					     to_push >> SECTOR_SHIFT,
17362306a36Sopenharmony_ci					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
17462306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
17562306a36Sopenharmony_ci			break;
17662306a36Sopenharmony_ci		}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci		vdpasim_blk_buffer_lock(blk);
17962306a36Sopenharmony_ci		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
18062306a36Sopenharmony_ci					      blk->buffer + offset, to_push);
18162306a36Sopenharmony_ci		vdpasim_blk_buffer_unlock(blk);
18262306a36Sopenharmony_ci		if (bytes < 0) {
18362306a36Sopenharmony_ci			dev_dbg(&vdpasim->vdpa.dev,
18462306a36Sopenharmony_ci				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
18562306a36Sopenharmony_ci				bytes, offset, to_push);
18662306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
18762306a36Sopenharmony_ci			break;
18862306a36Sopenharmony_ci		}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci		pushed += bytes;
19162306a36Sopenharmony_ci		break;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	case VIRTIO_BLK_T_OUT:
19462306a36Sopenharmony_ci		if (!vdpasim_blk_check_range(vdpasim, sector,
19562306a36Sopenharmony_ci					     to_pull >> SECTOR_SHIFT,
19662306a36Sopenharmony_ci					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
19762306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
19862306a36Sopenharmony_ci			break;
19962306a36Sopenharmony_ci		}
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci		vdpasim_blk_buffer_lock(blk);
20262306a36Sopenharmony_ci		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
20362306a36Sopenharmony_ci					      blk->buffer + offset, to_pull);
20462306a36Sopenharmony_ci		vdpasim_blk_buffer_unlock(blk);
20562306a36Sopenharmony_ci		if (bytes < 0) {
20662306a36Sopenharmony_ci			dev_dbg(&vdpasim->vdpa.dev,
20762306a36Sopenharmony_ci				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
20862306a36Sopenharmony_ci				bytes, offset, to_pull);
20962306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
21062306a36Sopenharmony_ci			break;
21162306a36Sopenharmony_ci		}
21262306a36Sopenharmony_ci		break;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	case VIRTIO_BLK_T_GET_ID:
21562306a36Sopenharmony_ci		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
21662306a36Sopenharmony_ci					      vdpasim_blk_id,
21762306a36Sopenharmony_ci					      VIRTIO_BLK_ID_BYTES);
21862306a36Sopenharmony_ci		if (bytes < 0) {
21962306a36Sopenharmony_ci			dev_dbg(&vdpasim->vdpa.dev,
22062306a36Sopenharmony_ci				"vringh_iov_push_iotlb() error: %zd\n", bytes);
22162306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
22262306a36Sopenharmony_ci			break;
22362306a36Sopenharmony_ci		}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci		pushed += bytes;
22662306a36Sopenharmony_ci		break;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	case VIRTIO_BLK_T_FLUSH:
22962306a36Sopenharmony_ci		/* nothing to do */
23062306a36Sopenharmony_ci		break;
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	case VIRTIO_BLK_T_DISCARD:
23362306a36Sopenharmony_ci	case VIRTIO_BLK_T_WRITE_ZEROES: {
23462306a36Sopenharmony_ci		struct virtio_blk_discard_write_zeroes range;
23562306a36Sopenharmony_ci		u32 num_sectors, flags;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci		if (to_pull != sizeof(range)) {
23862306a36Sopenharmony_ci			dev_dbg(&vdpasim->vdpa.dev,
23962306a36Sopenharmony_ci				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
24062306a36Sopenharmony_ci				to_pull, sizeof(range));
24162306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
24262306a36Sopenharmony_ci			break;
24362306a36Sopenharmony_ci		}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
24662306a36Sopenharmony_ci					      to_pull);
24762306a36Sopenharmony_ci		if (bytes < 0) {
24862306a36Sopenharmony_ci			dev_dbg(&vdpasim->vdpa.dev,
24962306a36Sopenharmony_ci				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
25062306a36Sopenharmony_ci				bytes, offset, to_pull);
25162306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
25262306a36Sopenharmony_ci			break;
25362306a36Sopenharmony_ci		}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci		sector = le64_to_cpu(range.sector);
25662306a36Sopenharmony_ci		offset = sector << SECTOR_SHIFT;
25762306a36Sopenharmony_ci		num_sectors = le32_to_cpu(range.num_sectors);
25862306a36Sopenharmony_ci		flags = le32_to_cpu(range.flags);
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
26162306a36Sopenharmony_ci			dev_dbg(&vdpasim->vdpa.dev,
26262306a36Sopenharmony_ci				"discard unexpected flags set - flags: 0x%x\n",
26362306a36Sopenharmony_ci				flags);
26462306a36Sopenharmony_ci			status = VIRTIO_BLK_S_UNSUPP;
26562306a36Sopenharmony_ci			break;
26662306a36Sopenharmony_ci		}
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
26962306a36Sopenharmony_ci		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
27062306a36Sopenharmony_ci			dev_dbg(&vdpasim->vdpa.dev,
27162306a36Sopenharmony_ci				"write_zeroes unexpected flags set - flags: 0x%x\n",
27262306a36Sopenharmony_ci				flags);
27362306a36Sopenharmony_ci			status = VIRTIO_BLK_S_UNSUPP;
27462306a36Sopenharmony_ci			break;
27562306a36Sopenharmony_ci		}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
27862306a36Sopenharmony_ci					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
27962306a36Sopenharmony_ci			status = VIRTIO_BLK_S_IOERR;
28062306a36Sopenharmony_ci			break;
28162306a36Sopenharmony_ci		}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
28462306a36Sopenharmony_ci			vdpasim_blk_buffer_lock(blk);
28562306a36Sopenharmony_ci			memset(blk->buffer + offset, 0,
28662306a36Sopenharmony_ci			       num_sectors << SECTOR_SHIFT);
28762306a36Sopenharmony_ci			vdpasim_blk_buffer_unlock(blk);
28862306a36Sopenharmony_ci		}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci		break;
29162306a36Sopenharmony_ci	}
29262306a36Sopenharmony_ci	default:
29362306a36Sopenharmony_ci		dev_dbg(&vdpasim->vdpa.dev,
29462306a36Sopenharmony_ci			"Unsupported request type %d\n", type);
29562306a36Sopenharmony_ci		status = VIRTIO_BLK_S_IOERR;
29662306a36Sopenharmony_ci		break;
29762306a36Sopenharmony_ci	}
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_cierr_status:
30062306a36Sopenharmony_ci	/* If some operations fail, we need to skip the remaining bytes
30162306a36Sopenharmony_ci	 * to put the status in the last byte
30262306a36Sopenharmony_ci	 */
30362306a36Sopenharmony_ci	if (to_push - pushed > 0)
30462306a36Sopenharmony_ci		vringh_kiov_advance(&vq->in_iov, to_push - pushed);
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	/* Last byte is the status */
30762306a36Sopenharmony_ci	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
30862306a36Sopenharmony_ci	if (bytes != 1)
30962306a36Sopenharmony_ci		goto err;
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	pushed += bytes;
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	/* Make sure data is wrote before advancing index */
31462306a36Sopenharmony_ci	smp_wmb();
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	handled = true;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_cierr:
31962306a36Sopenharmony_ci	vringh_complete_iotlb(&vq->vring, vq->head, pushed);
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	return handled;
32262306a36Sopenharmony_ci}
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_cistatic void vdpasim_blk_work(struct vdpasim *vdpasim)
32562306a36Sopenharmony_ci{
32662306a36Sopenharmony_ci	bool reschedule = false;
32762306a36Sopenharmony_ci	int i;
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	mutex_lock(&vdpasim->mutex);
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
33262306a36Sopenharmony_ci		goto out;
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	if (!vdpasim->running)
33562306a36Sopenharmony_ci		goto out;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
33862306a36Sopenharmony_ci		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
33962306a36Sopenharmony_ci		int reqs = 0;
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci		if (!vq->ready)
34262306a36Sopenharmony_ci			continue;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci		while (vdpasim_blk_handle_req(vdpasim, vq)) {
34562306a36Sopenharmony_ci			/* Make sure used is visible before rasing the interrupt. */
34662306a36Sopenharmony_ci			smp_wmb();
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci			local_bh_disable();
34962306a36Sopenharmony_ci			if (vringh_need_notify_iotlb(&vq->vring) > 0)
35062306a36Sopenharmony_ci				vringh_notify(&vq->vring);
35162306a36Sopenharmony_ci			local_bh_enable();
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci			if (++reqs > 4) {
35462306a36Sopenharmony_ci				reschedule = true;
35562306a36Sopenharmony_ci				break;
35662306a36Sopenharmony_ci			}
35762306a36Sopenharmony_ci		}
35862306a36Sopenharmony_ci	}
35962306a36Sopenharmony_ciout:
36062306a36Sopenharmony_ci	mutex_unlock(&vdpasim->mutex);
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	if (reschedule)
36362306a36Sopenharmony_ci		vdpasim_schedule_work(vdpasim);
36462306a36Sopenharmony_ci}
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_cistatic void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
36762306a36Sopenharmony_ci{
36862306a36Sopenharmony_ci	struct virtio_blk_config *blk_config = config;
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	memset(config, 0, sizeof(struct virtio_blk_config));
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
37362306a36Sopenharmony_ci	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
37462306a36Sopenharmony_ci	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
37562306a36Sopenharmony_ci	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
37662306a36Sopenharmony_ci	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
37762306a36Sopenharmony_ci	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
37862306a36Sopenharmony_ci	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
37962306a36Sopenharmony_ci	/* VIRTIO_BLK_F_DISCARD */
38062306a36Sopenharmony_ci	blk_config->discard_sector_alignment =
38162306a36Sopenharmony_ci		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
38262306a36Sopenharmony_ci	blk_config->max_discard_sectors =
38362306a36Sopenharmony_ci		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
38462306a36Sopenharmony_ci	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
38562306a36Sopenharmony_ci	/* VIRTIO_BLK_F_WRITE_ZEROES */
38662306a36Sopenharmony_ci	blk_config->max_write_zeroes_sectors =
38762306a36Sopenharmony_ci		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
38862306a36Sopenharmony_ci	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_cistatic void vdpasim_blk_free(struct vdpasim *vdpasim)
39362306a36Sopenharmony_ci{
39462306a36Sopenharmony_ci	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	if (!blk->shared_backend)
39762306a36Sopenharmony_ci		kvfree(blk->buffer);
39862306a36Sopenharmony_ci}
39962306a36Sopenharmony_ci
/*
 * Release callback of the management parent device. Intentionally empty:
 * vdpasim_blk_mgmtdev is a statically defined object, so there is no memory
 * to free here.
 */
static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}
40362306a36Sopenharmony_ci
/* Parent device registered at module init and referenced by mgmt_dev.device. */
static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_cistatic int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
41062306a36Sopenharmony_ci			       const struct vdpa_dev_set_config *config)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	struct vdpasim_dev_attr dev_attr = {};
41362306a36Sopenharmony_ci	struct vdpasim_blk *blk;
41462306a36Sopenharmony_ci	struct vdpasim *simdev;
41562306a36Sopenharmony_ci	int ret;
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	dev_attr.mgmt_dev = mdev;
41862306a36Sopenharmony_ci	dev_attr.name = name;
41962306a36Sopenharmony_ci	dev_attr.id = VIRTIO_ID_BLOCK;
42062306a36Sopenharmony_ci	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
42162306a36Sopenharmony_ci	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
42262306a36Sopenharmony_ci	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
42362306a36Sopenharmony_ci	dev_attr.nas = VDPASIM_BLK_AS_NUM;
42462306a36Sopenharmony_ci	dev_attr.alloc_size = sizeof(struct vdpasim_blk);
42562306a36Sopenharmony_ci	dev_attr.config_size = sizeof(struct virtio_blk_config);
42662306a36Sopenharmony_ci	dev_attr.get_config = vdpasim_blk_get_config;
42762306a36Sopenharmony_ci	dev_attr.work_fn = vdpasim_blk_work;
42862306a36Sopenharmony_ci	dev_attr.free = vdpasim_blk_free;
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci	simdev = vdpasim_create(&dev_attr, config);
43162306a36Sopenharmony_ci	if (IS_ERR(simdev))
43262306a36Sopenharmony_ci		return PTR_ERR(simdev);
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	blk = sim_to_blk(simdev);
43562306a36Sopenharmony_ci	blk->shared_backend = shared_backend;
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci	if (blk->shared_backend) {
43862306a36Sopenharmony_ci		blk->buffer = shared_buffer;
43962306a36Sopenharmony_ci	} else {
44062306a36Sopenharmony_ci		blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
44162306a36Sopenharmony_ci				       GFP_KERNEL);
44262306a36Sopenharmony_ci		if (!blk->buffer) {
44362306a36Sopenharmony_ci			ret = -ENOMEM;
44462306a36Sopenharmony_ci			goto put_dev;
44562306a36Sopenharmony_ci		}
44662306a36Sopenharmony_ci	}
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
44962306a36Sopenharmony_ci	if (ret)
45062306a36Sopenharmony_ci		goto put_dev;
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci	return 0;
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ciput_dev:
45562306a36Sopenharmony_ci	put_device(&simdev->vdpa.dev);
45662306a36Sopenharmony_ci	return ret;
45762306a36Sopenharmony_ci}
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_cistatic void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
46062306a36Sopenharmony_ci				struct vdpa_device *dev)
46162306a36Sopenharmony_ci{
46262306a36Sopenharmony_ci	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci	_vdpa_unregister_device(&simdev->vdpa);
46562306a36Sopenharmony_ci}
46662306a36Sopenharmony_ci
/* Management operations: add and delete simulated block devices. */
static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};
47162306a36Sopenharmony_ci
/* Device IDs handled by this management device: virtio block; zero-terminated. */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
47662306a36Sopenharmony_ci
/* Management device registered with the vDPA core at module init. */
static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_cistatic int __init vdpasim_blk_init(void)
48462306a36Sopenharmony_ci{
48562306a36Sopenharmony_ci	int ret;
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci	ret = device_register(&vdpasim_blk_mgmtdev);
48862306a36Sopenharmony_ci	if (ret) {
48962306a36Sopenharmony_ci		put_device(&vdpasim_blk_mgmtdev);
49062306a36Sopenharmony_ci		return ret;
49162306a36Sopenharmony_ci	}
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	ret = vdpa_mgmtdev_register(&mgmt_dev);
49462306a36Sopenharmony_ci	if (ret)
49562306a36Sopenharmony_ci		goto parent_err;
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	if (shared_backend) {
49862306a36Sopenharmony_ci		shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
49962306a36Sopenharmony_ci					 GFP_KERNEL);
50062306a36Sopenharmony_ci		if (!shared_buffer) {
50162306a36Sopenharmony_ci			ret = -ENOMEM;
50262306a36Sopenharmony_ci			goto mgmt_dev_err;
50362306a36Sopenharmony_ci		}
50462306a36Sopenharmony_ci	}
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	return 0;
50762306a36Sopenharmony_cimgmt_dev_err:
50862306a36Sopenharmony_ci	vdpa_mgmtdev_unregister(&mgmt_dev);
50962306a36Sopenharmony_ciparent_err:
51062306a36Sopenharmony_ci	device_unregister(&vdpasim_blk_mgmtdev);
51162306a36Sopenharmony_ci	return ret;
51262306a36Sopenharmony_ci}
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_cistatic void __exit vdpasim_blk_exit(void)
51562306a36Sopenharmony_ci{
51662306a36Sopenharmony_ci	kvfree(shared_buffer);
51762306a36Sopenharmony_ci	vdpa_mgmtdev_unregister(&mgmt_dev);
51862306a36Sopenharmony_ci	device_unregister(&vdpasim_blk_mgmtdev);
51962306a36Sopenharmony_ci}
52062306a36Sopenharmony_ci
/* Module entry and exit points */
module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)

/* Module metadata, taken from the DRV_* macros defined at the top of the file */
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);
528