// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics RDMA host code.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
68c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
78c2ecf20Sopenharmony_ci#include <linux/module.h>
88c2ecf20Sopenharmony_ci#include <linux/init.h>
98c2ecf20Sopenharmony_ci#include <linux/slab.h>
108c2ecf20Sopenharmony_ci#include <rdma/mr_pool.h>
118c2ecf20Sopenharmony_ci#include <linux/err.h>
128c2ecf20Sopenharmony_ci#include <linux/string.h>
138c2ecf20Sopenharmony_ci#include <linux/atomic.h>
148c2ecf20Sopenharmony_ci#include <linux/blk-mq.h>
158c2ecf20Sopenharmony_ci#include <linux/blk-mq-rdma.h>
168c2ecf20Sopenharmony_ci#include <linux/types.h>
178c2ecf20Sopenharmony_ci#include <linux/list.h>
188c2ecf20Sopenharmony_ci#include <linux/mutex.h>
198c2ecf20Sopenharmony_ci#include <linux/scatterlist.h>
208c2ecf20Sopenharmony_ci#include <linux/nvme.h>
218c2ecf20Sopenharmony_ci#include <asm/unaligned.h>
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ci#include <rdma/ib_verbs.h>
248c2ecf20Sopenharmony_ci#include <rdma/rdma_cm.h>
258c2ecf20Sopenharmony_ci#include <linux/nvme-rdma.h>
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci#include "nvme.h"
288c2ecf20Sopenharmony_ci#include "fabrics.h"
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci
/*
 * Timeout, in milliseconds, passed to rdma_resolve_addr() and used when
 * waiting for the connection manager to signal completion.
 */
#define NVME_RDMA_CONNECT_TIMEOUT_MS	3000		/* 3 second */

/* Upper bound applied to the fast-registration page count of a device. */
#define NVME_RDMA_MAX_SEGMENTS		256

/* Upper bound on inline data SGEs; one extra SGE is kept per request. */
#define NVME_RDMA_MAX_INLINE_SEGMENTS	4

/* Byte sizes of the inline scatterlist arrays for data and metadata. */
#define NVME_RDMA_DATA_SGL_SIZE \
	(NVME_INLINE_SG_CNT * sizeof(struct scatterlist))
#define NVME_RDMA_METADATA_SGL_SIZE \
	(NVME_INLINE_METADATA_SG_CNT * sizeof(struct scatterlist))
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_cistruct nvme_rdma_device {
438c2ecf20Sopenharmony_ci	struct ib_device	*dev;
448c2ecf20Sopenharmony_ci	struct ib_pd		*pd;
458c2ecf20Sopenharmony_ci	struct kref		ref;
468c2ecf20Sopenharmony_ci	struct list_head	entry;
478c2ecf20Sopenharmony_ci	unsigned int		num_inline_segments;
488c2ecf20Sopenharmony_ci};
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_cistruct nvme_rdma_qe {
518c2ecf20Sopenharmony_ci	struct ib_cqe		cqe;
528c2ecf20Sopenharmony_ci	void			*data;
538c2ecf20Sopenharmony_ci	u64			dma;
548c2ecf20Sopenharmony_ci};
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_cistruct nvme_rdma_sgl {
578c2ecf20Sopenharmony_ci	int			nents;
588c2ecf20Sopenharmony_ci	struct sg_table		sg_table;
598c2ecf20Sopenharmony_ci};
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_cistruct nvme_rdma_queue;
628c2ecf20Sopenharmony_cistruct nvme_rdma_request {
638c2ecf20Sopenharmony_ci	struct nvme_request	req;
648c2ecf20Sopenharmony_ci	struct ib_mr		*mr;
658c2ecf20Sopenharmony_ci	struct nvme_rdma_qe	sqe;
668c2ecf20Sopenharmony_ci	union nvme_result	result;
678c2ecf20Sopenharmony_ci	__le16			status;
688c2ecf20Sopenharmony_ci	refcount_t		ref;
698c2ecf20Sopenharmony_ci	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
708c2ecf20Sopenharmony_ci	u32			num_sge;
718c2ecf20Sopenharmony_ci	struct ib_reg_wr	reg_wr;
728c2ecf20Sopenharmony_ci	struct ib_cqe		reg_cqe;
738c2ecf20Sopenharmony_ci	struct nvme_rdma_queue  *queue;
748c2ecf20Sopenharmony_ci	struct nvme_rdma_sgl	data_sgl;
758c2ecf20Sopenharmony_ci	struct nvme_rdma_sgl	*metadata_sgl;
768c2ecf20Sopenharmony_ci	bool			use_sig_mr;
778c2ecf20Sopenharmony_ci};
788c2ecf20Sopenharmony_ci
/* Bit numbers used with the bitops on nvme_rdma_queue.flags. */
enum nvme_rdma_queue_flags {
	/* set once nvme_rdma_alloc_queue() succeeded */
	NVME_RDMA_Q_ALLOCATED		= 0,
	/* cleared by nvme_rdma_stop_queue() before disconnecting */
	NVME_RDMA_Q_LIVE		= 1,
	/* set once nvme_rdma_create_queue_ib() succeeded */
	NVME_RDMA_Q_TR_READY		= 2,
};
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_cistruct nvme_rdma_queue {
868c2ecf20Sopenharmony_ci	struct nvme_rdma_qe	*rsp_ring;
878c2ecf20Sopenharmony_ci	int			queue_size;
888c2ecf20Sopenharmony_ci	size_t			cmnd_capsule_len;
898c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl	*ctrl;
908c2ecf20Sopenharmony_ci	struct nvme_rdma_device	*device;
918c2ecf20Sopenharmony_ci	struct ib_cq		*ib_cq;
928c2ecf20Sopenharmony_ci	struct ib_qp		*qp;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	unsigned long		flags;
958c2ecf20Sopenharmony_ci	struct rdma_cm_id	*cm_id;
968c2ecf20Sopenharmony_ci	int			cm_error;
978c2ecf20Sopenharmony_ci	struct completion	cm_done;
988c2ecf20Sopenharmony_ci	bool			pi_support;
998c2ecf20Sopenharmony_ci	int			cq_size;
1008c2ecf20Sopenharmony_ci	struct mutex		queue_lock;
1018c2ecf20Sopenharmony_ci};
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_cistruct nvme_rdma_ctrl {
1048c2ecf20Sopenharmony_ci	/* read only in the hot path */
1058c2ecf20Sopenharmony_ci	struct nvme_rdma_queue	*queues;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	/* other member variables */
1088c2ecf20Sopenharmony_ci	struct blk_mq_tag_set	tag_set;
1098c2ecf20Sopenharmony_ci	struct work_struct	err_work;
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	struct nvme_rdma_qe	async_event_sqe;
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	struct delayed_work	reconnect_work;
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	struct list_head	list;
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci	struct blk_mq_tag_set	admin_tag_set;
1188c2ecf20Sopenharmony_ci	struct nvme_rdma_device	*device;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	u32			max_fr_pages;
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci	struct sockaddr_storage addr;
1238c2ecf20Sopenharmony_ci	struct sockaddr_storage src_addr;
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	struct nvme_ctrl	ctrl;
1268c2ecf20Sopenharmony_ci	bool			use_inline_data;
1278c2ecf20Sopenharmony_ci	u32			io_queues[HCTX_MAX_TYPES];
1288c2ecf20Sopenharmony_ci};
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_cistatic inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
1318c2ecf20Sopenharmony_ci{
1328c2ecf20Sopenharmony_ci	return container_of(ctrl, struct nvme_rdma_ctrl, ctrl);
1338c2ecf20Sopenharmony_ci}
1348c2ecf20Sopenharmony_ci
/* Known nvme_rdma_device instances; device_list_mutex guards the list. */
static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(device_list);

/*
 * Controller list and its mutex.
 * NOTE(review): presumably links nvme_rdma_ctrl.list; the users are outside
 * this section - confirm.
 */
static DEFINE_MUTEX(nvme_rdma_ctrl_mutex);
static LIST_HEAD(nvme_rdma_ctrl_list);
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci/*
1428c2ecf20Sopenharmony_ci * Disabling this option makes small I/O goes faster, but is fundamentally
1438c2ecf20Sopenharmony_ci * unsafe.  With it turned off we will have to register a global rkey that
1448c2ecf20Sopenharmony_ci * allows read and write access to all physical memory.
1458c2ecf20Sopenharmony_ci */
1468c2ecf20Sopenharmony_cistatic bool register_always = true;
1478c2ecf20Sopenharmony_cimodule_param(register_always, bool, 0444);
1488c2ecf20Sopenharmony_ciMODULE_PARM_DESC(register_always,
1498c2ecf20Sopenharmony_ci	 "Use memory registration even for contiguous memory regions");
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_cistatic int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
1528c2ecf20Sopenharmony_ci		struct rdma_cm_event *event);
1538c2ecf20Sopenharmony_cistatic void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
1548c2ecf20Sopenharmony_cistatic void nvme_rdma_complete_rq(struct request *rq);
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_cistatic const struct blk_mq_ops nvme_rdma_mq_ops;
1578c2ecf20Sopenharmony_cistatic const struct blk_mq_ops nvme_rdma_admin_mq_ops;
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_cistatic inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	return queue - queue->ctrl->queues;
1628c2ecf20Sopenharmony_ci}
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_cistatic bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue)
1658c2ecf20Sopenharmony_ci{
1668c2ecf20Sopenharmony_ci	return nvme_rdma_queue_idx(queue) >
1678c2ecf20Sopenharmony_ci		queue->ctrl->io_queues[HCTX_TYPE_DEFAULT] +
1688c2ecf20Sopenharmony_ci		queue->ctrl->io_queues[HCTX_TYPE_READ];
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_cistatic inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
1728c2ecf20Sopenharmony_ci{
1738c2ecf20Sopenharmony_ci	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
1748c2ecf20Sopenharmony_ci}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_cistatic void nvme_rdma_free_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
1778c2ecf20Sopenharmony_ci		size_t capsule_size, enum dma_data_direction dir)
1788c2ecf20Sopenharmony_ci{
1798c2ecf20Sopenharmony_ci	ib_dma_unmap_single(ibdev, qe->dma, capsule_size, dir);
1808c2ecf20Sopenharmony_ci	kfree(qe->data);
1818c2ecf20Sopenharmony_ci}
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_cistatic int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
1848c2ecf20Sopenharmony_ci		size_t capsule_size, enum dma_data_direction dir)
1858c2ecf20Sopenharmony_ci{
1868c2ecf20Sopenharmony_ci	qe->data = kzalloc(capsule_size, GFP_KERNEL);
1878c2ecf20Sopenharmony_ci	if (!qe->data)
1888c2ecf20Sopenharmony_ci		return -ENOMEM;
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
1918c2ecf20Sopenharmony_ci	if (ib_dma_mapping_error(ibdev, qe->dma)) {
1928c2ecf20Sopenharmony_ci		kfree(qe->data);
1938c2ecf20Sopenharmony_ci		qe->data = NULL;
1948c2ecf20Sopenharmony_ci		return -ENOMEM;
1958c2ecf20Sopenharmony_ci	}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	return 0;
1988c2ecf20Sopenharmony_ci}
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_cistatic void nvme_rdma_free_ring(struct ib_device *ibdev,
2018c2ecf20Sopenharmony_ci		struct nvme_rdma_qe *ring, size_t ib_queue_size,
2028c2ecf20Sopenharmony_ci		size_t capsule_size, enum dma_data_direction dir)
2038c2ecf20Sopenharmony_ci{
2048c2ecf20Sopenharmony_ci	int i;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	for (i = 0; i < ib_queue_size; i++)
2078c2ecf20Sopenharmony_ci		nvme_rdma_free_qe(ibdev, &ring[i], capsule_size, dir);
2088c2ecf20Sopenharmony_ci	kfree(ring);
2098c2ecf20Sopenharmony_ci}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cistatic struct nvme_rdma_qe *nvme_rdma_alloc_ring(struct ib_device *ibdev,
2128c2ecf20Sopenharmony_ci		size_t ib_queue_size, size_t capsule_size,
2138c2ecf20Sopenharmony_ci		enum dma_data_direction dir)
2148c2ecf20Sopenharmony_ci{
2158c2ecf20Sopenharmony_ci	struct nvme_rdma_qe *ring;
2168c2ecf20Sopenharmony_ci	int i;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	ring = kcalloc(ib_queue_size, sizeof(struct nvme_rdma_qe), GFP_KERNEL);
2198c2ecf20Sopenharmony_ci	if (!ring)
2208c2ecf20Sopenharmony_ci		return NULL;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	/*
2238c2ecf20Sopenharmony_ci	 * Bind the CQEs (post recv buffers) DMA mapping to the RDMA queue
2248c2ecf20Sopenharmony_ci	 * lifetime. It's safe, since any chage in the underlying RDMA device
2258c2ecf20Sopenharmony_ci	 * will issue error recovery and queue re-creation.
2268c2ecf20Sopenharmony_ci	 */
2278c2ecf20Sopenharmony_ci	for (i = 0; i < ib_queue_size; i++) {
2288c2ecf20Sopenharmony_ci		if (nvme_rdma_alloc_qe(ibdev, &ring[i], capsule_size, dir))
2298c2ecf20Sopenharmony_ci			goto out_free_ring;
2308c2ecf20Sopenharmony_ci	}
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci	return ring;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ciout_free_ring:
2358c2ecf20Sopenharmony_ci	nvme_rdma_free_ring(ibdev, ring, i, capsule_size, dir);
2368c2ecf20Sopenharmony_ci	return NULL;
2378c2ecf20Sopenharmony_ci}
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_cistatic void nvme_rdma_qp_event(struct ib_event *event, void *context)
2408c2ecf20Sopenharmony_ci{
2418c2ecf20Sopenharmony_ci	pr_debug("QP event %s (%d)\n",
2428c2ecf20Sopenharmony_ci		 ib_event_msg(event->event), event->event);
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci}
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_cistatic int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue)
2478c2ecf20Sopenharmony_ci{
2488c2ecf20Sopenharmony_ci	int ret;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	ret = wait_for_completion_interruptible_timeout(&queue->cm_done,
2518c2ecf20Sopenharmony_ci			msecs_to_jiffies(NVME_RDMA_CONNECT_TIMEOUT_MS) + 1);
2528c2ecf20Sopenharmony_ci	if (ret < 0)
2538c2ecf20Sopenharmony_ci		return ret;
2548c2ecf20Sopenharmony_ci	if (ret == 0)
2558c2ecf20Sopenharmony_ci		return -ETIMEDOUT;
2568c2ecf20Sopenharmony_ci	WARN_ON_ONCE(queue->cm_error > 0);
2578c2ecf20Sopenharmony_ci	return queue->cm_error;
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_cistatic int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
2618c2ecf20Sopenharmony_ci{
2628c2ecf20Sopenharmony_ci	struct nvme_rdma_device *dev = queue->device;
2638c2ecf20Sopenharmony_ci	struct ib_qp_init_attr init_attr;
2648c2ecf20Sopenharmony_ci	int ret;
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	memset(&init_attr, 0, sizeof(init_attr));
2678c2ecf20Sopenharmony_ci	init_attr.event_handler = nvme_rdma_qp_event;
2688c2ecf20Sopenharmony_ci	/* +1 for drain */
2698c2ecf20Sopenharmony_ci	init_attr.cap.max_send_wr = factor * queue->queue_size + 1;
2708c2ecf20Sopenharmony_ci	/* +1 for drain */
2718c2ecf20Sopenharmony_ci	init_attr.cap.max_recv_wr = queue->queue_size + 1;
2728c2ecf20Sopenharmony_ci	init_attr.cap.max_recv_sge = 1;
2738c2ecf20Sopenharmony_ci	init_attr.cap.max_send_sge = 1 + dev->num_inline_segments;
2748c2ecf20Sopenharmony_ci	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
2758c2ecf20Sopenharmony_ci	init_attr.qp_type = IB_QPT_RC;
2768c2ecf20Sopenharmony_ci	init_attr.send_cq = queue->ib_cq;
2778c2ecf20Sopenharmony_ci	init_attr.recv_cq = queue->ib_cq;
2788c2ecf20Sopenharmony_ci	if (queue->pi_support)
2798c2ecf20Sopenharmony_ci		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
2808c2ecf20Sopenharmony_ci	init_attr.qp_context = queue;
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_ci	ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci	queue->qp = queue->cm_id->qp;
2858c2ecf20Sopenharmony_ci	return ret;
2868c2ecf20Sopenharmony_ci}
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_cistatic void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
2898c2ecf20Sopenharmony_ci		struct request *rq, unsigned int hctx_idx)
2908c2ecf20Sopenharmony_ci{
2918c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	kfree(req->sqe.data);
2948c2ecf20Sopenharmony_ci}
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_cistatic int nvme_rdma_init_request(struct blk_mq_tag_set *set,
2978c2ecf20Sopenharmony_ci		struct request *rq, unsigned int hctx_idx,
2988c2ecf20Sopenharmony_ci		unsigned int numa_node)
2998c2ecf20Sopenharmony_ci{
3008c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = set->driver_data;
3018c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
3028c2ecf20Sopenharmony_ci	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
3038c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci	nvme_req(rq)->ctrl = &ctrl->ctrl;
3068c2ecf20Sopenharmony_ci	req->sqe.data = kzalloc(sizeof(struct nvme_command), GFP_KERNEL);
3078c2ecf20Sopenharmony_ci	if (!req->sqe.data)
3088c2ecf20Sopenharmony_ci		return -ENOMEM;
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	/* metadata nvme_rdma_sgl struct is located after command's data SGL */
3118c2ecf20Sopenharmony_ci	if (queue->pi_support)
3128c2ecf20Sopenharmony_ci		req->metadata_sgl = (void *)nvme_req(rq) +
3138c2ecf20Sopenharmony_ci			sizeof(struct nvme_rdma_request) +
3148c2ecf20Sopenharmony_ci			NVME_RDMA_DATA_SGL_SIZE;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	req->queue = queue;
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	return 0;
3198c2ecf20Sopenharmony_ci}
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_cistatic int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
3228c2ecf20Sopenharmony_ci		unsigned int hctx_idx)
3238c2ecf20Sopenharmony_ci{
3248c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = data;
3258c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1];
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	BUG_ON(hctx_idx >= ctrl->ctrl.queue_count);
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci	hctx->driver_data = queue;
3308c2ecf20Sopenharmony_ci	return 0;
3318c2ecf20Sopenharmony_ci}
3328c2ecf20Sopenharmony_ci
3338c2ecf20Sopenharmony_cistatic int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
3348c2ecf20Sopenharmony_ci		unsigned int hctx_idx)
3358c2ecf20Sopenharmony_ci{
3368c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = data;
3378c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = &ctrl->queues[0];
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	BUG_ON(hctx_idx != 0);
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	hctx->driver_data = queue;
3428c2ecf20Sopenharmony_ci	return 0;
3438c2ecf20Sopenharmony_ci}
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_cistatic void nvme_rdma_free_dev(struct kref *ref)
3468c2ecf20Sopenharmony_ci{
3478c2ecf20Sopenharmony_ci	struct nvme_rdma_device *ndev =
3488c2ecf20Sopenharmony_ci		container_of(ref, struct nvme_rdma_device, ref);
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	mutex_lock(&device_list_mutex);
3518c2ecf20Sopenharmony_ci	list_del(&ndev->entry);
3528c2ecf20Sopenharmony_ci	mutex_unlock(&device_list_mutex);
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	ib_dealloc_pd(ndev->pd);
3558c2ecf20Sopenharmony_ci	kfree(ndev);
3568c2ecf20Sopenharmony_ci}
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_cistatic void nvme_rdma_dev_put(struct nvme_rdma_device *dev)
3598c2ecf20Sopenharmony_ci{
3608c2ecf20Sopenharmony_ci	kref_put(&dev->ref, nvme_rdma_free_dev);
3618c2ecf20Sopenharmony_ci}
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_cistatic int nvme_rdma_dev_get(struct nvme_rdma_device *dev)
3648c2ecf20Sopenharmony_ci{
3658c2ecf20Sopenharmony_ci	return kref_get_unless_zero(&dev->ref);
3668c2ecf20Sopenharmony_ci}
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_cistatic struct nvme_rdma_device *
3698c2ecf20Sopenharmony_cinvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
3708c2ecf20Sopenharmony_ci{
3718c2ecf20Sopenharmony_ci	struct nvme_rdma_device *ndev;
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	mutex_lock(&device_list_mutex);
3748c2ecf20Sopenharmony_ci	list_for_each_entry(ndev, &device_list, entry) {
3758c2ecf20Sopenharmony_ci		if (ndev->dev->node_guid == cm_id->device->node_guid &&
3768c2ecf20Sopenharmony_ci		    nvme_rdma_dev_get(ndev))
3778c2ecf20Sopenharmony_ci			goto out_unlock;
3788c2ecf20Sopenharmony_ci	}
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
3818c2ecf20Sopenharmony_ci	if (!ndev)
3828c2ecf20Sopenharmony_ci		goto out_err;
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci	ndev->dev = cm_id->device;
3858c2ecf20Sopenharmony_ci	kref_init(&ndev->ref);
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	ndev->pd = ib_alloc_pd(ndev->dev,
3888c2ecf20Sopenharmony_ci		register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
3898c2ecf20Sopenharmony_ci	if (IS_ERR(ndev->pd))
3908c2ecf20Sopenharmony_ci		goto out_free_dev;
3918c2ecf20Sopenharmony_ci
3928c2ecf20Sopenharmony_ci	if (!(ndev->dev->attrs.device_cap_flags &
3938c2ecf20Sopenharmony_ci	      IB_DEVICE_MEM_MGT_EXTENSIONS)) {
3948c2ecf20Sopenharmony_ci		dev_err(&ndev->dev->dev,
3958c2ecf20Sopenharmony_ci			"Memory registrations not supported.\n");
3968c2ecf20Sopenharmony_ci		goto out_free_pd;
3978c2ecf20Sopenharmony_ci	}
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
4008c2ecf20Sopenharmony_ci					ndev->dev->attrs.max_send_sge - 1);
4018c2ecf20Sopenharmony_ci	list_add(&ndev->entry, &device_list);
4028c2ecf20Sopenharmony_ciout_unlock:
4038c2ecf20Sopenharmony_ci	mutex_unlock(&device_list_mutex);
4048c2ecf20Sopenharmony_ci	return ndev;
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ciout_free_pd:
4078c2ecf20Sopenharmony_ci	ib_dealloc_pd(ndev->pd);
4088c2ecf20Sopenharmony_ciout_free_dev:
4098c2ecf20Sopenharmony_ci	kfree(ndev);
4108c2ecf20Sopenharmony_ciout_err:
4118c2ecf20Sopenharmony_ci	mutex_unlock(&device_list_mutex);
4128c2ecf20Sopenharmony_ci	return NULL;
4138c2ecf20Sopenharmony_ci}
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_cistatic void nvme_rdma_free_cq(struct nvme_rdma_queue *queue)
4168c2ecf20Sopenharmony_ci{
4178c2ecf20Sopenharmony_ci	if (nvme_rdma_poll_queue(queue))
4188c2ecf20Sopenharmony_ci		ib_free_cq(queue->ib_cq);
4198c2ecf20Sopenharmony_ci	else
4208c2ecf20Sopenharmony_ci		ib_cq_pool_put(queue->ib_cq, queue->cq_size);
4218c2ecf20Sopenharmony_ci}
4228c2ecf20Sopenharmony_ci
4238c2ecf20Sopenharmony_cistatic void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
4248c2ecf20Sopenharmony_ci{
4258c2ecf20Sopenharmony_ci	struct nvme_rdma_device *dev;
4268c2ecf20Sopenharmony_ci	struct ib_device *ibdev;
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
4298c2ecf20Sopenharmony_ci		return;
4308c2ecf20Sopenharmony_ci
4318c2ecf20Sopenharmony_ci	dev = queue->device;
4328c2ecf20Sopenharmony_ci	ibdev = dev->dev;
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci	if (queue->pi_support)
4358c2ecf20Sopenharmony_ci		ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
4368c2ecf20Sopenharmony_ci	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	/*
4398c2ecf20Sopenharmony_ci	 * The cm_id object might have been destroyed during RDMA connection
4408c2ecf20Sopenharmony_ci	 * establishment error flow to avoid getting other cma events, thus
4418c2ecf20Sopenharmony_ci	 * the destruction of the QP shouldn't use rdma_cm API.
4428c2ecf20Sopenharmony_ci	 */
4438c2ecf20Sopenharmony_ci	ib_destroy_qp(queue->qp);
4448c2ecf20Sopenharmony_ci	nvme_rdma_free_cq(queue);
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_ci	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
4478c2ecf20Sopenharmony_ci			sizeof(struct nvme_completion), DMA_FROM_DEVICE);
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	nvme_rdma_dev_put(dev);
4508c2ecf20Sopenharmony_ci}
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_cistatic int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
4538c2ecf20Sopenharmony_ci{
4548c2ecf20Sopenharmony_ci	u32 max_page_list_len;
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	if (pi_support)
4578c2ecf20Sopenharmony_ci		max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
4588c2ecf20Sopenharmony_ci	else
4598c2ecf20Sopenharmony_ci		max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
4628c2ecf20Sopenharmony_ci}
4638c2ecf20Sopenharmony_ci
4648c2ecf20Sopenharmony_cistatic int nvme_rdma_create_cq(struct ib_device *ibdev,
4658c2ecf20Sopenharmony_ci		struct nvme_rdma_queue *queue)
4668c2ecf20Sopenharmony_ci{
4678c2ecf20Sopenharmony_ci	int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
4688c2ecf20Sopenharmony_ci	enum ib_poll_context poll_ctx;
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_ci	/*
4718c2ecf20Sopenharmony_ci	 * Spread I/O queues completion vectors according their queue index.
4728c2ecf20Sopenharmony_ci	 * Admin queues can always go on completion vector 0.
4738c2ecf20Sopenharmony_ci	 */
4748c2ecf20Sopenharmony_ci	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci	/* Polling queues need direct cq polling context */
4778c2ecf20Sopenharmony_ci	if (nvme_rdma_poll_queue(queue)) {
4788c2ecf20Sopenharmony_ci		poll_ctx = IB_POLL_DIRECT;
4798c2ecf20Sopenharmony_ci		queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
4808c2ecf20Sopenharmony_ci					   comp_vector, poll_ctx);
4818c2ecf20Sopenharmony_ci	} else {
4828c2ecf20Sopenharmony_ci		poll_ctx = IB_POLL_SOFTIRQ;
4838c2ecf20Sopenharmony_ci		queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
4848c2ecf20Sopenharmony_ci					      comp_vector, poll_ctx);
4858c2ecf20Sopenharmony_ci	}
4868c2ecf20Sopenharmony_ci
4878c2ecf20Sopenharmony_ci	if (IS_ERR(queue->ib_cq)) {
4888c2ecf20Sopenharmony_ci		ret = PTR_ERR(queue->ib_cq);
4898c2ecf20Sopenharmony_ci		return ret;
4908c2ecf20Sopenharmony_ci	}
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	return 0;
4938c2ecf20Sopenharmony_ci}
4948c2ecf20Sopenharmony_ci
/*
 * Set up the IB resources of @queue: device reference, CQ, QP, response
 * ring and MR pool(s). NVME_RDMA_Q_TR_READY is set on success.
 *
 * Returns 0 on success. On failure everything set up so far is released
 * again and a negative errno is returned (-ECONNREFUSED when no device
 * could be obtained).
 */
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
	const int send_wr_factor = 3;			/* MR, SEND, INV */
	const int cq_factor = send_wr_factor + 1;	/* + RECV */
	struct ib_device *ibdev;
	int pages_per_mr;
	int err;

	queue->device = nvme_rdma_find_get_device(queue->cm_id);
	if (!queue->device) {
		dev_err(queue->cm_id->device->dev.parent,
			"no client data found!\n");
		return -ECONNREFUSED;
	}
	ibdev = queue->device->dev;

	/* +1 for ib_stop_cq */
	queue->cq_size = cq_factor * queue->queue_size + 1;

	err = nvme_rdma_create_cq(ibdev, queue);
	if (err)
		goto err_put_dev;

	err = nvme_rdma_create_qp(queue, send_wr_factor);
	if (err)
		goto err_free_cq;

	queue->rsp_ring = nvme_rdma_alloc_ring(ibdev, queue->queue_size,
					       sizeof(struct nvme_completion),
					       DMA_FROM_DEVICE);
	if (!queue->rsp_ring) {
		err = -ENOMEM;
		goto err_destroy_qp;
	}

	/*
	 * Currently we don't use SG_GAPS MR's so if the first entry is
	 * misaligned we'll end up using two entries for a single data page,
	 * so one additional entry is required.
	 */
	pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;

	err = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
			      queue->queue_size, IB_MR_TYPE_MEM_REG,
			      pages_per_mr, 0);
	if (err) {
		dev_err(queue->ctrl->ctrl.device,
			"failed to initialize MR pool sized %d for QID %d\n",
			queue->queue_size, nvme_rdma_queue_idx(queue));
		goto err_free_ring;
	}

	/* The integrity MR pool is only needed on PI-capable queues. */
	if (queue->pi_support) {
		err = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
				      queue->queue_size, IB_MR_TYPE_INTEGRITY,
				      pages_per_mr, pages_per_mr);
		if (err) {
			dev_err(queue->ctrl->ctrl.device,
				"failed to initialize PI MR pool sized %d for QID %d\n",
				queue->queue_size, nvme_rdma_queue_idx(queue));
			goto err_destroy_mr_pool;
		}
	}

	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);

	return 0;

err_destroy_mr_pool:
	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
err_free_ring:
	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
err_destroy_qp:
	rdma_destroy_qp(queue->cm_id);
err_free_cq:
	nvme_rdma_free_cq(queue);
err_put_dev:
	nvme_rdma_dev_put(queue->device);
	return err;
}
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_cistatic int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
5768c2ecf20Sopenharmony_ci		int idx, size_t queue_size)
5778c2ecf20Sopenharmony_ci{
5788c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue;
5798c2ecf20Sopenharmony_ci	struct sockaddr *src_addr = NULL;
5808c2ecf20Sopenharmony_ci	int ret;
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	queue = &ctrl->queues[idx];
5838c2ecf20Sopenharmony_ci	mutex_init(&queue->queue_lock);
5848c2ecf20Sopenharmony_ci	queue->ctrl = ctrl;
5858c2ecf20Sopenharmony_ci	if (idx && ctrl->ctrl.max_integrity_segments)
5868c2ecf20Sopenharmony_ci		queue->pi_support = true;
5878c2ecf20Sopenharmony_ci	else
5888c2ecf20Sopenharmony_ci		queue->pi_support = false;
5898c2ecf20Sopenharmony_ci	init_completion(&queue->cm_done);
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci	if (idx > 0)
5928c2ecf20Sopenharmony_ci		queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
5938c2ecf20Sopenharmony_ci	else
5948c2ecf20Sopenharmony_ci		queue->cmnd_capsule_len = sizeof(struct nvme_command);
5958c2ecf20Sopenharmony_ci
5968c2ecf20Sopenharmony_ci	queue->queue_size = queue_size;
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
5998c2ecf20Sopenharmony_ci			RDMA_PS_TCP, IB_QPT_RC);
6008c2ecf20Sopenharmony_ci	if (IS_ERR(queue->cm_id)) {
6018c2ecf20Sopenharmony_ci		dev_info(ctrl->ctrl.device,
6028c2ecf20Sopenharmony_ci			"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
6038c2ecf20Sopenharmony_ci		ret = PTR_ERR(queue->cm_id);
6048c2ecf20Sopenharmony_ci		goto out_destroy_mutex;
6058c2ecf20Sopenharmony_ci	}
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
6088c2ecf20Sopenharmony_ci		src_addr = (struct sockaddr *)&ctrl->src_addr;
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci	queue->cm_error = -ETIMEDOUT;
6118c2ecf20Sopenharmony_ci	ret = rdma_resolve_addr(queue->cm_id, src_addr,
6128c2ecf20Sopenharmony_ci			(struct sockaddr *)&ctrl->addr,
6138c2ecf20Sopenharmony_ci			NVME_RDMA_CONNECT_TIMEOUT_MS);
6148c2ecf20Sopenharmony_ci	if (ret) {
6158c2ecf20Sopenharmony_ci		dev_info(ctrl->ctrl.device,
6168c2ecf20Sopenharmony_ci			"rdma_resolve_addr failed (%d).\n", ret);
6178c2ecf20Sopenharmony_ci		goto out_destroy_cm_id;
6188c2ecf20Sopenharmony_ci	}
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	ret = nvme_rdma_wait_for_cm(queue);
6218c2ecf20Sopenharmony_ci	if (ret) {
6228c2ecf20Sopenharmony_ci		dev_info(ctrl->ctrl.device,
6238c2ecf20Sopenharmony_ci			"rdma connection establishment failed (%d)\n", ret);
6248c2ecf20Sopenharmony_ci		goto out_destroy_cm_id;
6258c2ecf20Sopenharmony_ci	}
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci	set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	return 0;
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ciout_destroy_cm_id:
6328c2ecf20Sopenharmony_ci	rdma_destroy_id(queue->cm_id);
6338c2ecf20Sopenharmony_ci	nvme_rdma_destroy_queue_ib(queue);
6348c2ecf20Sopenharmony_ciout_destroy_mutex:
6358c2ecf20Sopenharmony_ci	mutex_destroy(&queue->queue_lock);
6368c2ecf20Sopenharmony_ci	return ret;
6378c2ecf20Sopenharmony_ci}
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_cistatic void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
6408c2ecf20Sopenharmony_ci{
6418c2ecf20Sopenharmony_ci	rdma_disconnect(queue->cm_id);
6428c2ecf20Sopenharmony_ci	ib_drain_qp(queue->qp);
6438c2ecf20Sopenharmony_ci}
6448c2ecf20Sopenharmony_ci
6458c2ecf20Sopenharmony_cistatic void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
6468c2ecf20Sopenharmony_ci{
6478c2ecf20Sopenharmony_ci	if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
6488c2ecf20Sopenharmony_ci		return;
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_ci	mutex_lock(&queue->queue_lock);
6518c2ecf20Sopenharmony_ci	if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
6528c2ecf20Sopenharmony_ci		__nvme_rdma_stop_queue(queue);
6538c2ecf20Sopenharmony_ci	mutex_unlock(&queue->queue_lock);
6548c2ecf20Sopenharmony_ci}
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_cistatic void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
6578c2ecf20Sopenharmony_ci{
6588c2ecf20Sopenharmony_ci	if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
6598c2ecf20Sopenharmony_ci		return;
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_ci	rdma_destroy_id(queue->cm_id);
6628c2ecf20Sopenharmony_ci	nvme_rdma_destroy_queue_ib(queue);
6638c2ecf20Sopenharmony_ci	mutex_destroy(&queue->queue_lock);
6648c2ecf20Sopenharmony_ci}
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_cistatic void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
6678c2ecf20Sopenharmony_ci{
6688c2ecf20Sopenharmony_ci	int i;
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	for (i = 1; i < ctrl->ctrl.queue_count; i++)
6718c2ecf20Sopenharmony_ci		nvme_rdma_free_queue(&ctrl->queues[i]);
6728c2ecf20Sopenharmony_ci}
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_cistatic void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
6758c2ecf20Sopenharmony_ci{
6768c2ecf20Sopenharmony_ci	int i;
6778c2ecf20Sopenharmony_ci
6788c2ecf20Sopenharmony_ci	for (i = 1; i < ctrl->ctrl.queue_count; i++)
6798c2ecf20Sopenharmony_ci		nvme_rdma_stop_queue(&ctrl->queues[i]);
6808c2ecf20Sopenharmony_ci}
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_cistatic int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
6838c2ecf20Sopenharmony_ci{
6848c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = &ctrl->queues[idx];
6858c2ecf20Sopenharmony_ci	bool poll = nvme_rdma_poll_queue(queue);
6868c2ecf20Sopenharmony_ci	int ret;
6878c2ecf20Sopenharmony_ci
6888c2ecf20Sopenharmony_ci	if (idx)
6898c2ecf20Sopenharmony_ci		ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, poll);
6908c2ecf20Sopenharmony_ci	else
6918c2ecf20Sopenharmony_ci		ret = nvmf_connect_admin_queue(&ctrl->ctrl);
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci	if (!ret) {
6948c2ecf20Sopenharmony_ci		set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
6958c2ecf20Sopenharmony_ci	} else {
6968c2ecf20Sopenharmony_ci		if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
6978c2ecf20Sopenharmony_ci			__nvme_rdma_stop_queue(queue);
6988c2ecf20Sopenharmony_ci		dev_info(ctrl->ctrl.device,
6998c2ecf20Sopenharmony_ci			"failed to connect queue: %d ret=%d\n", idx, ret);
7008c2ecf20Sopenharmony_ci	}
7018c2ecf20Sopenharmony_ci	return ret;
7028c2ecf20Sopenharmony_ci}
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_cistatic int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl)
7058c2ecf20Sopenharmony_ci{
7068c2ecf20Sopenharmony_ci	int i, ret = 0;
7078c2ecf20Sopenharmony_ci
7088c2ecf20Sopenharmony_ci	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
7098c2ecf20Sopenharmony_ci		ret = nvme_rdma_start_queue(ctrl, i);
7108c2ecf20Sopenharmony_ci		if (ret)
7118c2ecf20Sopenharmony_ci			goto out_stop_queues;
7128c2ecf20Sopenharmony_ci	}
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci	return 0;
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ciout_stop_queues:
7178c2ecf20Sopenharmony_ci	for (i--; i >= 1; i--)
7188c2ecf20Sopenharmony_ci		nvme_rdma_stop_queue(&ctrl->queues[i]);
7198c2ecf20Sopenharmony_ci	return ret;
7208c2ecf20Sopenharmony_ci}
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_cistatic int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
7238c2ecf20Sopenharmony_ci{
7248c2ecf20Sopenharmony_ci	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
7258c2ecf20Sopenharmony_ci	struct ib_device *ibdev = ctrl->device->dev;
7268c2ecf20Sopenharmony_ci	unsigned int nr_io_queues, nr_default_queues;
7278c2ecf20Sopenharmony_ci	unsigned int nr_read_queues, nr_poll_queues;
7288c2ecf20Sopenharmony_ci	int i, ret;
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci	nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
7318c2ecf20Sopenharmony_ci				min(opts->nr_io_queues, num_online_cpus()));
7328c2ecf20Sopenharmony_ci	nr_default_queues =  min_t(unsigned int, ibdev->num_comp_vectors,
7338c2ecf20Sopenharmony_ci				min(opts->nr_write_queues, num_online_cpus()));
7348c2ecf20Sopenharmony_ci	nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
7358c2ecf20Sopenharmony_ci	nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;
7368c2ecf20Sopenharmony_ci
7378c2ecf20Sopenharmony_ci	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
7388c2ecf20Sopenharmony_ci	if (ret)
7398c2ecf20Sopenharmony_ci		return ret;
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_ci	if (nr_io_queues == 0) {
7428c2ecf20Sopenharmony_ci		dev_err(ctrl->ctrl.device,
7438c2ecf20Sopenharmony_ci			"unable to set any I/O queues\n");
7448c2ecf20Sopenharmony_ci		return -ENOMEM;
7458c2ecf20Sopenharmony_ci	}
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_ci	ctrl->ctrl.queue_count = nr_io_queues + 1;
7488c2ecf20Sopenharmony_ci	dev_info(ctrl->ctrl.device,
7498c2ecf20Sopenharmony_ci		"creating %d I/O queues.\n", nr_io_queues);
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci	if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
7528c2ecf20Sopenharmony_ci		/*
7538c2ecf20Sopenharmony_ci		 * separate read/write queues
7548c2ecf20Sopenharmony_ci		 * hand out dedicated default queues only after we have
7558c2ecf20Sopenharmony_ci		 * sufficient read queues.
7568c2ecf20Sopenharmony_ci		 */
7578c2ecf20Sopenharmony_ci		ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
7588c2ecf20Sopenharmony_ci		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
7598c2ecf20Sopenharmony_ci		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
7608c2ecf20Sopenharmony_ci			min(nr_default_queues, nr_io_queues);
7618c2ecf20Sopenharmony_ci		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
7628c2ecf20Sopenharmony_ci	} else {
7638c2ecf20Sopenharmony_ci		/*
7648c2ecf20Sopenharmony_ci		 * shared read/write queues
7658c2ecf20Sopenharmony_ci		 * either no write queues were requested, or we don't have
7668c2ecf20Sopenharmony_ci		 * sufficient queue count to have dedicated default queues.
7678c2ecf20Sopenharmony_ci		 */
7688c2ecf20Sopenharmony_ci		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
7698c2ecf20Sopenharmony_ci			min(nr_read_queues, nr_io_queues);
7708c2ecf20Sopenharmony_ci		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
7718c2ecf20Sopenharmony_ci	}
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	if (opts->nr_poll_queues && nr_io_queues) {
7748c2ecf20Sopenharmony_ci		/* map dedicated poll queues only if we have queues left */
7758c2ecf20Sopenharmony_ci		ctrl->io_queues[HCTX_TYPE_POLL] =
7768c2ecf20Sopenharmony_ci			min(nr_poll_queues, nr_io_queues);
7778c2ecf20Sopenharmony_ci	}
7788c2ecf20Sopenharmony_ci
7798c2ecf20Sopenharmony_ci	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
7808c2ecf20Sopenharmony_ci		ret = nvme_rdma_alloc_queue(ctrl, i,
7818c2ecf20Sopenharmony_ci				ctrl->ctrl.sqsize + 1);
7828c2ecf20Sopenharmony_ci		if (ret)
7838c2ecf20Sopenharmony_ci			goto out_free_queues;
7848c2ecf20Sopenharmony_ci	}
7858c2ecf20Sopenharmony_ci
7868c2ecf20Sopenharmony_ci	return 0;
7878c2ecf20Sopenharmony_ci
7888c2ecf20Sopenharmony_ciout_free_queues:
7898c2ecf20Sopenharmony_ci	for (i--; i >= 1; i--)
7908c2ecf20Sopenharmony_ci		nvme_rdma_free_queue(&ctrl->queues[i]);
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci	return ret;
7938c2ecf20Sopenharmony_ci}
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_cistatic struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
7968c2ecf20Sopenharmony_ci		bool admin)
7978c2ecf20Sopenharmony_ci{
7988c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
7998c2ecf20Sopenharmony_ci	struct blk_mq_tag_set *set;
8008c2ecf20Sopenharmony_ci	int ret;
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci	if (admin) {
8038c2ecf20Sopenharmony_ci		set = &ctrl->admin_tag_set;
8048c2ecf20Sopenharmony_ci		memset(set, 0, sizeof(*set));
8058c2ecf20Sopenharmony_ci		set->ops = &nvme_rdma_admin_mq_ops;
8068c2ecf20Sopenharmony_ci		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
8078c2ecf20Sopenharmony_ci		set->reserved_tags = 2; /* connect + keep-alive */
8088c2ecf20Sopenharmony_ci		set->numa_node = nctrl->numa_node;
8098c2ecf20Sopenharmony_ci		set->cmd_size = sizeof(struct nvme_rdma_request) +
8108c2ecf20Sopenharmony_ci				NVME_RDMA_DATA_SGL_SIZE;
8118c2ecf20Sopenharmony_ci		set->driver_data = ctrl;
8128c2ecf20Sopenharmony_ci		set->nr_hw_queues = 1;
8138c2ecf20Sopenharmony_ci		set->timeout = ADMIN_TIMEOUT;
8148c2ecf20Sopenharmony_ci		set->flags = BLK_MQ_F_NO_SCHED;
8158c2ecf20Sopenharmony_ci	} else {
8168c2ecf20Sopenharmony_ci		set = &ctrl->tag_set;
8178c2ecf20Sopenharmony_ci		memset(set, 0, sizeof(*set));
8188c2ecf20Sopenharmony_ci		set->ops = &nvme_rdma_mq_ops;
8198c2ecf20Sopenharmony_ci		set->queue_depth = nctrl->sqsize + 1;
8208c2ecf20Sopenharmony_ci		set->reserved_tags = 1; /* fabric connect */
8218c2ecf20Sopenharmony_ci		set->numa_node = nctrl->numa_node;
8228c2ecf20Sopenharmony_ci		set->flags = BLK_MQ_F_SHOULD_MERGE;
8238c2ecf20Sopenharmony_ci		set->cmd_size = sizeof(struct nvme_rdma_request) +
8248c2ecf20Sopenharmony_ci				NVME_RDMA_DATA_SGL_SIZE;
8258c2ecf20Sopenharmony_ci		if (nctrl->max_integrity_segments)
8268c2ecf20Sopenharmony_ci			set->cmd_size += sizeof(struct nvme_rdma_sgl) +
8278c2ecf20Sopenharmony_ci					 NVME_RDMA_METADATA_SGL_SIZE;
8288c2ecf20Sopenharmony_ci		set->driver_data = ctrl;
8298c2ecf20Sopenharmony_ci		set->nr_hw_queues = nctrl->queue_count - 1;
8308c2ecf20Sopenharmony_ci		set->timeout = NVME_IO_TIMEOUT;
8318c2ecf20Sopenharmony_ci		set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
8328c2ecf20Sopenharmony_ci	}
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_ci	ret = blk_mq_alloc_tag_set(set);
8358c2ecf20Sopenharmony_ci	if (ret)
8368c2ecf20Sopenharmony_ci		return ERR_PTR(ret);
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci	return set;
8398c2ecf20Sopenharmony_ci}
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_cistatic void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
8428c2ecf20Sopenharmony_ci		bool remove)
8438c2ecf20Sopenharmony_ci{
8448c2ecf20Sopenharmony_ci	if (remove) {
8458c2ecf20Sopenharmony_ci		blk_cleanup_queue(ctrl->ctrl.admin_q);
8468c2ecf20Sopenharmony_ci		blk_cleanup_queue(ctrl->ctrl.fabrics_q);
8478c2ecf20Sopenharmony_ci		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
8488c2ecf20Sopenharmony_ci	}
8498c2ecf20Sopenharmony_ci	if (ctrl->async_event_sqe.data) {
8508c2ecf20Sopenharmony_ci		cancel_work_sync(&ctrl->ctrl.async_event_work);
8518c2ecf20Sopenharmony_ci		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
8528c2ecf20Sopenharmony_ci				sizeof(struct nvme_command), DMA_TO_DEVICE);
8538c2ecf20Sopenharmony_ci		ctrl->async_event_sqe.data = NULL;
8548c2ecf20Sopenharmony_ci	}
8558c2ecf20Sopenharmony_ci	nvme_rdma_free_queue(&ctrl->queues[0]);
8568c2ecf20Sopenharmony_ci}
8578c2ecf20Sopenharmony_ci
8588c2ecf20Sopenharmony_cistatic int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
8598c2ecf20Sopenharmony_ci		bool new)
8608c2ecf20Sopenharmony_ci{
8618c2ecf20Sopenharmony_ci	bool pi_capable = false;
8628c2ecf20Sopenharmony_ci	int error;
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci	error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
8658c2ecf20Sopenharmony_ci	if (error)
8668c2ecf20Sopenharmony_ci		return error;
8678c2ecf20Sopenharmony_ci
8688c2ecf20Sopenharmony_ci	ctrl->device = ctrl->queues[0].device;
8698c2ecf20Sopenharmony_ci	ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev);
8708c2ecf20Sopenharmony_ci
8718c2ecf20Sopenharmony_ci	/* T10-PI support */
8728c2ecf20Sopenharmony_ci	if (ctrl->device->dev->attrs.device_cap_flags &
8738c2ecf20Sopenharmony_ci	    IB_DEVICE_INTEGRITY_HANDOVER)
8748c2ecf20Sopenharmony_ci		pi_capable = true;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
8778c2ecf20Sopenharmony_ci							pi_capable);
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_ci	/*
8808c2ecf20Sopenharmony_ci	 * Bind the async event SQE DMA mapping to the admin queue lifetime.
8818c2ecf20Sopenharmony_ci	 * It's safe, since any chage in the underlying RDMA device will issue
8828c2ecf20Sopenharmony_ci	 * error recovery and queue re-creation.
8838c2ecf20Sopenharmony_ci	 */
8848c2ecf20Sopenharmony_ci	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
8858c2ecf20Sopenharmony_ci			sizeof(struct nvme_command), DMA_TO_DEVICE);
8868c2ecf20Sopenharmony_ci	if (error)
8878c2ecf20Sopenharmony_ci		goto out_free_queue;
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_ci	if (new) {
8908c2ecf20Sopenharmony_ci		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
8918c2ecf20Sopenharmony_ci		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
8928c2ecf20Sopenharmony_ci			error = PTR_ERR(ctrl->ctrl.admin_tagset);
8938c2ecf20Sopenharmony_ci			goto out_free_async_qe;
8948c2ecf20Sopenharmony_ci		}
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci		ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set);
8978c2ecf20Sopenharmony_ci		if (IS_ERR(ctrl->ctrl.fabrics_q)) {
8988c2ecf20Sopenharmony_ci			error = PTR_ERR(ctrl->ctrl.fabrics_q);
8998c2ecf20Sopenharmony_ci			goto out_free_tagset;
9008c2ecf20Sopenharmony_ci		}
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci		ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
9038c2ecf20Sopenharmony_ci		if (IS_ERR(ctrl->ctrl.admin_q)) {
9048c2ecf20Sopenharmony_ci			error = PTR_ERR(ctrl->ctrl.admin_q);
9058c2ecf20Sopenharmony_ci			goto out_cleanup_fabrics_q;
9068c2ecf20Sopenharmony_ci		}
9078c2ecf20Sopenharmony_ci	}
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci	error = nvme_rdma_start_queue(ctrl, 0);
9108c2ecf20Sopenharmony_ci	if (error)
9118c2ecf20Sopenharmony_ci		goto out_cleanup_queue;
9128c2ecf20Sopenharmony_ci
9138c2ecf20Sopenharmony_ci	error = nvme_enable_ctrl(&ctrl->ctrl);
9148c2ecf20Sopenharmony_ci	if (error)
9158c2ecf20Sopenharmony_ci		goto out_stop_queue;
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	ctrl->ctrl.max_segments = ctrl->max_fr_pages;
9188c2ecf20Sopenharmony_ci	ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
9198c2ecf20Sopenharmony_ci	if (pi_capable)
9208c2ecf20Sopenharmony_ci		ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
9218c2ecf20Sopenharmony_ci	else
9228c2ecf20Sopenharmony_ci		ctrl->ctrl.max_integrity_segments = 0;
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	nvme_start_admin_queue(&ctrl->ctrl);
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci	error = nvme_init_identify(&ctrl->ctrl);
9278c2ecf20Sopenharmony_ci	if (error)
9288c2ecf20Sopenharmony_ci		goto out_quiesce_queue;
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_ci	return 0;
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ciout_quiesce_queue:
9338c2ecf20Sopenharmony_ci	nvme_stop_admin_queue(&ctrl->ctrl);
9348c2ecf20Sopenharmony_ci	blk_sync_queue(ctrl->ctrl.admin_q);
9358c2ecf20Sopenharmony_ciout_stop_queue:
9368c2ecf20Sopenharmony_ci	nvme_rdma_stop_queue(&ctrl->queues[0]);
9378c2ecf20Sopenharmony_ci	nvme_cancel_admin_tagset(&ctrl->ctrl);
9388c2ecf20Sopenharmony_ciout_cleanup_queue:
9398c2ecf20Sopenharmony_ci	if (new)
9408c2ecf20Sopenharmony_ci		blk_cleanup_queue(ctrl->ctrl.admin_q);
9418c2ecf20Sopenharmony_ciout_cleanup_fabrics_q:
9428c2ecf20Sopenharmony_ci	if (new)
9438c2ecf20Sopenharmony_ci		blk_cleanup_queue(ctrl->ctrl.fabrics_q);
9448c2ecf20Sopenharmony_ciout_free_tagset:
9458c2ecf20Sopenharmony_ci	if (new)
9468c2ecf20Sopenharmony_ci		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
9478c2ecf20Sopenharmony_ciout_free_async_qe:
9488c2ecf20Sopenharmony_ci	if (ctrl->async_event_sqe.data) {
9498c2ecf20Sopenharmony_ci		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
9508c2ecf20Sopenharmony_ci			sizeof(struct nvme_command), DMA_TO_DEVICE);
9518c2ecf20Sopenharmony_ci		ctrl->async_event_sqe.data = NULL;
9528c2ecf20Sopenharmony_ci	}
9538c2ecf20Sopenharmony_ciout_free_queue:
9548c2ecf20Sopenharmony_ci	nvme_rdma_free_queue(&ctrl->queues[0]);
9558c2ecf20Sopenharmony_ci	return error;
9568c2ecf20Sopenharmony_ci}
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_cistatic void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
9598c2ecf20Sopenharmony_ci		bool remove)
9608c2ecf20Sopenharmony_ci{
9618c2ecf20Sopenharmony_ci	if (remove) {
9628c2ecf20Sopenharmony_ci		blk_cleanup_queue(ctrl->ctrl.connect_q);
9638c2ecf20Sopenharmony_ci		blk_mq_free_tag_set(ctrl->ctrl.tagset);
9648c2ecf20Sopenharmony_ci	}
9658c2ecf20Sopenharmony_ci	nvme_rdma_free_io_queues(ctrl);
9668c2ecf20Sopenharmony_ci}
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_cistatic int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
9698c2ecf20Sopenharmony_ci{
9708c2ecf20Sopenharmony_ci	int ret;
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_ci	ret = nvme_rdma_alloc_io_queues(ctrl);
9738c2ecf20Sopenharmony_ci	if (ret)
9748c2ecf20Sopenharmony_ci		return ret;
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci	if (new) {
9778c2ecf20Sopenharmony_ci		ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
9788c2ecf20Sopenharmony_ci		if (IS_ERR(ctrl->ctrl.tagset)) {
9798c2ecf20Sopenharmony_ci			ret = PTR_ERR(ctrl->ctrl.tagset);
9808c2ecf20Sopenharmony_ci			goto out_free_io_queues;
9818c2ecf20Sopenharmony_ci		}
9828c2ecf20Sopenharmony_ci
9838c2ecf20Sopenharmony_ci		ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
9848c2ecf20Sopenharmony_ci		if (IS_ERR(ctrl->ctrl.connect_q)) {
9858c2ecf20Sopenharmony_ci			ret = PTR_ERR(ctrl->ctrl.connect_q);
9868c2ecf20Sopenharmony_ci			goto out_free_tag_set;
9878c2ecf20Sopenharmony_ci		}
9888c2ecf20Sopenharmony_ci	}
9898c2ecf20Sopenharmony_ci
9908c2ecf20Sopenharmony_ci	ret = nvme_rdma_start_io_queues(ctrl);
9918c2ecf20Sopenharmony_ci	if (ret)
9928c2ecf20Sopenharmony_ci		goto out_cleanup_connect_q;
9938c2ecf20Sopenharmony_ci
9948c2ecf20Sopenharmony_ci	if (!new) {
9958c2ecf20Sopenharmony_ci		nvme_start_freeze(&ctrl->ctrl);
9968c2ecf20Sopenharmony_ci		nvme_start_queues(&ctrl->ctrl);
9978c2ecf20Sopenharmony_ci		if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
9988c2ecf20Sopenharmony_ci			/*
9998c2ecf20Sopenharmony_ci			 * If we timed out waiting for freeze we are likely to
10008c2ecf20Sopenharmony_ci			 * be stuck.  Fail the controller initialization just
10018c2ecf20Sopenharmony_ci			 * to be safe.
10028c2ecf20Sopenharmony_ci			 */
10038c2ecf20Sopenharmony_ci			ret = -ENODEV;
10048c2ecf20Sopenharmony_ci			nvme_unfreeze(&ctrl->ctrl);
10058c2ecf20Sopenharmony_ci			goto out_wait_freeze_timed_out;
10068c2ecf20Sopenharmony_ci		}
10078c2ecf20Sopenharmony_ci		blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
10088c2ecf20Sopenharmony_ci			ctrl->ctrl.queue_count - 1);
10098c2ecf20Sopenharmony_ci		nvme_unfreeze(&ctrl->ctrl);
10108c2ecf20Sopenharmony_ci	}
10118c2ecf20Sopenharmony_ci
10128c2ecf20Sopenharmony_ci	return 0;
10138c2ecf20Sopenharmony_ci
10148c2ecf20Sopenharmony_ciout_wait_freeze_timed_out:
10158c2ecf20Sopenharmony_ci	nvme_stop_queues(&ctrl->ctrl);
10168c2ecf20Sopenharmony_ci	nvme_sync_io_queues(&ctrl->ctrl);
10178c2ecf20Sopenharmony_ci	nvme_rdma_stop_io_queues(ctrl);
10188c2ecf20Sopenharmony_ciout_cleanup_connect_q:
10198c2ecf20Sopenharmony_ci	nvme_cancel_tagset(&ctrl->ctrl);
10208c2ecf20Sopenharmony_ci	if (new)
10218c2ecf20Sopenharmony_ci		blk_cleanup_queue(ctrl->ctrl.connect_q);
10228c2ecf20Sopenharmony_ciout_free_tag_set:
10238c2ecf20Sopenharmony_ci	if (new)
10248c2ecf20Sopenharmony_ci		blk_mq_free_tag_set(ctrl->ctrl.tagset);
10258c2ecf20Sopenharmony_ciout_free_io_queues:
10268c2ecf20Sopenharmony_ci	nvme_rdma_free_io_queues(ctrl);
10278c2ecf20Sopenharmony_ci	return ret;
10288c2ecf20Sopenharmony_ci}
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_cistatic void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
10318c2ecf20Sopenharmony_ci		bool remove)
10328c2ecf20Sopenharmony_ci{
10338c2ecf20Sopenharmony_ci	nvme_stop_admin_queue(&ctrl->ctrl);
10348c2ecf20Sopenharmony_ci	blk_sync_queue(ctrl->ctrl.admin_q);
10358c2ecf20Sopenharmony_ci	nvme_rdma_stop_queue(&ctrl->queues[0]);
10368c2ecf20Sopenharmony_ci	if (ctrl->ctrl.admin_tagset) {
10378c2ecf20Sopenharmony_ci		blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
10388c2ecf20Sopenharmony_ci			nvme_cancel_request, &ctrl->ctrl);
10398c2ecf20Sopenharmony_ci		blk_mq_tagset_wait_completed_request(ctrl->ctrl.admin_tagset);
10408c2ecf20Sopenharmony_ci	}
10418c2ecf20Sopenharmony_ci	if (remove)
10428c2ecf20Sopenharmony_ci		nvme_start_admin_queue(&ctrl->ctrl);
10438c2ecf20Sopenharmony_ci	nvme_rdma_destroy_admin_queue(ctrl, remove);
10448c2ecf20Sopenharmony_ci}
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_cistatic void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
10478c2ecf20Sopenharmony_ci		bool remove)
10488c2ecf20Sopenharmony_ci{
10498c2ecf20Sopenharmony_ci	if (ctrl->ctrl.queue_count > 1) {
10508c2ecf20Sopenharmony_ci		nvme_stop_queues(&ctrl->ctrl);
10518c2ecf20Sopenharmony_ci		nvme_sync_io_queues(&ctrl->ctrl);
10528c2ecf20Sopenharmony_ci		nvme_rdma_stop_io_queues(ctrl);
10538c2ecf20Sopenharmony_ci		if (ctrl->ctrl.tagset) {
10548c2ecf20Sopenharmony_ci			blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
10558c2ecf20Sopenharmony_ci				nvme_cancel_request, &ctrl->ctrl);
10568c2ecf20Sopenharmony_ci			blk_mq_tagset_wait_completed_request(ctrl->ctrl.tagset);
10578c2ecf20Sopenharmony_ci		}
10588c2ecf20Sopenharmony_ci		if (remove)
10598c2ecf20Sopenharmony_ci			nvme_start_queues(&ctrl->ctrl);
10608c2ecf20Sopenharmony_ci		nvme_rdma_destroy_io_queues(ctrl, remove);
10618c2ecf20Sopenharmony_ci	}
10628c2ecf20Sopenharmony_ci}
10638c2ecf20Sopenharmony_ci
10648c2ecf20Sopenharmony_cistatic void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
10658c2ecf20Sopenharmony_ci{
10668c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
10678c2ecf20Sopenharmony_ci
10688c2ecf20Sopenharmony_ci	cancel_work_sync(&ctrl->err_work);
10698c2ecf20Sopenharmony_ci	cancel_delayed_work_sync(&ctrl->reconnect_work);
10708c2ecf20Sopenharmony_ci}
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_cistatic void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
10738c2ecf20Sopenharmony_ci{
10748c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_ci	if (list_empty(&ctrl->list))
10778c2ecf20Sopenharmony_ci		goto free_ctrl;
10788c2ecf20Sopenharmony_ci
10798c2ecf20Sopenharmony_ci	mutex_lock(&nvme_rdma_ctrl_mutex);
10808c2ecf20Sopenharmony_ci	list_del(&ctrl->list);
10818c2ecf20Sopenharmony_ci	mutex_unlock(&nvme_rdma_ctrl_mutex);
10828c2ecf20Sopenharmony_ci
10838c2ecf20Sopenharmony_ci	nvmf_free_options(nctrl->opts);
10848c2ecf20Sopenharmony_cifree_ctrl:
10858c2ecf20Sopenharmony_ci	kfree(ctrl->queues);
10868c2ecf20Sopenharmony_ci	kfree(ctrl);
10878c2ecf20Sopenharmony_ci}
10888c2ecf20Sopenharmony_ci
10898c2ecf20Sopenharmony_cistatic void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
10908c2ecf20Sopenharmony_ci{
10918c2ecf20Sopenharmony_ci	/* If we are resetting/deleting then do nothing */
10928c2ecf20Sopenharmony_ci	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
10938c2ecf20Sopenharmony_ci		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
10948c2ecf20Sopenharmony_ci			ctrl->ctrl.state == NVME_CTRL_LIVE);
10958c2ecf20Sopenharmony_ci		return;
10968c2ecf20Sopenharmony_ci	}
10978c2ecf20Sopenharmony_ci
10988c2ecf20Sopenharmony_ci	if (nvmf_should_reconnect(&ctrl->ctrl)) {
10998c2ecf20Sopenharmony_ci		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
11008c2ecf20Sopenharmony_ci			ctrl->ctrl.opts->reconnect_delay);
11018c2ecf20Sopenharmony_ci		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
11028c2ecf20Sopenharmony_ci				ctrl->ctrl.opts->reconnect_delay * HZ);
11038c2ecf20Sopenharmony_ci	} else {
11048c2ecf20Sopenharmony_ci		nvme_delete_ctrl(&ctrl->ctrl);
11058c2ecf20Sopenharmony_ci	}
11068c2ecf20Sopenharmony_ci}
11078c2ecf20Sopenharmony_ci
11088c2ecf20Sopenharmony_cistatic int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
11098c2ecf20Sopenharmony_ci{
11108c2ecf20Sopenharmony_ci	int ret = -EINVAL;
11118c2ecf20Sopenharmony_ci	bool changed;
11128c2ecf20Sopenharmony_ci
11138c2ecf20Sopenharmony_ci	ret = nvme_rdma_configure_admin_queue(ctrl, new);
11148c2ecf20Sopenharmony_ci	if (ret)
11158c2ecf20Sopenharmony_ci		return ret;
11168c2ecf20Sopenharmony_ci
11178c2ecf20Sopenharmony_ci	if (ctrl->ctrl.icdoff) {
11188c2ecf20Sopenharmony_ci		ret = -EOPNOTSUPP;
11198c2ecf20Sopenharmony_ci		dev_err(ctrl->ctrl.device, "icdoff is not supported!\n");
11208c2ecf20Sopenharmony_ci		goto destroy_admin;
11218c2ecf20Sopenharmony_ci	}
11228c2ecf20Sopenharmony_ci
11238c2ecf20Sopenharmony_ci	if (!(ctrl->ctrl.sgls & (1 << 2))) {
11248c2ecf20Sopenharmony_ci		ret = -EOPNOTSUPP;
11258c2ecf20Sopenharmony_ci		dev_err(ctrl->ctrl.device,
11268c2ecf20Sopenharmony_ci			"Mandatory keyed sgls are not supported!\n");
11278c2ecf20Sopenharmony_ci		goto destroy_admin;
11288c2ecf20Sopenharmony_ci	}
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci	if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) {
11318c2ecf20Sopenharmony_ci		dev_warn(ctrl->ctrl.device,
11328c2ecf20Sopenharmony_ci			"queue_size %zu > ctrl sqsize %u, clamping down\n",
11338c2ecf20Sopenharmony_ci			ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
11348c2ecf20Sopenharmony_ci	}
11358c2ecf20Sopenharmony_ci
11368c2ecf20Sopenharmony_ci	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
11378c2ecf20Sopenharmony_ci		dev_warn(ctrl->ctrl.device,
11388c2ecf20Sopenharmony_ci			"sqsize %u > ctrl maxcmd %u, clamping down\n",
11398c2ecf20Sopenharmony_ci			ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
11408c2ecf20Sopenharmony_ci		ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
11418c2ecf20Sopenharmony_ci	}
11428c2ecf20Sopenharmony_ci
11438c2ecf20Sopenharmony_ci	if (ctrl->ctrl.sgls & (1 << 20))
11448c2ecf20Sopenharmony_ci		ctrl->use_inline_data = true;
11458c2ecf20Sopenharmony_ci
11468c2ecf20Sopenharmony_ci	if (ctrl->ctrl.queue_count > 1) {
11478c2ecf20Sopenharmony_ci		ret = nvme_rdma_configure_io_queues(ctrl, new);
11488c2ecf20Sopenharmony_ci		if (ret)
11498c2ecf20Sopenharmony_ci			goto destroy_admin;
11508c2ecf20Sopenharmony_ci	}
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
11538c2ecf20Sopenharmony_ci	if (!changed) {
11548c2ecf20Sopenharmony_ci		/*
11558c2ecf20Sopenharmony_ci		 * state change failure is ok if we started ctrl delete,
11568c2ecf20Sopenharmony_ci		 * unless we're during creation of a new controller to
11578c2ecf20Sopenharmony_ci		 * avoid races with teardown flow.
11588c2ecf20Sopenharmony_ci		 */
11598c2ecf20Sopenharmony_ci		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
11608c2ecf20Sopenharmony_ci			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
11618c2ecf20Sopenharmony_ci		WARN_ON_ONCE(new);
11628c2ecf20Sopenharmony_ci		ret = -EINVAL;
11638c2ecf20Sopenharmony_ci		goto destroy_io;
11648c2ecf20Sopenharmony_ci	}
11658c2ecf20Sopenharmony_ci
11668c2ecf20Sopenharmony_ci	nvme_start_ctrl(&ctrl->ctrl);
11678c2ecf20Sopenharmony_ci	return 0;
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_cidestroy_io:
11708c2ecf20Sopenharmony_ci	if (ctrl->ctrl.queue_count > 1) {
11718c2ecf20Sopenharmony_ci		nvme_stop_queues(&ctrl->ctrl);
11728c2ecf20Sopenharmony_ci		nvme_sync_io_queues(&ctrl->ctrl);
11738c2ecf20Sopenharmony_ci		nvme_rdma_stop_io_queues(ctrl);
11748c2ecf20Sopenharmony_ci		nvme_cancel_tagset(&ctrl->ctrl);
11758c2ecf20Sopenharmony_ci		nvme_rdma_destroy_io_queues(ctrl, new);
11768c2ecf20Sopenharmony_ci	}
11778c2ecf20Sopenharmony_cidestroy_admin:
11788c2ecf20Sopenharmony_ci	nvme_stop_admin_queue(&ctrl->ctrl);
11798c2ecf20Sopenharmony_ci	blk_sync_queue(ctrl->ctrl.admin_q);
11808c2ecf20Sopenharmony_ci	nvme_rdma_stop_queue(&ctrl->queues[0]);
11818c2ecf20Sopenharmony_ci	nvme_cancel_admin_tagset(&ctrl->ctrl);
11828c2ecf20Sopenharmony_ci	nvme_rdma_destroy_admin_queue(ctrl, new);
11838c2ecf20Sopenharmony_ci	return ret;
11848c2ecf20Sopenharmony_ci}
11858c2ecf20Sopenharmony_ci
11868c2ecf20Sopenharmony_cistatic void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
11878c2ecf20Sopenharmony_ci{
11888c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
11898c2ecf20Sopenharmony_ci			struct nvme_rdma_ctrl, reconnect_work);
11908c2ecf20Sopenharmony_ci
11918c2ecf20Sopenharmony_ci	++ctrl->ctrl.nr_reconnects;
11928c2ecf20Sopenharmony_ci
11938c2ecf20Sopenharmony_ci	if (nvme_rdma_setup_ctrl(ctrl, false))
11948c2ecf20Sopenharmony_ci		goto requeue;
11958c2ecf20Sopenharmony_ci
11968c2ecf20Sopenharmony_ci	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
11978c2ecf20Sopenharmony_ci			ctrl->ctrl.nr_reconnects);
11988c2ecf20Sopenharmony_ci
11998c2ecf20Sopenharmony_ci	ctrl->ctrl.nr_reconnects = 0;
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci	return;
12028c2ecf20Sopenharmony_ci
12038c2ecf20Sopenharmony_cirequeue:
12048c2ecf20Sopenharmony_ci	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
12058c2ecf20Sopenharmony_ci			ctrl->ctrl.nr_reconnects);
12068c2ecf20Sopenharmony_ci	nvme_rdma_reconnect_or_remove(ctrl);
12078c2ecf20Sopenharmony_ci}
12088c2ecf20Sopenharmony_ci
12098c2ecf20Sopenharmony_cistatic void nvme_rdma_error_recovery_work(struct work_struct *work)
12108c2ecf20Sopenharmony_ci{
12118c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = container_of(work,
12128c2ecf20Sopenharmony_ci			struct nvme_rdma_ctrl, err_work);
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_ci	nvme_stop_keep_alive(&ctrl->ctrl);
12158c2ecf20Sopenharmony_ci	flush_work(&ctrl->ctrl.async_event_work);
12168c2ecf20Sopenharmony_ci	nvme_rdma_teardown_io_queues(ctrl, false);
12178c2ecf20Sopenharmony_ci	nvme_start_queues(&ctrl->ctrl);
12188c2ecf20Sopenharmony_ci	nvme_rdma_teardown_admin_queue(ctrl, false);
12198c2ecf20Sopenharmony_ci	nvme_start_admin_queue(&ctrl->ctrl);
12208c2ecf20Sopenharmony_ci
12218c2ecf20Sopenharmony_ci	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
12228c2ecf20Sopenharmony_ci		/* state change failure is ok if we started ctrl delete */
12238c2ecf20Sopenharmony_ci		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
12248c2ecf20Sopenharmony_ci			     ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
12258c2ecf20Sopenharmony_ci		return;
12268c2ecf20Sopenharmony_ci	}
12278c2ecf20Sopenharmony_ci
12288c2ecf20Sopenharmony_ci	nvme_rdma_reconnect_or_remove(ctrl);
12298c2ecf20Sopenharmony_ci}
12308c2ecf20Sopenharmony_ci
12318c2ecf20Sopenharmony_cistatic void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
12328c2ecf20Sopenharmony_ci{
12338c2ecf20Sopenharmony_ci	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
12348c2ecf20Sopenharmony_ci		return;
12358c2ecf20Sopenharmony_ci
12368c2ecf20Sopenharmony_ci	dev_warn(ctrl->ctrl.device, "starting error recovery\n");
12378c2ecf20Sopenharmony_ci	queue_work(nvme_reset_wq, &ctrl->err_work);
12388c2ecf20Sopenharmony_ci}
12398c2ecf20Sopenharmony_ci
12408c2ecf20Sopenharmony_cistatic void nvme_rdma_end_request(struct nvme_rdma_request *req)
12418c2ecf20Sopenharmony_ci{
12428c2ecf20Sopenharmony_ci	struct request *rq = blk_mq_rq_from_pdu(req);
12438c2ecf20Sopenharmony_ci
12448c2ecf20Sopenharmony_ci	if (!refcount_dec_and_test(&req->ref))
12458c2ecf20Sopenharmony_ci		return;
12468c2ecf20Sopenharmony_ci	if (!nvme_try_complete_req(rq, req->status, req->result))
12478c2ecf20Sopenharmony_ci		nvme_rdma_complete_rq(rq);
12488c2ecf20Sopenharmony_ci}
12498c2ecf20Sopenharmony_ci
12508c2ecf20Sopenharmony_cistatic void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
12518c2ecf20Sopenharmony_ci		const char *op)
12528c2ecf20Sopenharmony_ci{
12538c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = wc->qp->qp_context;
12548c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_ci	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
12578c2ecf20Sopenharmony_ci		dev_info(ctrl->ctrl.device,
12588c2ecf20Sopenharmony_ci			     "%s for CQE 0x%p failed with status %s (%d)\n",
12598c2ecf20Sopenharmony_ci			     op, wc->wr_cqe,
12608c2ecf20Sopenharmony_ci			     ib_wc_status_msg(wc->status), wc->status);
12618c2ecf20Sopenharmony_ci	nvme_rdma_error_recovery(ctrl);
12628c2ecf20Sopenharmony_ci}
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_cistatic void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
12658c2ecf20Sopenharmony_ci{
12668c2ecf20Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS))
12678c2ecf20Sopenharmony_ci		nvme_rdma_wr_error(cq, wc, "MEMREG");
12688c2ecf20Sopenharmony_ci}
12698c2ecf20Sopenharmony_ci
12708c2ecf20Sopenharmony_cistatic void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
12718c2ecf20Sopenharmony_ci{
12728c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req =
12738c2ecf20Sopenharmony_ci		container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
12748c2ecf20Sopenharmony_ci
12758c2ecf20Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS))
12768c2ecf20Sopenharmony_ci		nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
12778c2ecf20Sopenharmony_ci	else
12788c2ecf20Sopenharmony_ci		nvme_rdma_end_request(req);
12798c2ecf20Sopenharmony_ci}
12808c2ecf20Sopenharmony_ci
12818c2ecf20Sopenharmony_cistatic int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
12828c2ecf20Sopenharmony_ci		struct nvme_rdma_request *req)
12838c2ecf20Sopenharmony_ci{
12848c2ecf20Sopenharmony_ci	struct ib_send_wr wr = {
12858c2ecf20Sopenharmony_ci		.opcode		    = IB_WR_LOCAL_INV,
12868c2ecf20Sopenharmony_ci		.next		    = NULL,
12878c2ecf20Sopenharmony_ci		.num_sge	    = 0,
12888c2ecf20Sopenharmony_ci		.send_flags	    = IB_SEND_SIGNALED,
12898c2ecf20Sopenharmony_ci		.ex.invalidate_rkey = req->mr->rkey,
12908c2ecf20Sopenharmony_ci	};
12918c2ecf20Sopenharmony_ci
12928c2ecf20Sopenharmony_ci	req->reg_cqe.done = nvme_rdma_inv_rkey_done;
12938c2ecf20Sopenharmony_ci	wr.wr_cqe = &req->reg_cqe;
12948c2ecf20Sopenharmony_ci
12958c2ecf20Sopenharmony_ci	return ib_post_send(queue->qp, &wr, NULL);
12968c2ecf20Sopenharmony_ci}
12978c2ecf20Sopenharmony_ci
12988c2ecf20Sopenharmony_cistatic void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
12998c2ecf20Sopenharmony_ci		struct request *rq)
13008c2ecf20Sopenharmony_ci{
13018c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
13028c2ecf20Sopenharmony_ci	struct nvme_rdma_device *dev = queue->device;
13038c2ecf20Sopenharmony_ci	struct ib_device *ibdev = dev->dev;
13048c2ecf20Sopenharmony_ci	struct list_head *pool = &queue->qp->rdma_mrs;
13058c2ecf20Sopenharmony_ci
13068c2ecf20Sopenharmony_ci	if (!blk_rq_nr_phys_segments(rq))
13078c2ecf20Sopenharmony_ci		return;
13088c2ecf20Sopenharmony_ci
13098c2ecf20Sopenharmony_ci	if (blk_integrity_rq(rq)) {
13108c2ecf20Sopenharmony_ci		ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
13118c2ecf20Sopenharmony_ci				req->metadata_sgl->nents, rq_dma_dir(rq));
13128c2ecf20Sopenharmony_ci		sg_free_table_chained(&req->metadata_sgl->sg_table,
13138c2ecf20Sopenharmony_ci				      NVME_INLINE_METADATA_SG_CNT);
13148c2ecf20Sopenharmony_ci	}
13158c2ecf20Sopenharmony_ci
13168c2ecf20Sopenharmony_ci	if (req->use_sig_mr)
13178c2ecf20Sopenharmony_ci		pool = &queue->qp->sig_mrs;
13188c2ecf20Sopenharmony_ci
13198c2ecf20Sopenharmony_ci	if (req->mr) {
13208c2ecf20Sopenharmony_ci		ib_mr_pool_put(queue->qp, pool, req->mr);
13218c2ecf20Sopenharmony_ci		req->mr = NULL;
13228c2ecf20Sopenharmony_ci	}
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_ci	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
13258c2ecf20Sopenharmony_ci			rq_dma_dir(rq));
13268c2ecf20Sopenharmony_ci	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
13278c2ecf20Sopenharmony_ci}
13288c2ecf20Sopenharmony_ci
13298c2ecf20Sopenharmony_cistatic int nvme_rdma_set_sg_null(struct nvme_command *c)
13308c2ecf20Sopenharmony_ci{
13318c2ecf20Sopenharmony_ci	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
13328c2ecf20Sopenharmony_ci
13338c2ecf20Sopenharmony_ci	sg->addr = 0;
13348c2ecf20Sopenharmony_ci	put_unaligned_le24(0, sg->length);
13358c2ecf20Sopenharmony_ci	put_unaligned_le32(0, sg->key);
13368c2ecf20Sopenharmony_ci	sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
13378c2ecf20Sopenharmony_ci	return 0;
13388c2ecf20Sopenharmony_ci}
13398c2ecf20Sopenharmony_ci
13408c2ecf20Sopenharmony_cistatic int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
13418c2ecf20Sopenharmony_ci		struct nvme_rdma_request *req, struct nvme_command *c,
13428c2ecf20Sopenharmony_ci		int count)
13438c2ecf20Sopenharmony_ci{
13448c2ecf20Sopenharmony_ci	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
13458c2ecf20Sopenharmony_ci	struct ib_sge *sge = &req->sge[1];
13468c2ecf20Sopenharmony_ci	struct scatterlist *sgl;
13478c2ecf20Sopenharmony_ci	u32 len = 0;
13488c2ecf20Sopenharmony_ci	int i;
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	for_each_sg(req->data_sgl.sg_table.sgl, sgl, count, i) {
13518c2ecf20Sopenharmony_ci		sge->addr = sg_dma_address(sgl);
13528c2ecf20Sopenharmony_ci		sge->length = sg_dma_len(sgl);
13538c2ecf20Sopenharmony_ci		sge->lkey = queue->device->pd->local_dma_lkey;
13548c2ecf20Sopenharmony_ci		len += sge->length;
13558c2ecf20Sopenharmony_ci		sge++;
13568c2ecf20Sopenharmony_ci	}
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_ci	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
13598c2ecf20Sopenharmony_ci	sg->length = cpu_to_le32(len);
13608c2ecf20Sopenharmony_ci	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_ci	req->num_sge += count;
13638c2ecf20Sopenharmony_ci	return 0;
13648c2ecf20Sopenharmony_ci}
13658c2ecf20Sopenharmony_ci
13668c2ecf20Sopenharmony_cistatic int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
13678c2ecf20Sopenharmony_ci		struct nvme_rdma_request *req, struct nvme_command *c)
13688c2ecf20Sopenharmony_ci{
13698c2ecf20Sopenharmony_ci	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
13708c2ecf20Sopenharmony_ci
13718c2ecf20Sopenharmony_ci	sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
13728c2ecf20Sopenharmony_ci	put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
13738c2ecf20Sopenharmony_ci	put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
13748c2ecf20Sopenharmony_ci	sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
13758c2ecf20Sopenharmony_ci	return 0;
13768c2ecf20Sopenharmony_ci}
13778c2ecf20Sopenharmony_ci
13788c2ecf20Sopenharmony_cistatic int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
13798c2ecf20Sopenharmony_ci		struct nvme_rdma_request *req, struct nvme_command *c,
13808c2ecf20Sopenharmony_ci		int count)
13818c2ecf20Sopenharmony_ci{
13828c2ecf20Sopenharmony_ci	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
13838c2ecf20Sopenharmony_ci	int nr;
13848c2ecf20Sopenharmony_ci
13858c2ecf20Sopenharmony_ci	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
13868c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!req->mr))
13878c2ecf20Sopenharmony_ci		return -EAGAIN;
13888c2ecf20Sopenharmony_ci
13898c2ecf20Sopenharmony_ci	/*
13908c2ecf20Sopenharmony_ci	 * Align the MR to a 4K page size to match the ctrl page size and
13918c2ecf20Sopenharmony_ci	 * the block virtual boundary.
13928c2ecf20Sopenharmony_ci	 */
13938c2ecf20Sopenharmony_ci	nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
13948c2ecf20Sopenharmony_ci			  SZ_4K);
13958c2ecf20Sopenharmony_ci	if (unlikely(nr < count)) {
13968c2ecf20Sopenharmony_ci		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
13978c2ecf20Sopenharmony_ci		req->mr = NULL;
13988c2ecf20Sopenharmony_ci		if (nr < 0)
13998c2ecf20Sopenharmony_ci			return nr;
14008c2ecf20Sopenharmony_ci		return -EINVAL;
14018c2ecf20Sopenharmony_ci	}
14028c2ecf20Sopenharmony_ci
14038c2ecf20Sopenharmony_ci	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_ci	req->reg_cqe.done = nvme_rdma_memreg_done;
14068c2ecf20Sopenharmony_ci	memset(&req->reg_wr, 0, sizeof(req->reg_wr));
14078c2ecf20Sopenharmony_ci	req->reg_wr.wr.opcode = IB_WR_REG_MR;
14088c2ecf20Sopenharmony_ci	req->reg_wr.wr.wr_cqe = &req->reg_cqe;
14098c2ecf20Sopenharmony_ci	req->reg_wr.wr.num_sge = 0;
14108c2ecf20Sopenharmony_ci	req->reg_wr.mr = req->mr;
14118c2ecf20Sopenharmony_ci	req->reg_wr.key = req->mr->rkey;
14128c2ecf20Sopenharmony_ci	req->reg_wr.access = IB_ACCESS_LOCAL_WRITE |
14138c2ecf20Sopenharmony_ci			     IB_ACCESS_REMOTE_READ |
14148c2ecf20Sopenharmony_ci			     IB_ACCESS_REMOTE_WRITE;
14158c2ecf20Sopenharmony_ci
14168c2ecf20Sopenharmony_ci	sg->addr = cpu_to_le64(req->mr->iova);
14178c2ecf20Sopenharmony_ci	put_unaligned_le24(req->mr->length, sg->length);
14188c2ecf20Sopenharmony_ci	put_unaligned_le32(req->mr->rkey, sg->key);
14198c2ecf20Sopenharmony_ci	sg->type = (NVME_KEY_SGL_FMT_DATA_DESC << 4) |
14208c2ecf20Sopenharmony_ci			NVME_SGL_FMT_INVALIDATE;
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_ci	return 0;
14238c2ecf20Sopenharmony_ci}
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_cistatic void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
14268c2ecf20Sopenharmony_ci		struct nvme_command *cmd, struct ib_sig_domain *domain,
14278c2ecf20Sopenharmony_ci		u16 control, u8 pi_type)
14288c2ecf20Sopenharmony_ci{
14298c2ecf20Sopenharmony_ci	domain->sig_type = IB_SIG_TYPE_T10_DIF;
14308c2ecf20Sopenharmony_ci	domain->sig.dif.bg_type = IB_T10DIF_CRC;
14318c2ecf20Sopenharmony_ci	domain->sig.dif.pi_interval = 1 << bi->interval_exp;
14328c2ecf20Sopenharmony_ci	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
14338c2ecf20Sopenharmony_ci	if (control & NVME_RW_PRINFO_PRCHK_REF)
14348c2ecf20Sopenharmony_ci		domain->sig.dif.ref_remap = true;
14358c2ecf20Sopenharmony_ci
14368c2ecf20Sopenharmony_ci	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
14378c2ecf20Sopenharmony_ci	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
14388c2ecf20Sopenharmony_ci	domain->sig.dif.app_escape = true;
14398c2ecf20Sopenharmony_ci	if (pi_type == NVME_NS_DPS_PI_TYPE3)
14408c2ecf20Sopenharmony_ci		domain->sig.dif.ref_escape = true;
14418c2ecf20Sopenharmony_ci}
14428c2ecf20Sopenharmony_ci
14438c2ecf20Sopenharmony_cistatic void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
14448c2ecf20Sopenharmony_ci		struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
14458c2ecf20Sopenharmony_ci		u8 pi_type)
14468c2ecf20Sopenharmony_ci{
14478c2ecf20Sopenharmony_ci	u16 control = le16_to_cpu(cmd->rw.control);
14488c2ecf20Sopenharmony_ci
14498c2ecf20Sopenharmony_ci	memset(sig_attrs, 0, sizeof(*sig_attrs));
14508c2ecf20Sopenharmony_ci	if (control & NVME_RW_PRINFO_PRACT) {
14518c2ecf20Sopenharmony_ci		/* for WRITE_INSERT/READ_STRIP no memory domain */
14528c2ecf20Sopenharmony_ci		sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
14538c2ecf20Sopenharmony_ci		nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
14548c2ecf20Sopenharmony_ci					 pi_type);
14558c2ecf20Sopenharmony_ci		/* Clear the PRACT bit since HCA will generate/verify the PI */
14568c2ecf20Sopenharmony_ci		control &= ~NVME_RW_PRINFO_PRACT;
14578c2ecf20Sopenharmony_ci		cmd->rw.control = cpu_to_le16(control);
14588c2ecf20Sopenharmony_ci	} else {
14598c2ecf20Sopenharmony_ci		/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
14608c2ecf20Sopenharmony_ci		nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
14618c2ecf20Sopenharmony_ci					 pi_type);
14628c2ecf20Sopenharmony_ci		nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
14638c2ecf20Sopenharmony_ci					 pi_type);
14648c2ecf20Sopenharmony_ci	}
14658c2ecf20Sopenharmony_ci}
14668c2ecf20Sopenharmony_ci
14678c2ecf20Sopenharmony_cistatic void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
14688c2ecf20Sopenharmony_ci{
14698c2ecf20Sopenharmony_ci	*mask = 0;
14708c2ecf20Sopenharmony_ci	if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF)
14718c2ecf20Sopenharmony_ci		*mask |= IB_SIG_CHECK_REFTAG;
14728c2ecf20Sopenharmony_ci	if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD)
14738c2ecf20Sopenharmony_ci		*mask |= IB_SIG_CHECK_GUARD;
14748c2ecf20Sopenharmony_ci}
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_cistatic void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
14778c2ecf20Sopenharmony_ci{
14788c2ecf20Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS))
14798c2ecf20Sopenharmony_ci		nvme_rdma_wr_error(cq, wc, "SIG");
14808c2ecf20Sopenharmony_ci}
14818c2ecf20Sopenharmony_ci
14828c2ecf20Sopenharmony_cistatic int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
14838c2ecf20Sopenharmony_ci		struct nvme_rdma_request *req, struct nvme_command *c,
14848c2ecf20Sopenharmony_ci		int count, int pi_count)
14858c2ecf20Sopenharmony_ci{
14868c2ecf20Sopenharmony_ci	struct nvme_rdma_sgl *sgl = &req->data_sgl;
14878c2ecf20Sopenharmony_ci	struct ib_reg_wr *wr = &req->reg_wr;
14888c2ecf20Sopenharmony_ci	struct request *rq = blk_mq_rq_from_pdu(req);
14898c2ecf20Sopenharmony_ci	struct nvme_ns *ns = rq->q->queuedata;
14908c2ecf20Sopenharmony_ci	struct bio *bio = rq->bio;
14918c2ecf20Sopenharmony_ci	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
14928c2ecf20Sopenharmony_ci	int nr;
14938c2ecf20Sopenharmony_ci
14948c2ecf20Sopenharmony_ci	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
14958c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!req->mr))
14968c2ecf20Sopenharmony_ci		return -EAGAIN;
14978c2ecf20Sopenharmony_ci
14988c2ecf20Sopenharmony_ci	nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
14998c2ecf20Sopenharmony_ci			     req->metadata_sgl->sg_table.sgl, pi_count, NULL,
15008c2ecf20Sopenharmony_ci			     SZ_4K);
15018c2ecf20Sopenharmony_ci	if (unlikely(nr))
15028c2ecf20Sopenharmony_ci		goto mr_put;
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_ci	nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
15058c2ecf20Sopenharmony_ci				req->mr->sig_attrs, ns->pi_type);
15068c2ecf20Sopenharmony_ci	nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
15098c2ecf20Sopenharmony_ci
15108c2ecf20Sopenharmony_ci	req->reg_cqe.done = nvme_rdma_sig_done;
15118c2ecf20Sopenharmony_ci	memset(wr, 0, sizeof(*wr));
15128c2ecf20Sopenharmony_ci	wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
15138c2ecf20Sopenharmony_ci	wr->wr.wr_cqe = &req->reg_cqe;
15148c2ecf20Sopenharmony_ci	wr->wr.num_sge = 0;
15158c2ecf20Sopenharmony_ci	wr->wr.send_flags = 0;
15168c2ecf20Sopenharmony_ci	wr->mr = req->mr;
15178c2ecf20Sopenharmony_ci	wr->key = req->mr->rkey;
15188c2ecf20Sopenharmony_ci	wr->access = IB_ACCESS_LOCAL_WRITE |
15198c2ecf20Sopenharmony_ci		     IB_ACCESS_REMOTE_READ |
15208c2ecf20Sopenharmony_ci		     IB_ACCESS_REMOTE_WRITE;
15218c2ecf20Sopenharmony_ci
15228c2ecf20Sopenharmony_ci	sg->addr = cpu_to_le64(req->mr->iova);
15238c2ecf20Sopenharmony_ci	put_unaligned_le24(req->mr->length, sg->length);
15248c2ecf20Sopenharmony_ci	put_unaligned_le32(req->mr->rkey, sg->key);
15258c2ecf20Sopenharmony_ci	sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
15268c2ecf20Sopenharmony_ci
15278c2ecf20Sopenharmony_ci	return 0;
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_cimr_put:
15308c2ecf20Sopenharmony_ci	ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
15318c2ecf20Sopenharmony_ci	req->mr = NULL;
15328c2ecf20Sopenharmony_ci	if (nr < 0)
15338c2ecf20Sopenharmony_ci		return nr;
15348c2ecf20Sopenharmony_ci	return -EINVAL;
15358c2ecf20Sopenharmony_ci}
15368c2ecf20Sopenharmony_ci
/*
 * Prepare the data (and, for integrity requests, metadata) mapping of @rq
 * and fill the SGL descriptor of command @c accordingly.
 *
 * The data scatterlist is allocated, built from the request and DMA
 * mapped; the same is done for the metadata scatterlist when
 * blk_integrity_rq() is true.  The descriptor is then produced by exactly
 * one of the nvme_rdma_map_sg_*() helpers, chosen below.
 *
 * Returns 0 on success or a negative errno.  On failure every scatterlist
 * allocated or mapped by this function is released again before returning.
 */
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
		struct request *rq, struct nvme_command *c)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_device *dev = queue->device;
	struct ib_device *ibdev = dev->dev;
	int pi_count = 0;
	int count, ret;

	/* one SGE to start with; the inline mapping adds more on top */
	req->num_sge = 1;
	refcount_set(&req->ref, 2); /* send and recv completions */

	c->common.flags |= NVME_CMD_SGL_METABUF;

	/* no payload: a null descriptor is all that is needed */
	if (!blk_rq_nr_phys_segments(rq))
		return nvme_rdma_set_sg_null(c);

	/* the first-chunk scatterlist lives directly behind the request PDU */
	req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
	ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
			blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
			NVME_INLINE_SG_CNT);
	if (ret)
		return -ENOMEM;

	req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
					    req->data_sgl.sg_table.sgl);

	/* count is the number of DMA-mapped entries, used for all mappings below */
	count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
			      req->data_sgl.nents, rq_dma_dir(rq));
	if (unlikely(count <= 0)) {
		ret = -EIO;
		goto out_free_table;
	}

	if (blk_integrity_rq(rq)) {
		/* same layout trick: entries follow the metadata_sgl header */
		req->metadata_sgl->sg_table.sgl =
			(struct scatterlist *)(req->metadata_sgl + 1);
		ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
				blk_rq_count_integrity_sg(rq->q, rq->bio),
				req->metadata_sgl->sg_table.sgl,
				NVME_INLINE_METADATA_SG_CNT);
		if (unlikely(ret)) {
			ret = -ENOMEM;
			goto out_unmap_sg;
		}

		req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
				rq->bio, req->metadata_sgl->sg_table.sgl);
		pi_count = ib_dma_map_sg(ibdev,
					 req->metadata_sgl->sg_table.sgl,
					 req->metadata_sgl->nents,
					 rq_dma_dir(rq));
		if (unlikely(pi_count <= 0)) {
			ret = -EIO;
			goto out_free_pi_table;
		}
	}

	/* signature MR requests always take the integrity mapping path */
	if (req->use_sig_mr) {
		ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
		goto out;
	}

	if (count <= dev->num_inline_segments) {
		/*
		 * Inline data: writes only, never on queue index 0, and only
		 * when the payload fits the queue's inline data size.
		 */
		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
		    queue->ctrl->use_inline_data &&
		    blk_rq_payload_bytes(rq) <=
				nvme_rdma_inline_data_size(queue)) {
			ret = nvme_rdma_map_sg_inline(queue, req, c, count);
			goto out;
		}

		/* a single segment can use the global rkey if the PD has one */
		if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
			ret = nvme_rdma_map_sg_single(queue, req, c);
			goto out;
		}
	}

	/* everything else is registered through an MR */
	ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
	if (unlikely(ret))
		goto out_unmap_pi_sg;

	return 0;

	/* unwind in reverse order of setup; labels fall through */
out_unmap_pi_sg:
	if (blk_integrity_rq(rq))
		ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
				req->metadata_sgl->nents, rq_dma_dir(rq));
out_free_pi_table:
	if (blk_integrity_rq(rq))
		sg_free_table_chained(&req->metadata_sgl->sg_table,
				      NVME_INLINE_METADATA_SG_CNT);
out_unmap_sg:
	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
			rq_dma_dir(rq));
out_free_table:
	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
	return ret;
}
16378c2ecf20Sopenharmony_ci
16388c2ecf20Sopenharmony_cistatic void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
16398c2ecf20Sopenharmony_ci{
16408c2ecf20Sopenharmony_ci	struct nvme_rdma_qe *qe =
16418c2ecf20Sopenharmony_ci		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
16428c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req =
16438c2ecf20Sopenharmony_ci		container_of(qe, struct nvme_rdma_request, sqe);
16448c2ecf20Sopenharmony_ci
16458c2ecf20Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS))
16468c2ecf20Sopenharmony_ci		nvme_rdma_wr_error(cq, wc, "SEND");
16478c2ecf20Sopenharmony_ci	else
16488c2ecf20Sopenharmony_ci		nvme_rdma_end_request(req);
16498c2ecf20Sopenharmony_ci}
16508c2ecf20Sopenharmony_ci
16518c2ecf20Sopenharmony_cistatic int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
16528c2ecf20Sopenharmony_ci		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
16538c2ecf20Sopenharmony_ci		struct ib_send_wr *first)
16548c2ecf20Sopenharmony_ci{
16558c2ecf20Sopenharmony_ci	struct ib_send_wr wr;
16568c2ecf20Sopenharmony_ci	int ret;
16578c2ecf20Sopenharmony_ci
16588c2ecf20Sopenharmony_ci	sge->addr   = qe->dma;
16598c2ecf20Sopenharmony_ci	sge->length = sizeof(struct nvme_command);
16608c2ecf20Sopenharmony_ci	sge->lkey   = queue->device->pd->local_dma_lkey;
16618c2ecf20Sopenharmony_ci
16628c2ecf20Sopenharmony_ci	wr.next       = NULL;
16638c2ecf20Sopenharmony_ci	wr.wr_cqe     = &qe->cqe;
16648c2ecf20Sopenharmony_ci	wr.sg_list    = sge;
16658c2ecf20Sopenharmony_ci	wr.num_sge    = num_sge;
16668c2ecf20Sopenharmony_ci	wr.opcode     = IB_WR_SEND;
16678c2ecf20Sopenharmony_ci	wr.send_flags = IB_SEND_SIGNALED;
16688c2ecf20Sopenharmony_ci
16698c2ecf20Sopenharmony_ci	if (first)
16708c2ecf20Sopenharmony_ci		first->next = &wr;
16718c2ecf20Sopenharmony_ci	else
16728c2ecf20Sopenharmony_ci		first = &wr;
16738c2ecf20Sopenharmony_ci
16748c2ecf20Sopenharmony_ci	ret = ib_post_send(queue->qp, first, NULL);
16758c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
16768c2ecf20Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
16778c2ecf20Sopenharmony_ci			     "%s failed with error code %d\n", __func__, ret);
16788c2ecf20Sopenharmony_ci	}
16798c2ecf20Sopenharmony_ci	return ret;
16808c2ecf20Sopenharmony_ci}
16818c2ecf20Sopenharmony_ci
16828c2ecf20Sopenharmony_cistatic int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
16838c2ecf20Sopenharmony_ci		struct nvme_rdma_qe *qe)
16848c2ecf20Sopenharmony_ci{
16858c2ecf20Sopenharmony_ci	struct ib_recv_wr wr;
16868c2ecf20Sopenharmony_ci	struct ib_sge list;
16878c2ecf20Sopenharmony_ci	int ret;
16888c2ecf20Sopenharmony_ci
16898c2ecf20Sopenharmony_ci	list.addr   = qe->dma;
16908c2ecf20Sopenharmony_ci	list.length = sizeof(struct nvme_completion);
16918c2ecf20Sopenharmony_ci	list.lkey   = queue->device->pd->local_dma_lkey;
16928c2ecf20Sopenharmony_ci
16938c2ecf20Sopenharmony_ci	qe->cqe.done = nvme_rdma_recv_done;
16948c2ecf20Sopenharmony_ci
16958c2ecf20Sopenharmony_ci	wr.next     = NULL;
16968c2ecf20Sopenharmony_ci	wr.wr_cqe   = &qe->cqe;
16978c2ecf20Sopenharmony_ci	wr.sg_list  = &list;
16988c2ecf20Sopenharmony_ci	wr.num_sge  = 1;
16998c2ecf20Sopenharmony_ci
17008c2ecf20Sopenharmony_ci	ret = ib_post_recv(queue->qp, &wr, NULL);
17018c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
17028c2ecf20Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
17038c2ecf20Sopenharmony_ci			"%s failed with error code %d\n", __func__, ret);
17048c2ecf20Sopenharmony_ci	}
17058c2ecf20Sopenharmony_ci	return ret;
17068c2ecf20Sopenharmony_ci}
17078c2ecf20Sopenharmony_ci
17088c2ecf20Sopenharmony_cistatic struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue)
17098c2ecf20Sopenharmony_ci{
17108c2ecf20Sopenharmony_ci	u32 queue_idx = nvme_rdma_queue_idx(queue);
17118c2ecf20Sopenharmony_ci
17128c2ecf20Sopenharmony_ci	if (queue_idx == 0)
17138c2ecf20Sopenharmony_ci		return queue->ctrl->admin_tag_set.tags[queue_idx];
17148c2ecf20Sopenharmony_ci	return queue->ctrl->tag_set.tags[queue_idx - 1];
17158c2ecf20Sopenharmony_ci}
17168c2ecf20Sopenharmony_ci
17178c2ecf20Sopenharmony_cistatic void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
17188c2ecf20Sopenharmony_ci{
17198c2ecf20Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS))
17208c2ecf20Sopenharmony_ci		nvme_rdma_wr_error(cq, wc, "ASYNC");
17218c2ecf20Sopenharmony_ci}
17228c2ecf20Sopenharmony_ci
17238c2ecf20Sopenharmony_cistatic void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
17248c2ecf20Sopenharmony_ci{
17258c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
17268c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = &ctrl->queues[0];
17278c2ecf20Sopenharmony_ci	struct ib_device *dev = queue->device->dev;
17288c2ecf20Sopenharmony_ci	struct nvme_rdma_qe *sqe = &ctrl->async_event_sqe;
17298c2ecf20Sopenharmony_ci	struct nvme_command *cmd = sqe->data;
17308c2ecf20Sopenharmony_ci	struct ib_sge sge;
17318c2ecf20Sopenharmony_ci	int ret;
17328c2ecf20Sopenharmony_ci
17338c2ecf20Sopenharmony_ci	ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE);
17348c2ecf20Sopenharmony_ci
17358c2ecf20Sopenharmony_ci	memset(cmd, 0, sizeof(*cmd));
17368c2ecf20Sopenharmony_ci	cmd->common.opcode = nvme_admin_async_event;
17378c2ecf20Sopenharmony_ci	cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
17388c2ecf20Sopenharmony_ci	cmd->common.flags |= NVME_CMD_SGL_METABUF;
17398c2ecf20Sopenharmony_ci	nvme_rdma_set_sg_null(cmd);
17408c2ecf20Sopenharmony_ci
17418c2ecf20Sopenharmony_ci	sqe->cqe.done = nvme_rdma_async_done;
17428c2ecf20Sopenharmony_ci
17438c2ecf20Sopenharmony_ci	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
17448c2ecf20Sopenharmony_ci			DMA_TO_DEVICE);
17458c2ecf20Sopenharmony_ci
17468c2ecf20Sopenharmony_ci	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
17478c2ecf20Sopenharmony_ci	WARN_ON_ONCE(ret);
17488c2ecf20Sopenharmony_ci}
17498c2ecf20Sopenharmony_ci
17508c2ecf20Sopenharmony_cistatic void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
17518c2ecf20Sopenharmony_ci		struct nvme_completion *cqe, struct ib_wc *wc)
17528c2ecf20Sopenharmony_ci{
17538c2ecf20Sopenharmony_ci	struct request *rq;
17548c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req;
17558c2ecf20Sopenharmony_ci
17568c2ecf20Sopenharmony_ci	rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id);
17578c2ecf20Sopenharmony_ci	if (!rq) {
17588c2ecf20Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
17598c2ecf20Sopenharmony_ci			"got bad command_id %#x on QP %#x\n",
17608c2ecf20Sopenharmony_ci			cqe->command_id, queue->qp->qp_num);
17618c2ecf20Sopenharmony_ci		nvme_rdma_error_recovery(queue->ctrl);
17628c2ecf20Sopenharmony_ci		return;
17638c2ecf20Sopenharmony_ci	}
17648c2ecf20Sopenharmony_ci	req = blk_mq_rq_to_pdu(rq);
17658c2ecf20Sopenharmony_ci
17668c2ecf20Sopenharmony_ci	req->status = cqe->status;
17678c2ecf20Sopenharmony_ci	req->result = cqe->result;
17688c2ecf20Sopenharmony_ci
17698c2ecf20Sopenharmony_ci	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
17708c2ecf20Sopenharmony_ci		if (unlikely(!req->mr ||
17718c2ecf20Sopenharmony_ci			     wc->ex.invalidate_rkey != req->mr->rkey)) {
17728c2ecf20Sopenharmony_ci			dev_err(queue->ctrl->ctrl.device,
17738c2ecf20Sopenharmony_ci				"Bogus remote invalidation for rkey %#x\n",
17748c2ecf20Sopenharmony_ci				req->mr ? req->mr->rkey : 0);
17758c2ecf20Sopenharmony_ci			nvme_rdma_error_recovery(queue->ctrl);
17768c2ecf20Sopenharmony_ci		}
17778c2ecf20Sopenharmony_ci	} else if (req->mr) {
17788c2ecf20Sopenharmony_ci		int ret;
17798c2ecf20Sopenharmony_ci
17808c2ecf20Sopenharmony_ci		ret = nvme_rdma_inv_rkey(queue, req);
17818c2ecf20Sopenharmony_ci		if (unlikely(ret < 0)) {
17828c2ecf20Sopenharmony_ci			dev_err(queue->ctrl->ctrl.device,
17838c2ecf20Sopenharmony_ci				"Queueing INV WR for rkey %#x failed (%d)\n",
17848c2ecf20Sopenharmony_ci				req->mr->rkey, ret);
17858c2ecf20Sopenharmony_ci			nvme_rdma_error_recovery(queue->ctrl);
17868c2ecf20Sopenharmony_ci		}
17878c2ecf20Sopenharmony_ci		/* the local invalidation completion will end the request */
17888c2ecf20Sopenharmony_ci		return;
17898c2ecf20Sopenharmony_ci	}
17908c2ecf20Sopenharmony_ci
17918c2ecf20Sopenharmony_ci	nvme_rdma_end_request(req);
17928c2ecf20Sopenharmony_ci}
17938c2ecf20Sopenharmony_ci
17948c2ecf20Sopenharmony_cistatic void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
17958c2ecf20Sopenharmony_ci{
17968c2ecf20Sopenharmony_ci	struct nvme_rdma_qe *qe =
17978c2ecf20Sopenharmony_ci		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
17988c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = wc->qp->qp_context;
17998c2ecf20Sopenharmony_ci	struct ib_device *ibdev = queue->device->dev;
18008c2ecf20Sopenharmony_ci	struct nvme_completion *cqe = qe->data;
18018c2ecf20Sopenharmony_ci	const size_t len = sizeof(struct nvme_completion);
18028c2ecf20Sopenharmony_ci
18038c2ecf20Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS)) {
18048c2ecf20Sopenharmony_ci		nvme_rdma_wr_error(cq, wc, "RECV");
18058c2ecf20Sopenharmony_ci		return;
18068c2ecf20Sopenharmony_ci	}
18078c2ecf20Sopenharmony_ci
18088c2ecf20Sopenharmony_ci	/* sanity checking for received data length */
18098c2ecf20Sopenharmony_ci	if (unlikely(wc->byte_len < len)) {
18108c2ecf20Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
18118c2ecf20Sopenharmony_ci			"Unexpected nvme completion length(%d)\n", wc->byte_len);
18128c2ecf20Sopenharmony_ci		nvme_rdma_error_recovery(queue->ctrl);
18138c2ecf20Sopenharmony_ci		return;
18148c2ecf20Sopenharmony_ci	}
18158c2ecf20Sopenharmony_ci
18168c2ecf20Sopenharmony_ci	ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
18178c2ecf20Sopenharmony_ci	/*
18188c2ecf20Sopenharmony_ci	 * AEN requests are special as they don't time out and can
18198c2ecf20Sopenharmony_ci	 * survive any kind of queue freeze and often don't respond to
18208c2ecf20Sopenharmony_ci	 * aborts.  We don't even bother to allocate a struct request
18218c2ecf20Sopenharmony_ci	 * for them but rather special case them here.
18228c2ecf20Sopenharmony_ci	 */
18238c2ecf20Sopenharmony_ci	if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue),
18248c2ecf20Sopenharmony_ci				     cqe->command_id)))
18258c2ecf20Sopenharmony_ci		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
18268c2ecf20Sopenharmony_ci				&cqe->result);
18278c2ecf20Sopenharmony_ci	else
18288c2ecf20Sopenharmony_ci		nvme_rdma_process_nvme_rsp(queue, cqe, wc);
18298c2ecf20Sopenharmony_ci	ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
18308c2ecf20Sopenharmony_ci
18318c2ecf20Sopenharmony_ci	nvme_rdma_post_recv(queue, qe);
18328c2ecf20Sopenharmony_ci}
18338c2ecf20Sopenharmony_ci
18348c2ecf20Sopenharmony_cistatic int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
18358c2ecf20Sopenharmony_ci{
18368c2ecf20Sopenharmony_ci	int ret, i;
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci	for (i = 0; i < queue->queue_size; i++) {
18398c2ecf20Sopenharmony_ci		ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]);
18408c2ecf20Sopenharmony_ci		if (ret)
18418c2ecf20Sopenharmony_ci			return ret;
18428c2ecf20Sopenharmony_ci	}
18438c2ecf20Sopenharmony_ci
18448c2ecf20Sopenharmony_ci	return 0;
18458c2ecf20Sopenharmony_ci}
18468c2ecf20Sopenharmony_ci
18478c2ecf20Sopenharmony_cistatic int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
18488c2ecf20Sopenharmony_ci		struct rdma_cm_event *ev)
18498c2ecf20Sopenharmony_ci{
18508c2ecf20Sopenharmony_ci	struct rdma_cm_id *cm_id = queue->cm_id;
18518c2ecf20Sopenharmony_ci	int status = ev->status;
18528c2ecf20Sopenharmony_ci	const char *rej_msg;
18538c2ecf20Sopenharmony_ci	const struct nvme_rdma_cm_rej *rej_data;
18548c2ecf20Sopenharmony_ci	u8 rej_data_len;
18558c2ecf20Sopenharmony_ci
18568c2ecf20Sopenharmony_ci	rej_msg = rdma_reject_msg(cm_id, status);
18578c2ecf20Sopenharmony_ci	rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len);
18588c2ecf20Sopenharmony_ci
18598c2ecf20Sopenharmony_ci	if (rej_data && rej_data_len >= sizeof(u16)) {
18608c2ecf20Sopenharmony_ci		u16 sts = le16_to_cpu(rej_data->sts);
18618c2ecf20Sopenharmony_ci
18628c2ecf20Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
18638c2ecf20Sopenharmony_ci		      "Connect rejected: status %d (%s) nvme status %d (%s).\n",
18648c2ecf20Sopenharmony_ci		      status, rej_msg, sts, nvme_rdma_cm_msg(sts));
18658c2ecf20Sopenharmony_ci	} else {
18668c2ecf20Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
18678c2ecf20Sopenharmony_ci			"Connect rejected: status %d (%s).\n", status, rej_msg);
18688c2ecf20Sopenharmony_ci	}
18698c2ecf20Sopenharmony_ci
18708c2ecf20Sopenharmony_ci	return -ECONNRESET;
18718c2ecf20Sopenharmony_ci}
18728c2ecf20Sopenharmony_ci
18738c2ecf20Sopenharmony_cistatic int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue)
18748c2ecf20Sopenharmony_ci{
18758c2ecf20Sopenharmony_ci	struct nvme_ctrl *ctrl = &queue->ctrl->ctrl;
18768c2ecf20Sopenharmony_ci	int ret;
18778c2ecf20Sopenharmony_ci
18788c2ecf20Sopenharmony_ci	ret = nvme_rdma_create_queue_ib(queue);
18798c2ecf20Sopenharmony_ci	if (ret)
18808c2ecf20Sopenharmony_ci		return ret;
18818c2ecf20Sopenharmony_ci
18828c2ecf20Sopenharmony_ci	if (ctrl->opts->tos >= 0)
18838c2ecf20Sopenharmony_ci		rdma_set_service_type(queue->cm_id, ctrl->opts->tos);
18848c2ecf20Sopenharmony_ci	ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CONNECT_TIMEOUT_MS);
18858c2ecf20Sopenharmony_ci	if (ret) {
18868c2ecf20Sopenharmony_ci		dev_err(ctrl->device, "rdma_resolve_route failed (%d).\n",
18878c2ecf20Sopenharmony_ci			queue->cm_error);
18888c2ecf20Sopenharmony_ci		goto out_destroy_queue;
18898c2ecf20Sopenharmony_ci	}
18908c2ecf20Sopenharmony_ci
18918c2ecf20Sopenharmony_ci	return 0;
18928c2ecf20Sopenharmony_ci
18938c2ecf20Sopenharmony_ciout_destroy_queue:
18948c2ecf20Sopenharmony_ci	nvme_rdma_destroy_queue_ib(queue);
18958c2ecf20Sopenharmony_ci	return ret;
18968c2ecf20Sopenharmony_ci}
18978c2ecf20Sopenharmony_ci
18988c2ecf20Sopenharmony_cistatic int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
18998c2ecf20Sopenharmony_ci{
19008c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
19018c2ecf20Sopenharmony_ci	struct rdma_conn_param param = { };
19028c2ecf20Sopenharmony_ci	struct nvme_rdma_cm_req priv = { };
19038c2ecf20Sopenharmony_ci	int ret;
19048c2ecf20Sopenharmony_ci
19058c2ecf20Sopenharmony_ci	param.qp_num = queue->qp->qp_num;
19068c2ecf20Sopenharmony_ci	param.flow_control = 1;
19078c2ecf20Sopenharmony_ci
19088c2ecf20Sopenharmony_ci	param.responder_resources = queue->device->dev->attrs.max_qp_rd_atom;
19098c2ecf20Sopenharmony_ci	/* maximum retry count */
19108c2ecf20Sopenharmony_ci	param.retry_count = 7;
19118c2ecf20Sopenharmony_ci	param.rnr_retry_count = 7;
19128c2ecf20Sopenharmony_ci	param.private_data = &priv;
19138c2ecf20Sopenharmony_ci	param.private_data_len = sizeof(priv);
19148c2ecf20Sopenharmony_ci
19158c2ecf20Sopenharmony_ci	priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
19168c2ecf20Sopenharmony_ci	priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue));
19178c2ecf20Sopenharmony_ci	/*
19188c2ecf20Sopenharmony_ci	 * set the admin queue depth to the minimum size
19198c2ecf20Sopenharmony_ci	 * specified by the Fabrics standard.
19208c2ecf20Sopenharmony_ci	 */
19218c2ecf20Sopenharmony_ci	if (priv.qid == 0) {
19228c2ecf20Sopenharmony_ci		priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
19238c2ecf20Sopenharmony_ci		priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
19248c2ecf20Sopenharmony_ci	} else {
19258c2ecf20Sopenharmony_ci		/*
19268c2ecf20Sopenharmony_ci		 * current interpretation of the fabrics spec
19278c2ecf20Sopenharmony_ci		 * is at minimum you make hrqsize sqsize+1, or a
19288c2ecf20Sopenharmony_ci		 * 1's based representation of sqsize.
19298c2ecf20Sopenharmony_ci		 */
19308c2ecf20Sopenharmony_ci		priv.hrqsize = cpu_to_le16(queue->queue_size);
19318c2ecf20Sopenharmony_ci		priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
19328c2ecf20Sopenharmony_ci	}
19338c2ecf20Sopenharmony_ci
19348c2ecf20Sopenharmony_ci	ret = rdma_connect_locked(queue->cm_id, &param);
19358c2ecf20Sopenharmony_ci	if (ret) {
19368c2ecf20Sopenharmony_ci		dev_err(ctrl->ctrl.device,
19378c2ecf20Sopenharmony_ci			"rdma_connect_locked failed (%d).\n", ret);
19388c2ecf20Sopenharmony_ci		return ret;
19398c2ecf20Sopenharmony_ci	}
19408c2ecf20Sopenharmony_ci
19418c2ecf20Sopenharmony_ci	return 0;
19428c2ecf20Sopenharmony_ci}
19438c2ecf20Sopenharmony_ci
/*
 * RDMA CM event callback for a queue's cm_id (the queue is taken from
 * cm_id->context).
 *
 * Connection-setup events are forwarded to the matching step helper.  A
 * non-zero result from a step, or one of the CM error events, is stored in
 * queue->cm_error and cm_done is completed.  RDMA_CM_EVENT_ESTABLISHED is
 * the one case that always completes cm_done, with queue->cm_error holding
 * the result of posting the receives.
 *
 * Disconnect-type and unexpected events start controller error recovery
 * and do not touch cm_done.
 *
 * Always returns 0.
 */
static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *ev)
{
	struct nvme_rdma_queue *queue = cm_id->context;
	int cm_error = 0;

	dev_dbg(queue->ctrl->ctrl.device, "%s (%d): status %d id %p\n",
		rdma_event_msg(ev->event), ev->event,
		ev->status, cm_id);

	switch (ev->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		cm_error = nvme_rdma_addr_resolved(queue);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		cm_error = nvme_rdma_route_resolved(queue);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		queue->cm_error = nvme_rdma_conn_established(queue);
		/* complete cm_done regardless of success/failure */
		complete(&queue->cm_done);
		return 0;
	case RDMA_CM_EVENT_REJECTED:
		cm_error = nvme_rdma_conn_rejected(queue, ev);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_ADDR_ERROR:
		/* setup failed: reported to the waiter through cm_done below */
		dev_dbg(queue->ctrl->ctrl.device,
			"CM error event %d\n", ev->event);
		cm_error = -ECONNRESET;
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		/* cm_error stays 0 here, so cm_done is not completed */
		dev_dbg(queue->ctrl->ctrl.device,
			"disconnect received - connection closed\n");
		nvme_rdma_error_recovery(queue->ctrl);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		/* device removal is handled via the ib_client API */
		break;
	default:
		dev_err(queue->ctrl->ctrl.device,
			"Unexpected RDMA CM event (%d)\n", ev->event);
		nvme_rdma_error_recovery(queue->ctrl);
		break;
	}

	/* only failures are signalled here; success waits for ESTABLISHED */
	if (cm_error) {
		queue->cm_error = cm_error;
		complete(&queue->cm_done);
	}

	return 0;
}
20018c2ecf20Sopenharmony_ci
20028c2ecf20Sopenharmony_cistatic void nvme_rdma_complete_timed_out(struct request *rq)
20038c2ecf20Sopenharmony_ci{
20048c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
20058c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = req->queue;
20068c2ecf20Sopenharmony_ci
20078c2ecf20Sopenharmony_ci	nvme_rdma_stop_queue(queue);
20088c2ecf20Sopenharmony_ci	if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
20098c2ecf20Sopenharmony_ci		nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
20108c2ecf20Sopenharmony_ci		blk_mq_complete_request(rq);
20118c2ecf20Sopenharmony_ci	}
20128c2ecf20Sopenharmony_ci}
20138c2ecf20Sopenharmony_ci
20148c2ecf20Sopenharmony_cistatic enum blk_eh_timer_return
20158c2ecf20Sopenharmony_cinvme_rdma_timeout(struct request *rq, bool reserved)
20168c2ecf20Sopenharmony_ci{
20178c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
20188c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = req->queue;
20198c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
20208c2ecf20Sopenharmony_ci
20218c2ecf20Sopenharmony_ci	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
20228c2ecf20Sopenharmony_ci		 rq->tag, nvme_rdma_queue_idx(queue));
20238c2ecf20Sopenharmony_ci
20248c2ecf20Sopenharmony_ci	if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
20258c2ecf20Sopenharmony_ci		/*
20268c2ecf20Sopenharmony_ci		 * If we are resetting, connecting or deleting we should
20278c2ecf20Sopenharmony_ci		 * complete immediately because we may block controller
20288c2ecf20Sopenharmony_ci		 * teardown or setup sequence
20298c2ecf20Sopenharmony_ci		 * - ctrl disable/shutdown fabrics requests
20308c2ecf20Sopenharmony_ci		 * - connect requests
20318c2ecf20Sopenharmony_ci		 * - initialization admin requests
20328c2ecf20Sopenharmony_ci		 * - I/O requests that entered after unquiescing and
20338c2ecf20Sopenharmony_ci		 *   the controller stopped responding
20348c2ecf20Sopenharmony_ci		 *
20358c2ecf20Sopenharmony_ci		 * All other requests should be cancelled by the error
20368c2ecf20Sopenharmony_ci		 * recovery work, so it's fine that we fail it here.
20378c2ecf20Sopenharmony_ci		 */
20388c2ecf20Sopenharmony_ci		nvme_rdma_complete_timed_out(rq);
20398c2ecf20Sopenharmony_ci		return BLK_EH_DONE;
20408c2ecf20Sopenharmony_ci	}
20418c2ecf20Sopenharmony_ci
20428c2ecf20Sopenharmony_ci	/*
20438c2ecf20Sopenharmony_ci	 * LIVE state should trigger the normal error recovery which will
20448c2ecf20Sopenharmony_ci	 * handle completing this request.
20458c2ecf20Sopenharmony_ci	 */
20468c2ecf20Sopenharmony_ci	nvme_rdma_error_recovery(ctrl);
20478c2ecf20Sopenharmony_ci	return BLK_EH_RESET_TIMER;
20488c2ecf20Sopenharmony_ci}
20498c2ecf20Sopenharmony_ci
/*
 * blk-mq .queue_rq handler: build the NVMe command for @bd->rq in the
 * request's send queue entry, map its data and post the send.
 *
 * Returns BLK_STS_OK once the send has been posted.  Otherwise:
 * - the result of nvmf_fail_nonready_command() if nvmf_check_ready() fails,
 * - BLK_STS_RESOURCE if the command buffer cannot be DMA-mapped,
 * - the status from nvme_setup_cmd() if command setup fails,
 * - BLK_STS_RESOURCE (for -ENOMEM / -EAGAIN) or BLK_STS_IOERR if mapping
 *   the data or posting the send fails.
 */
static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_rdma_queue *queue = hctx->driver_data;
	struct request *rq = bd->rq;
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_qe *sqe = &req->sqe;
	struct nvme_command *c = sqe->data;
	struct ib_device *dev;
	bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
	blk_status_t ret;
	int err;

	WARN_ON_ONCE(rq->tag < 0);

	if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);

	dev = queue->device->dev;

	/* map the command buffer; it is unmapped again on every error path */
	req->sqe.dma = ib_dma_map_single(dev, req->sqe.data,
					 sizeof(struct nvme_command),
					 DMA_TO_DEVICE);
	err = ib_dma_mapping_error(dev, req->sqe.dma);
	if (unlikely(err))
		return BLK_STS_RESOURCE;

	/* sync for CPU before the command is filled in ... */
	ib_dma_sync_single_for_cpu(dev, sqe->dma,
			sizeof(struct nvme_command), DMA_TO_DEVICE);

	ret = nvme_setup_cmd(ns, rq, c);
	if (ret)
		goto unmap_qe;

	blk_mq_start_request(rq);

	/*
	 * Use a signature MR only for reads/writes on a PI-capable queue and
	 * namespace, and only when block integrity support is built in.
	 */
	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
	    queue->pi_support &&
	    (c->common.opcode == nvme_cmd_write ||
	     c->common.opcode == nvme_cmd_read) &&
	    nvme_ns_has_pi(ns))
		req->use_sig_mr = true;
	else
		req->use_sig_mr = false;

	err = nvme_rdma_map_data(queue, rq, c);
	if (unlikely(err < 0)) {
		dev_err(queue->ctrl->ctrl.device,
			     "Failed to map data (%d)\n", err);
		goto err;
	}

	sqe->cqe.done = nvme_rdma_send_done;

	/* ... and back for the device once the command is complete */
	ib_dma_sync_single_for_device(dev, sqe->dma,
			sizeof(struct nvme_command), DMA_TO_DEVICE);

	/* chain the registration WR in front only when an MR is in use */
	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
			req->mr ? &req->reg_wr.wr : NULL);
	if (unlikely(err))
		goto err_unmap;

	return BLK_STS_OK;

err_unmap:
	/* the send was not posted: undo nvme_rdma_map_data() */
	nvme_rdma_unmap_data(queue, rq);
err:
	/* -ENOMEM / -EAGAIN map to BLK_STS_RESOURCE, the rest to an I/O error */
	if (err == -ENOMEM || err == -EAGAIN)
		ret = BLK_STS_RESOURCE;
	else
		ret = BLK_STS_IOERR;
	nvme_cleanup_cmd(rq);
unmap_qe:
	ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
			    DMA_TO_DEVICE);
	return ret;
}
21288c2ecf20Sopenharmony_ci
21298c2ecf20Sopenharmony_cistatic int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
21308c2ecf20Sopenharmony_ci{
21318c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = hctx->driver_data;
21328c2ecf20Sopenharmony_ci
21338c2ecf20Sopenharmony_ci	return ib_process_cq_direct(queue->ib_cq, -1);
21348c2ecf20Sopenharmony_ci}
21358c2ecf20Sopenharmony_ci
21368c2ecf20Sopenharmony_cistatic void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
21378c2ecf20Sopenharmony_ci{
21388c2ecf20Sopenharmony_ci	struct request *rq = blk_mq_rq_from_pdu(req);
21398c2ecf20Sopenharmony_ci	struct ib_mr_status mr_status;
21408c2ecf20Sopenharmony_ci	int ret;
21418c2ecf20Sopenharmony_ci
21428c2ecf20Sopenharmony_ci	ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
21438c2ecf20Sopenharmony_ci	if (ret) {
21448c2ecf20Sopenharmony_ci		pr_err("ib_check_mr_status failed, ret %d\n", ret);
21458c2ecf20Sopenharmony_ci		nvme_req(rq)->status = NVME_SC_INVALID_PI;
21468c2ecf20Sopenharmony_ci		return;
21478c2ecf20Sopenharmony_ci	}
21488c2ecf20Sopenharmony_ci
21498c2ecf20Sopenharmony_ci	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
21508c2ecf20Sopenharmony_ci		switch (mr_status.sig_err.err_type) {
21518c2ecf20Sopenharmony_ci		case IB_SIG_BAD_GUARD:
21528c2ecf20Sopenharmony_ci			nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
21538c2ecf20Sopenharmony_ci			break;
21548c2ecf20Sopenharmony_ci		case IB_SIG_BAD_REFTAG:
21558c2ecf20Sopenharmony_ci			nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
21568c2ecf20Sopenharmony_ci			break;
21578c2ecf20Sopenharmony_ci		case IB_SIG_BAD_APPTAG:
21588c2ecf20Sopenharmony_ci			nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
21598c2ecf20Sopenharmony_ci			break;
21608c2ecf20Sopenharmony_ci		}
21618c2ecf20Sopenharmony_ci		pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
21628c2ecf20Sopenharmony_ci		       mr_status.sig_err.err_type, mr_status.sig_err.expected,
21638c2ecf20Sopenharmony_ci		       mr_status.sig_err.actual);
21648c2ecf20Sopenharmony_ci	}
21658c2ecf20Sopenharmony_ci}
21668c2ecf20Sopenharmony_ci
21678c2ecf20Sopenharmony_cistatic void nvme_rdma_complete_rq(struct request *rq)
21688c2ecf20Sopenharmony_ci{
21698c2ecf20Sopenharmony_ci	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
21708c2ecf20Sopenharmony_ci	struct nvme_rdma_queue *queue = req->queue;
21718c2ecf20Sopenharmony_ci	struct ib_device *ibdev = queue->device->dev;
21728c2ecf20Sopenharmony_ci
21738c2ecf20Sopenharmony_ci	if (req->use_sig_mr)
21748c2ecf20Sopenharmony_ci		nvme_rdma_check_pi_status(req);
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci	nvme_rdma_unmap_data(queue, rq);
21778c2ecf20Sopenharmony_ci	ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
21788c2ecf20Sopenharmony_ci			    DMA_TO_DEVICE);
21798c2ecf20Sopenharmony_ci	nvme_complete_rq(rq);
21808c2ecf20Sopenharmony_ci}
21818c2ecf20Sopenharmony_ci
21828c2ecf20Sopenharmony_cistatic int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
21838c2ecf20Sopenharmony_ci{
21848c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl = set->driver_data;
21858c2ecf20Sopenharmony_ci	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_ci	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
21888c2ecf20Sopenharmony_ci		/* separate read/write queues */
21898c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_DEFAULT].nr_queues =
21908c2ecf20Sopenharmony_ci			ctrl->io_queues[HCTX_TYPE_DEFAULT];
21918c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
21928c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_READ].nr_queues =
21938c2ecf20Sopenharmony_ci			ctrl->io_queues[HCTX_TYPE_READ];
21948c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_READ].queue_offset =
21958c2ecf20Sopenharmony_ci			ctrl->io_queues[HCTX_TYPE_DEFAULT];
21968c2ecf20Sopenharmony_ci	} else {
21978c2ecf20Sopenharmony_ci		/* shared read/write queues */
21988c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_DEFAULT].nr_queues =
21998c2ecf20Sopenharmony_ci			ctrl->io_queues[HCTX_TYPE_DEFAULT];
22008c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
22018c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_READ].nr_queues =
22028c2ecf20Sopenharmony_ci			ctrl->io_queues[HCTX_TYPE_DEFAULT];
22038c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_READ].queue_offset = 0;
22048c2ecf20Sopenharmony_ci	}
22058c2ecf20Sopenharmony_ci	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
22068c2ecf20Sopenharmony_ci			ctrl->device->dev, 0);
22078c2ecf20Sopenharmony_ci	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
22088c2ecf20Sopenharmony_ci			ctrl->device->dev, 0);
22098c2ecf20Sopenharmony_ci
22108c2ecf20Sopenharmony_ci	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
22118c2ecf20Sopenharmony_ci		/* map dedicated poll queues only if we have queues left */
22128c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_POLL].nr_queues =
22138c2ecf20Sopenharmony_ci				ctrl->io_queues[HCTX_TYPE_POLL];
22148c2ecf20Sopenharmony_ci		set->map[HCTX_TYPE_POLL].queue_offset =
22158c2ecf20Sopenharmony_ci			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
22168c2ecf20Sopenharmony_ci			ctrl->io_queues[HCTX_TYPE_READ];
22178c2ecf20Sopenharmony_ci		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
22188c2ecf20Sopenharmony_ci	}
22198c2ecf20Sopenharmony_ci
22208c2ecf20Sopenharmony_ci	dev_info(ctrl->ctrl.device,
22218c2ecf20Sopenharmony_ci		"mapped %d/%d/%d default/read/poll queues.\n",
22228c2ecf20Sopenharmony_ci		ctrl->io_queues[HCTX_TYPE_DEFAULT],
22238c2ecf20Sopenharmony_ci		ctrl->io_queues[HCTX_TYPE_READ],
22248c2ecf20Sopenharmony_ci		ctrl->io_queues[HCTX_TYPE_POLL]);
22258c2ecf20Sopenharmony_ci
22268c2ecf20Sopenharmony_ci	return 0;
22278c2ecf20Sopenharmony_ci}
22288c2ecf20Sopenharmony_ci
/*
 * blk-mq operations table.  Compared with nvme_rdma_admin_mq_ops it uses
 * nvme_rdma_init_hctx and additionally provides .map_queues and .poll.
 */
static const struct blk_mq_ops nvme_rdma_mq_ops = {
	.queue_rq	= nvme_rdma_queue_rq,
	.complete	= nvme_rdma_complete_rq,
	.init_request	= nvme_rdma_init_request,
	.exit_request	= nvme_rdma_exit_request,
	.init_hctx	= nvme_rdma_init_hctx,
	.timeout	= nvme_rdma_timeout,
	.map_queues	= nvme_rdma_map_queues,
	.poll		= nvme_rdma_poll,
};
22398c2ecf20Sopenharmony_ci
/*
 * blk-mq operations table using nvme_rdma_init_admin_hctx.  It shares the
 * submit, complete, request init/exit and timeout handlers with
 * nvme_rdma_mq_ops, but has no .map_queues or .poll callback.
 */
static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
	.queue_rq	= nvme_rdma_queue_rq,
	.complete	= nvme_rdma_complete_rq,
	.init_request	= nvme_rdma_init_request,
	.exit_request	= nvme_rdma_exit_request,
	.init_hctx	= nvme_rdma_init_admin_hctx,
	.timeout	= nvme_rdma_timeout,
};
22488c2ecf20Sopenharmony_ci
22498c2ecf20Sopenharmony_cistatic void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
22508c2ecf20Sopenharmony_ci{
22518c2ecf20Sopenharmony_ci	nvme_rdma_teardown_io_queues(ctrl, shutdown);
22528c2ecf20Sopenharmony_ci	nvme_stop_admin_queue(&ctrl->ctrl);
22538c2ecf20Sopenharmony_ci	if (shutdown)
22548c2ecf20Sopenharmony_ci		nvme_shutdown_ctrl(&ctrl->ctrl);
22558c2ecf20Sopenharmony_ci	else
22568c2ecf20Sopenharmony_ci		nvme_disable_ctrl(&ctrl->ctrl);
22578c2ecf20Sopenharmony_ci	nvme_rdma_teardown_admin_queue(ctrl, shutdown);
22588c2ecf20Sopenharmony_ci}
22598c2ecf20Sopenharmony_ci
22608c2ecf20Sopenharmony_cistatic void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
22618c2ecf20Sopenharmony_ci{
22628c2ecf20Sopenharmony_ci	nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true);
22638c2ecf20Sopenharmony_ci}
22648c2ecf20Sopenharmony_ci
22658c2ecf20Sopenharmony_cistatic void nvme_rdma_reset_ctrl_work(struct work_struct *work)
22668c2ecf20Sopenharmony_ci{
22678c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl =
22688c2ecf20Sopenharmony_ci		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
22698c2ecf20Sopenharmony_ci
22708c2ecf20Sopenharmony_ci	nvme_stop_ctrl(&ctrl->ctrl);
22718c2ecf20Sopenharmony_ci	nvme_rdma_shutdown_ctrl(ctrl, false);
22728c2ecf20Sopenharmony_ci
22738c2ecf20Sopenharmony_ci	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
22748c2ecf20Sopenharmony_ci		/* state change failure should never happen */
22758c2ecf20Sopenharmony_ci		WARN_ON_ONCE(1);
22768c2ecf20Sopenharmony_ci		return;
22778c2ecf20Sopenharmony_ci	}
22788c2ecf20Sopenharmony_ci
22798c2ecf20Sopenharmony_ci	if (nvme_rdma_setup_ctrl(ctrl, false))
22808c2ecf20Sopenharmony_ci		goto out_fail;
22818c2ecf20Sopenharmony_ci
22828c2ecf20Sopenharmony_ci	return;
22838c2ecf20Sopenharmony_ci
22848c2ecf20Sopenharmony_ciout_fail:
22858c2ecf20Sopenharmony_ci	++ctrl->ctrl.nr_reconnects;
22868c2ecf20Sopenharmony_ci	nvme_rdma_reconnect_or_remove(ctrl);
22878c2ecf20Sopenharmony_ci}
22888c2ecf20Sopenharmony_ci
/*
 * NVMe controller operations for the "rdma" transport, passed to
 * nvme_init_ctrl() in nvme_rdma_create_ctrl().  Register access and address
 * reporting use the generic fabrics (nvmf_*) helpers; the remaining
 * callbacks are RDMA specific.
 */
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
	.name			= "rdma",
	.module			= THIS_MODULE,
	.flags			= NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
	.reg_read32		= nvmf_reg_read32,
	.reg_read64		= nvmf_reg_read64,
	.reg_write32		= nvmf_reg_write32,
	.free_ctrl		= nvme_rdma_free_ctrl,
	.submit_async_event	= nvme_rdma_submit_async_event,
	.delete_ctrl		= nvme_rdma_delete_ctrl,
	.get_address		= nvmf_get_address,
	.stop_ctrl		= nvme_rdma_stop_ctrl,
};
23028c2ecf20Sopenharmony_ci
23038c2ecf20Sopenharmony_ci/*
23048c2ecf20Sopenharmony_ci * Fails a connection request if it matches an existing controller
23058c2ecf20Sopenharmony_ci * (association) with the same tuple:
23068c2ecf20Sopenharmony_ci * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
23078c2ecf20Sopenharmony_ci *
23088c2ecf20Sopenharmony_ci * if local address is not specified in the request, it will match an
23098c2ecf20Sopenharmony_ci * existing controller with all the other parameters the same and no
23108c2ecf20Sopenharmony_ci * local port address specified as well.
23118c2ecf20Sopenharmony_ci *
23128c2ecf20Sopenharmony_ci * The ports don't need to be compared as they are intrinsically
23138c2ecf20Sopenharmony_ci * already matched by the port pointers supplied.
23148c2ecf20Sopenharmony_ci */
23158c2ecf20Sopenharmony_cistatic bool
23168c2ecf20Sopenharmony_cinvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
23178c2ecf20Sopenharmony_ci{
23188c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl;
23198c2ecf20Sopenharmony_ci	bool found = false;
23208c2ecf20Sopenharmony_ci
23218c2ecf20Sopenharmony_ci	mutex_lock(&nvme_rdma_ctrl_mutex);
23228c2ecf20Sopenharmony_ci	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
23238c2ecf20Sopenharmony_ci		found = nvmf_ip_options_match(&ctrl->ctrl, opts);
23248c2ecf20Sopenharmony_ci		if (found)
23258c2ecf20Sopenharmony_ci			break;
23268c2ecf20Sopenharmony_ci	}
23278c2ecf20Sopenharmony_ci	mutex_unlock(&nvme_rdma_ctrl_mutex);
23288c2ecf20Sopenharmony_ci
23298c2ecf20Sopenharmony_ci	return found;
23308c2ecf20Sopenharmony_ci}
23318c2ecf20Sopenharmony_ci
/*
 * .create_ctrl callback of the "rdma" transport: allocate and set up a new
 * RDMA controller from the connect options in @opts.
 *
 * Returns the embedded struct nvme_ctrl on success, after the controller
 * has been added to nvme_rdma_ctrl_list.  On failure returns an ERR_PTR():
 * -ENOMEM for allocation failures, -EALREADY when a matching controller
 * exists and duplicate connects are not allowed, or the error from address
 * parsing, nvme_init_ctrl() or nvme_rdma_setup_ctrl() (a positive setup
 * result is reported as -EIO).
 */
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
		struct nvmf_ctrl_options *opts)
{
	struct nvme_rdma_ctrl *ctrl;
	int ret;
	bool changed;

	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		return ERR_PTR(-ENOMEM);
	ctrl->ctrl.opts = opts;
	INIT_LIST_HEAD(&ctrl->list);

	/* no service id given: fall back to the default NVMe/RDMA port */
	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
		opts->trsvcid =
			kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL);
		if (!opts->trsvcid) {
			ret = -ENOMEM;
			goto out_free_ctrl;
		}
		opts->mask |= NVMF_OPT_TRSVCID;
	}

	/* parse the target address and service id into ctrl->addr */
	ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
			opts->traddr, opts->trsvcid, &ctrl->addr);
	if (ret) {
		pr_err("malformed address passed: %s:%s\n",
			opts->traddr, opts->trsvcid);
		goto out_free_ctrl;
	}

	/* the optional host address becomes the source address */
	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
		ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
			opts->host_traddr, NULL, &ctrl->src_addr);
		if (ret) {
			pr_err("malformed src address passed: %s\n",
			       opts->host_traddr);
			goto out_free_ctrl;
		}
	}

	if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) {
		ret = -EALREADY;
		goto out_free_ctrl;
	}

	INIT_DELAYED_WORK(&ctrl->reconnect_work,
			nvme_rdma_reconnect_ctrl_work);
	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);

	/* one extra queue on top of the requested io/write/poll queues */
	ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
				opts->nr_poll_queues + 1;
	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;

	ret = -ENOMEM;
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
				GFP_KERNEL);
	if (!ctrl->queues)
		goto out_free_ctrl;

	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
				0 /* no quirks, we're perfect! */);
	if (ret)
		goto out_kfree_queues;

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
	WARN_ON_ONCE(!changed);

	ret = nvme_rdma_setup_ctrl(ctrl, true);
	if (ret)
		goto out_uninit_ctrl;

	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);

	/* publish the controller only after it is fully set up */
	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
	mutex_unlock(&nvme_rdma_ctrl_mutex);

	return &ctrl->ctrl;

out_uninit_ctrl:
	/*
	 * NOTE(review): ctrl and ctrl->queues are not kfree()d on this path;
	 * presumably the final nvme_put_ctrl() releases them through
	 * .free_ctrl - confirm.
	 */
	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_put_ctrl(&ctrl->ctrl);
	/* a positive setup result is not a valid errno for ERR_PTR() */
	if (ret > 0)
		ret = -EIO;
	return ERR_PTR(ret);
out_kfree_queues:
	kfree(ctrl->queues);
out_free_ctrl:
	kfree(ctrl);
	return ERR_PTR(ret);
}
24278c2ecf20Sopenharmony_ci
24288c2ecf20Sopenharmony_cistatic struct nvmf_transport_ops nvme_rdma_transport = {
24298c2ecf20Sopenharmony_ci	.name		= "rdma",
24308c2ecf20Sopenharmony_ci	.module		= THIS_MODULE,
24318c2ecf20Sopenharmony_ci	.required_opts	= NVMF_OPT_TRADDR,
24328c2ecf20Sopenharmony_ci	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
24338c2ecf20Sopenharmony_ci			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
24348c2ecf20Sopenharmony_ci			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
24358c2ecf20Sopenharmony_ci			  NVMF_OPT_TOS,
24368c2ecf20Sopenharmony_ci	.create_ctrl	= nvme_rdma_create_ctrl,
24378c2ecf20Sopenharmony_ci};
24388c2ecf20Sopenharmony_ci
24398c2ecf20Sopenharmony_cistatic void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
24408c2ecf20Sopenharmony_ci{
24418c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl;
24428c2ecf20Sopenharmony_ci	struct nvme_rdma_device *ndev;
24438c2ecf20Sopenharmony_ci	bool found = false;
24448c2ecf20Sopenharmony_ci
24458c2ecf20Sopenharmony_ci	mutex_lock(&device_list_mutex);
24468c2ecf20Sopenharmony_ci	list_for_each_entry(ndev, &device_list, entry) {
24478c2ecf20Sopenharmony_ci		if (ndev->dev == ib_device) {
24488c2ecf20Sopenharmony_ci			found = true;
24498c2ecf20Sopenharmony_ci			break;
24508c2ecf20Sopenharmony_ci		}
24518c2ecf20Sopenharmony_ci	}
24528c2ecf20Sopenharmony_ci	mutex_unlock(&device_list_mutex);
24538c2ecf20Sopenharmony_ci
24548c2ecf20Sopenharmony_ci	if (!found)
24558c2ecf20Sopenharmony_ci		return;
24568c2ecf20Sopenharmony_ci
24578c2ecf20Sopenharmony_ci	/* Delete all controllers using this device */
24588c2ecf20Sopenharmony_ci	mutex_lock(&nvme_rdma_ctrl_mutex);
24598c2ecf20Sopenharmony_ci	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
24608c2ecf20Sopenharmony_ci		if (ctrl->device->dev != ib_device)
24618c2ecf20Sopenharmony_ci			continue;
24628c2ecf20Sopenharmony_ci		nvme_delete_ctrl(&ctrl->ctrl);
24638c2ecf20Sopenharmony_ci	}
24648c2ecf20Sopenharmony_ci	mutex_unlock(&nvme_rdma_ctrl_mutex);
24658c2ecf20Sopenharmony_ci
24668c2ecf20Sopenharmony_ci	flush_workqueue(nvme_delete_wq);
24678c2ecf20Sopenharmony_ci}
24688c2ecf20Sopenharmony_ci
24698c2ecf20Sopenharmony_cistatic struct ib_client nvme_rdma_ib_client = {
24708c2ecf20Sopenharmony_ci	.name   = "nvme_rdma",
24718c2ecf20Sopenharmony_ci	.remove = nvme_rdma_remove_one
24728c2ecf20Sopenharmony_ci};
24738c2ecf20Sopenharmony_ci
24748c2ecf20Sopenharmony_cistatic int __init nvme_rdma_init_module(void)
24758c2ecf20Sopenharmony_ci{
24768c2ecf20Sopenharmony_ci	int ret;
24778c2ecf20Sopenharmony_ci
24788c2ecf20Sopenharmony_ci	ret = ib_register_client(&nvme_rdma_ib_client);
24798c2ecf20Sopenharmony_ci	if (ret)
24808c2ecf20Sopenharmony_ci		return ret;
24818c2ecf20Sopenharmony_ci
24828c2ecf20Sopenharmony_ci	ret = nvmf_register_transport(&nvme_rdma_transport);
24838c2ecf20Sopenharmony_ci	if (ret)
24848c2ecf20Sopenharmony_ci		goto err_unreg_client;
24858c2ecf20Sopenharmony_ci
24868c2ecf20Sopenharmony_ci	return 0;
24878c2ecf20Sopenharmony_ci
24888c2ecf20Sopenharmony_cierr_unreg_client:
24898c2ecf20Sopenharmony_ci	ib_unregister_client(&nvme_rdma_ib_client);
24908c2ecf20Sopenharmony_ci	return ret;
24918c2ecf20Sopenharmony_ci}
24928c2ecf20Sopenharmony_ci
24938c2ecf20Sopenharmony_cistatic void __exit nvme_rdma_cleanup_module(void)
24948c2ecf20Sopenharmony_ci{
24958c2ecf20Sopenharmony_ci	struct nvme_rdma_ctrl *ctrl;
24968c2ecf20Sopenharmony_ci
24978c2ecf20Sopenharmony_ci	nvmf_unregister_transport(&nvme_rdma_transport);
24988c2ecf20Sopenharmony_ci	ib_unregister_client(&nvme_rdma_ib_client);
24998c2ecf20Sopenharmony_ci
25008c2ecf20Sopenharmony_ci	mutex_lock(&nvme_rdma_ctrl_mutex);
25018c2ecf20Sopenharmony_ci	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
25028c2ecf20Sopenharmony_ci		nvme_delete_ctrl(&ctrl->ctrl);
25038c2ecf20Sopenharmony_ci	mutex_unlock(&nvme_rdma_ctrl_mutex);
25048c2ecf20Sopenharmony_ci	flush_workqueue(nvme_delete_wq);
25058c2ecf20Sopenharmony_ci}
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_cimodule_init(nvme_rdma_init_module);
25088c2ecf20Sopenharmony_cimodule_exit(nvme_rdma_cleanup_module);
25098c2ecf20Sopenharmony_ci
25108c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2");
2511