162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * NVMe over Fabrics RDMA host code. 462306a36Sopenharmony_ci * Copyright (c) 2015-2016 HGST, a Western Digital Company. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 762306a36Sopenharmony_ci#include <linux/module.h> 862306a36Sopenharmony_ci#include <linux/init.h> 962306a36Sopenharmony_ci#include <linux/slab.h> 1062306a36Sopenharmony_ci#include <rdma/mr_pool.h> 1162306a36Sopenharmony_ci#include <linux/err.h> 1262306a36Sopenharmony_ci#include <linux/string.h> 1362306a36Sopenharmony_ci#include <linux/atomic.h> 1462306a36Sopenharmony_ci#include <linux/blk-mq.h> 1562306a36Sopenharmony_ci#include <linux/blk-integrity.h> 1662306a36Sopenharmony_ci#include <linux/types.h> 1762306a36Sopenharmony_ci#include <linux/list.h> 1862306a36Sopenharmony_ci#include <linux/mutex.h> 1962306a36Sopenharmony_ci#include <linux/scatterlist.h> 2062306a36Sopenharmony_ci#include <linux/nvme.h> 2162306a36Sopenharmony_ci#include <asm/unaligned.h> 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#include <rdma/ib_verbs.h> 2462306a36Sopenharmony_ci#include <rdma/rdma_cm.h> 2562306a36Sopenharmony_ci#include <linux/nvme-rdma.h> 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#include "nvme.h" 2862306a36Sopenharmony_ci#include "fabrics.h" 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#define NVME_RDMA_CM_TIMEOUT_MS 3000 /* 3 second */ 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#define NVME_RDMA_MAX_SEGMENTS 256 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci#define NVME_RDMA_MAX_INLINE_SEGMENTS 4 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci#define NVME_RDMA_DATA_SGL_SIZE \ 3862306a36Sopenharmony_ci (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT) 3962306a36Sopenharmony_ci#define NVME_RDMA_METADATA_SGL_SIZE \ 4062306a36Sopenharmony_ci (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT) 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_cistruct nvme_rdma_device { 4362306a36Sopenharmony_ci struct ib_device *dev; 4462306a36Sopenharmony_ci struct ib_pd *pd; 4562306a36Sopenharmony_ci struct kref ref; 4662306a36Sopenharmony_ci struct list_head entry; 4762306a36Sopenharmony_ci unsigned int num_inline_segments; 4862306a36Sopenharmony_ci}; 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cistruct nvme_rdma_qe { 5162306a36Sopenharmony_ci struct ib_cqe cqe; 5262306a36Sopenharmony_ci void *data; 5362306a36Sopenharmony_ci u64 dma; 5462306a36Sopenharmony_ci}; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cistruct nvme_rdma_sgl { 5762306a36Sopenharmony_ci int nents; 5862306a36Sopenharmony_ci struct sg_table sg_table; 5962306a36Sopenharmony_ci}; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistruct nvme_rdma_queue; 6262306a36Sopenharmony_cistruct nvme_rdma_request { 6362306a36Sopenharmony_ci struct nvme_request req; 6462306a36Sopenharmony_ci struct ib_mr *mr; 6562306a36Sopenharmony_ci struct nvme_rdma_qe sqe; 6662306a36Sopenharmony_ci union nvme_result result; 6762306a36Sopenharmony_ci __le16 status; 6862306a36Sopenharmony_ci refcount_t ref; 6962306a36Sopenharmony_ci struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; 7062306a36Sopenharmony_ci u32 num_sge; 7162306a36Sopenharmony_ci struct ib_reg_wr reg_wr; 7262306a36Sopenharmony_ci struct ib_cqe reg_cqe; 7362306a36Sopenharmony_ci struct nvme_rdma_queue *queue; 7462306a36Sopenharmony_ci struct nvme_rdma_sgl data_sgl; 7562306a36Sopenharmony_ci struct nvme_rdma_sgl *metadata_sgl; 7662306a36Sopenharmony_ci bool use_sig_mr; 7762306a36Sopenharmony_ci}; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_cienum nvme_rdma_queue_flags { 8062306a36Sopenharmony_ci NVME_RDMA_Q_ALLOCATED = 0, 8162306a36Sopenharmony_ci NVME_RDMA_Q_LIVE = 1, 8262306a36Sopenharmony_ci NVME_RDMA_Q_TR_READY = 2, 8362306a36Sopenharmony_ci}; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_cistruct nvme_rdma_queue { 8662306a36Sopenharmony_ci struct nvme_rdma_qe *rsp_ring; 8762306a36Sopenharmony_ci int queue_size; 8862306a36Sopenharmony_ci size_t cmnd_capsule_len; 8962306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl; 9062306a36Sopenharmony_ci struct nvme_rdma_device *device; 9162306a36Sopenharmony_ci struct ib_cq *ib_cq; 9262306a36Sopenharmony_ci struct ib_qp *qp; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci unsigned long flags; 9562306a36Sopenharmony_ci struct rdma_cm_id *cm_id; 9662306a36Sopenharmony_ci int cm_error; 9762306a36Sopenharmony_ci struct completion cm_done; 9862306a36Sopenharmony_ci bool pi_support; 9962306a36Sopenharmony_ci int cq_size; 10062306a36Sopenharmony_ci struct mutex queue_lock; 10162306a36Sopenharmony_ci}; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_cistruct nvme_rdma_ctrl { 10462306a36Sopenharmony_ci /* read only in the hot path */ 10562306a36Sopenharmony_ci struct nvme_rdma_queue *queues; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci /* other member variables */ 10862306a36Sopenharmony_ci struct blk_mq_tag_set tag_set; 10962306a36Sopenharmony_ci struct work_struct err_work; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci struct nvme_rdma_qe async_event_sqe; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci struct delayed_work reconnect_work; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci struct list_head list; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci struct blk_mq_tag_set admin_tag_set; 11862306a36Sopenharmony_ci struct nvme_rdma_device *device; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci u32 max_fr_pages; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci struct sockaddr_storage addr; 12362306a36Sopenharmony_ci struct sockaddr_storage src_addr; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci struct nvme_ctrl ctrl; 12662306a36Sopenharmony_ci bool use_inline_data; 12762306a36Sopenharmony_ci u32 io_queues[HCTX_MAX_TYPES]; 12862306a36Sopenharmony_ci}; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_cistatic inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl) 13162306a36Sopenharmony_ci{ 13262306a36Sopenharmony_ci return container_of(ctrl, struct nvme_rdma_ctrl, ctrl); 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistatic LIST_HEAD(device_list); 13662306a36Sopenharmony_cistatic DEFINE_MUTEX(device_list_mutex); 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_cistatic LIST_HEAD(nvme_rdma_ctrl_list); 13962306a36Sopenharmony_cistatic DEFINE_MUTEX(nvme_rdma_ctrl_mutex); 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci/* 14262306a36Sopenharmony_ci * Disabling this option makes small I/O goes faster, but is fundamentally 14362306a36Sopenharmony_ci * unsafe. With it turned off we will have to register a global rkey that 14462306a36Sopenharmony_ci * allows read and write access to all physical memory. 14562306a36Sopenharmony_ci */ 14662306a36Sopenharmony_cistatic bool register_always = true; 14762306a36Sopenharmony_cimodule_param(register_always, bool, 0444); 14862306a36Sopenharmony_ciMODULE_PARM_DESC(register_always, 14962306a36Sopenharmony_ci "Use memory registration even for contiguous memory regions"); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_cistatic int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, 15262306a36Sopenharmony_ci struct rdma_cm_event *event); 15362306a36Sopenharmony_cistatic void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); 15462306a36Sopenharmony_cistatic void nvme_rdma_complete_rq(struct request *rq); 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_rdma_mq_ops; 15762306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_rdma_admin_mq_ops; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_cistatic inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue) 16062306a36Sopenharmony_ci{ 16162306a36Sopenharmony_ci return queue - queue->ctrl->queues; 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_cistatic bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue) 16562306a36Sopenharmony_ci{ 16662306a36Sopenharmony_ci return nvme_rdma_queue_idx(queue) > 16762306a36Sopenharmony_ci queue->ctrl->io_queues[HCTX_TYPE_DEFAULT] + 16862306a36Sopenharmony_ci queue->ctrl->io_queues[HCTX_TYPE_READ]; 16962306a36Sopenharmony_ci} 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_cistatic inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci return queue->cmnd_capsule_len - sizeof(struct nvme_command); 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_cistatic void nvme_rdma_free_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe, 17762306a36Sopenharmony_ci size_t capsule_size, enum dma_data_direction dir) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci ib_dma_unmap_single(ibdev, qe->dma, capsule_size, dir); 18062306a36Sopenharmony_ci kfree(qe->data); 18162306a36Sopenharmony_ci} 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_cistatic int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe, 18462306a36Sopenharmony_ci size_t capsule_size, enum dma_data_direction dir) 18562306a36Sopenharmony_ci{ 18662306a36Sopenharmony_ci qe->data = kzalloc(capsule_size, GFP_KERNEL); 18762306a36Sopenharmony_ci if (!qe->data) 18862306a36Sopenharmony_ci return -ENOMEM; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir); 19162306a36Sopenharmony_ci if (ib_dma_mapping_error(ibdev, qe->dma)) { 19262306a36Sopenharmony_ci kfree(qe->data); 19362306a36Sopenharmony_ci qe->data = NULL; 19462306a36Sopenharmony_ci return -ENOMEM; 19562306a36Sopenharmony_ci } 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci return 0; 19862306a36Sopenharmony_ci} 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_cistatic void nvme_rdma_free_ring(struct ib_device *ibdev, 20162306a36Sopenharmony_ci struct nvme_rdma_qe *ring, size_t ib_queue_size, 20262306a36Sopenharmony_ci size_t capsule_size, enum dma_data_direction dir) 20362306a36Sopenharmony_ci{ 20462306a36Sopenharmony_ci int i; 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci for (i = 0; i < ib_queue_size; i++) 20762306a36Sopenharmony_ci nvme_rdma_free_qe(ibdev, &ring[i], capsule_size, dir); 20862306a36Sopenharmony_ci kfree(ring); 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_cistatic struct nvme_rdma_qe *nvme_rdma_alloc_ring(struct ib_device *ibdev, 21262306a36Sopenharmony_ci size_t ib_queue_size, size_t capsule_size, 21362306a36Sopenharmony_ci enum dma_data_direction dir) 21462306a36Sopenharmony_ci{ 21562306a36Sopenharmony_ci struct nvme_rdma_qe *ring; 21662306a36Sopenharmony_ci int i; 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci ring = kcalloc(ib_queue_size, sizeof(struct nvme_rdma_qe), GFP_KERNEL); 21962306a36Sopenharmony_ci if (!ring) 22062306a36Sopenharmony_ci return NULL; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci /* 22362306a36Sopenharmony_ci * Bind the CQEs (post recv buffers) DMA mapping to the RDMA queue 22462306a36Sopenharmony_ci * lifetime. It's safe, since any chage in the underlying RDMA device 22562306a36Sopenharmony_ci * will issue error recovery and queue re-creation. 22662306a36Sopenharmony_ci */ 22762306a36Sopenharmony_ci for (i = 0; i < ib_queue_size; i++) { 22862306a36Sopenharmony_ci if (nvme_rdma_alloc_qe(ibdev, &ring[i], capsule_size, dir)) 22962306a36Sopenharmony_ci goto out_free_ring; 23062306a36Sopenharmony_ci } 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci return ring; 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ciout_free_ring: 23562306a36Sopenharmony_ci nvme_rdma_free_ring(ibdev, ring, i, capsule_size, dir); 23662306a36Sopenharmony_ci return NULL; 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_cistatic void nvme_rdma_qp_event(struct ib_event *event, void *context) 24062306a36Sopenharmony_ci{ 24162306a36Sopenharmony_ci pr_debug("QP event %s (%d)\n", 24262306a36Sopenharmony_ci ib_event_msg(event->event), event->event); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_cistatic int nvme_rdma_wait_for_cm(struct nvme_rdma_queue *queue) 24762306a36Sopenharmony_ci{ 24862306a36Sopenharmony_ci int ret; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci ret = wait_for_completion_interruptible(&queue->cm_done); 25162306a36Sopenharmony_ci if (ret) 25262306a36Sopenharmony_ci return ret; 25362306a36Sopenharmony_ci WARN_ON_ONCE(queue->cm_error > 0); 25462306a36Sopenharmony_ci return queue->cm_error; 25562306a36Sopenharmony_ci} 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_cistatic int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) 25862306a36Sopenharmony_ci{ 25962306a36Sopenharmony_ci struct nvme_rdma_device *dev = queue->device; 26062306a36Sopenharmony_ci struct ib_qp_init_attr init_attr; 26162306a36Sopenharmony_ci int ret; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci memset(&init_attr, 0, sizeof(init_attr)); 26462306a36Sopenharmony_ci init_attr.event_handler = nvme_rdma_qp_event; 26562306a36Sopenharmony_ci /* +1 for drain */ 26662306a36Sopenharmony_ci init_attr.cap.max_send_wr = factor * queue->queue_size + 1; 26762306a36Sopenharmony_ci /* +1 for drain */ 26862306a36Sopenharmony_ci init_attr.cap.max_recv_wr = queue->queue_size + 1; 26962306a36Sopenharmony_ci init_attr.cap.max_recv_sge = 1; 27062306a36Sopenharmony_ci init_attr.cap.max_send_sge = 1 + dev->num_inline_segments; 27162306a36Sopenharmony_ci init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 27262306a36Sopenharmony_ci init_attr.qp_type = IB_QPT_RC; 27362306a36Sopenharmony_ci init_attr.send_cq = queue->ib_cq; 27462306a36Sopenharmony_ci init_attr.recv_cq = queue->ib_cq; 27562306a36Sopenharmony_ci if (queue->pi_support) 27662306a36Sopenharmony_ci init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; 27762306a36Sopenharmony_ci init_attr.qp_context = queue; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr); 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci queue->qp = queue->cm_id->qp; 28262306a36Sopenharmony_ci return ret; 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_cistatic void nvme_rdma_exit_request(struct blk_mq_tag_set *set, 28662306a36Sopenharmony_ci struct request *rq, unsigned int hctx_idx) 28762306a36Sopenharmony_ci{ 28862306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci kfree(req->sqe.data); 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_cistatic int nvme_rdma_init_request(struct blk_mq_tag_set *set, 29462306a36Sopenharmony_ci struct request *rq, unsigned int hctx_idx, 29562306a36Sopenharmony_ci unsigned int numa_node) 29662306a36Sopenharmony_ci{ 29762306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); 29862306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 29962306a36Sopenharmony_ci int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; 30062306a36Sopenharmony_ci struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci nvme_req(rq)->ctrl = &ctrl->ctrl; 30362306a36Sopenharmony_ci req->sqe.data = kzalloc(sizeof(struct nvme_command), GFP_KERNEL); 30462306a36Sopenharmony_ci if (!req->sqe.data) 30562306a36Sopenharmony_ci return -ENOMEM; 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci /* metadata nvme_rdma_sgl struct is located after command's data SGL */ 30862306a36Sopenharmony_ci if (queue->pi_support) 30962306a36Sopenharmony_ci req->metadata_sgl = (void *)nvme_req(rq) + 31062306a36Sopenharmony_ci sizeof(struct nvme_rdma_request) + 31162306a36Sopenharmony_ci NVME_RDMA_DATA_SGL_SIZE; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci req->queue = queue; 31462306a36Sopenharmony_ci nvme_req(rq)->cmd = req->sqe.data; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci return 0; 31762306a36Sopenharmony_ci} 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_cistatic int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 32062306a36Sopenharmony_ci unsigned int hctx_idx) 32162306a36Sopenharmony_ci{ 32262306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); 32362306a36Sopenharmony_ci struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1]; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci hctx->driver_data = queue; 32862306a36Sopenharmony_ci return 0; 32962306a36Sopenharmony_ci} 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_cistatic int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 33262306a36Sopenharmony_ci unsigned int hctx_idx) 33362306a36Sopenharmony_ci{ 33462306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); 33562306a36Sopenharmony_ci struct nvme_rdma_queue *queue = &ctrl->queues[0]; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci BUG_ON(hctx_idx != 0); 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci hctx->driver_data = queue; 34062306a36Sopenharmony_ci return 0; 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic void nvme_rdma_free_dev(struct kref *ref) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci struct nvme_rdma_device *ndev = 34662306a36Sopenharmony_ci container_of(ref, struct nvme_rdma_device, ref); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci mutex_lock(&device_list_mutex); 34962306a36Sopenharmony_ci list_del(&ndev->entry); 35062306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci ib_dealloc_pd(ndev->pd); 35362306a36Sopenharmony_ci kfree(ndev); 35462306a36Sopenharmony_ci} 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_cistatic void nvme_rdma_dev_put(struct nvme_rdma_device *dev) 35762306a36Sopenharmony_ci{ 35862306a36Sopenharmony_ci kref_put(&dev->ref, nvme_rdma_free_dev); 35962306a36Sopenharmony_ci} 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_cistatic int nvme_rdma_dev_get(struct nvme_rdma_device *dev) 36262306a36Sopenharmony_ci{ 36362306a36Sopenharmony_ci return kref_get_unless_zero(&dev->ref); 36462306a36Sopenharmony_ci} 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_cistatic struct nvme_rdma_device * 36762306a36Sopenharmony_cinvme_rdma_find_get_device(struct rdma_cm_id *cm_id) 36862306a36Sopenharmony_ci{ 36962306a36Sopenharmony_ci struct nvme_rdma_device *ndev; 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci mutex_lock(&device_list_mutex); 37262306a36Sopenharmony_ci list_for_each_entry(ndev, &device_list, entry) { 37362306a36Sopenharmony_ci if (ndev->dev->node_guid == cm_id->device->node_guid && 37462306a36Sopenharmony_ci nvme_rdma_dev_get(ndev)) 37562306a36Sopenharmony_ci goto out_unlock; 37662306a36Sopenharmony_ci } 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); 37962306a36Sopenharmony_ci if (!ndev) 38062306a36Sopenharmony_ci goto out_err; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci ndev->dev = cm_id->device; 38362306a36Sopenharmony_ci kref_init(&ndev->ref); 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci ndev->pd = ib_alloc_pd(ndev->dev, 38662306a36Sopenharmony_ci register_always ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); 38762306a36Sopenharmony_ci if (IS_ERR(ndev->pd)) 38862306a36Sopenharmony_ci goto out_free_dev; 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci if (!(ndev->dev->attrs.device_cap_flags & 39162306a36Sopenharmony_ci IB_DEVICE_MEM_MGT_EXTENSIONS)) { 39262306a36Sopenharmony_ci dev_err(&ndev->dev->dev, 39362306a36Sopenharmony_ci "Memory registrations not supported.\n"); 39462306a36Sopenharmony_ci goto out_free_pd; 39562306a36Sopenharmony_ci } 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS, 39862306a36Sopenharmony_ci ndev->dev->attrs.max_send_sge - 1); 39962306a36Sopenharmony_ci list_add(&ndev->entry, &device_list); 40062306a36Sopenharmony_ciout_unlock: 40162306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 40262306a36Sopenharmony_ci return ndev; 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ciout_free_pd: 40562306a36Sopenharmony_ci ib_dealloc_pd(ndev->pd); 40662306a36Sopenharmony_ciout_free_dev: 40762306a36Sopenharmony_ci kfree(ndev); 40862306a36Sopenharmony_ciout_err: 40962306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 41062306a36Sopenharmony_ci return NULL; 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_cistatic void nvme_rdma_free_cq(struct nvme_rdma_queue *queue) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci if (nvme_rdma_poll_queue(queue)) 41662306a36Sopenharmony_ci ib_free_cq(queue->ib_cq); 41762306a36Sopenharmony_ci else 41862306a36Sopenharmony_ci ib_cq_pool_put(queue->ib_cq, queue->cq_size); 41962306a36Sopenharmony_ci} 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_cistatic void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) 42262306a36Sopenharmony_ci{ 42362306a36Sopenharmony_ci struct nvme_rdma_device *dev; 42462306a36Sopenharmony_ci struct ib_device *ibdev; 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags)) 42762306a36Sopenharmony_ci return; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci dev = queue->device; 43062306a36Sopenharmony_ci ibdev = dev->dev; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci if (queue->pi_support) 43362306a36Sopenharmony_ci ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs); 43462306a36Sopenharmony_ci ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci /* 43762306a36Sopenharmony_ci * The cm_id object might have been destroyed during RDMA connection 43862306a36Sopenharmony_ci * establishment error flow to avoid getting other cma events, thus 43962306a36Sopenharmony_ci * the destruction of the QP shouldn't use rdma_cm API. 44062306a36Sopenharmony_ci */ 44162306a36Sopenharmony_ci ib_destroy_qp(queue->qp); 44262306a36Sopenharmony_ci nvme_rdma_free_cq(queue); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, 44562306a36Sopenharmony_ci sizeof(struct nvme_completion), DMA_FROM_DEVICE); 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci nvme_rdma_dev_put(dev); 44862306a36Sopenharmony_ci} 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_cistatic int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support) 45162306a36Sopenharmony_ci{ 45262306a36Sopenharmony_ci u32 max_page_list_len; 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci if (pi_support) 45562306a36Sopenharmony_ci max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len; 45662306a36Sopenharmony_ci else 45762306a36Sopenharmony_ci max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len; 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1); 46062306a36Sopenharmony_ci} 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_cistatic int nvme_rdma_create_cq(struct ib_device *ibdev, 46362306a36Sopenharmony_ci struct nvme_rdma_queue *queue) 46462306a36Sopenharmony_ci{ 46562306a36Sopenharmony_ci int ret, comp_vector, idx = nvme_rdma_queue_idx(queue); 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci /* 46862306a36Sopenharmony_ci * Spread I/O queues completion vectors according their queue index. 46962306a36Sopenharmony_ci * Admin queues can always go on completion vector 0. 47062306a36Sopenharmony_ci */ 47162306a36Sopenharmony_ci comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci /* Polling queues need direct cq polling context */ 47462306a36Sopenharmony_ci if (nvme_rdma_poll_queue(queue)) 47562306a36Sopenharmony_ci queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size, 47662306a36Sopenharmony_ci comp_vector, IB_POLL_DIRECT); 47762306a36Sopenharmony_ci else 47862306a36Sopenharmony_ci queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size, 47962306a36Sopenharmony_ci comp_vector, IB_POLL_SOFTIRQ); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci if (IS_ERR(queue->ib_cq)) { 48262306a36Sopenharmony_ci ret = PTR_ERR(queue->ib_cq); 48362306a36Sopenharmony_ci return ret; 48462306a36Sopenharmony_ci } 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci return 0; 48762306a36Sopenharmony_ci} 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_cistatic int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) 49062306a36Sopenharmony_ci{ 49162306a36Sopenharmony_ci struct ib_device *ibdev; 49262306a36Sopenharmony_ci const int send_wr_factor = 3; /* MR, SEND, INV */ 49362306a36Sopenharmony_ci const int cq_factor = send_wr_factor + 1; /* + RECV */ 49462306a36Sopenharmony_ci int ret, pages_per_mr; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci queue->device = nvme_rdma_find_get_device(queue->cm_id); 49762306a36Sopenharmony_ci if (!queue->device) { 49862306a36Sopenharmony_ci dev_err(queue->cm_id->device->dev.parent, 49962306a36Sopenharmony_ci "no client data found!\n"); 50062306a36Sopenharmony_ci return -ECONNREFUSED; 50162306a36Sopenharmony_ci } 50262306a36Sopenharmony_ci ibdev = queue->device->dev; 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci /* +1 for ib_drain_qp */ 50562306a36Sopenharmony_ci queue->cq_size = cq_factor * queue->queue_size + 1; 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci ret = nvme_rdma_create_cq(ibdev, queue); 50862306a36Sopenharmony_ci if (ret) 50962306a36Sopenharmony_ci goto out_put_dev; 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci ret = nvme_rdma_create_qp(queue, send_wr_factor); 51262306a36Sopenharmony_ci if (ret) 51362306a36Sopenharmony_ci goto out_destroy_ib_cq; 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci queue->rsp_ring = nvme_rdma_alloc_ring(ibdev, queue->queue_size, 51662306a36Sopenharmony_ci sizeof(struct nvme_completion), DMA_FROM_DEVICE); 51762306a36Sopenharmony_ci if (!queue->rsp_ring) { 51862306a36Sopenharmony_ci ret = -ENOMEM; 51962306a36Sopenharmony_ci goto out_destroy_qp; 52062306a36Sopenharmony_ci } 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci /* 52362306a36Sopenharmony_ci * Currently we don't use SG_GAPS MR's so if the first entry is 52462306a36Sopenharmony_ci * misaligned we'll end up using two entries for a single data page, 52562306a36Sopenharmony_ci * so one additional entry is required. 52662306a36Sopenharmony_ci */ 52762306a36Sopenharmony_ci pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1; 52862306a36Sopenharmony_ci ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, 52962306a36Sopenharmony_ci queue->queue_size, 53062306a36Sopenharmony_ci IB_MR_TYPE_MEM_REG, 53162306a36Sopenharmony_ci pages_per_mr, 0); 53262306a36Sopenharmony_ci if (ret) { 53362306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 53462306a36Sopenharmony_ci "failed to initialize MR pool sized %d for QID %d\n", 53562306a36Sopenharmony_ci queue->queue_size, nvme_rdma_queue_idx(queue)); 53662306a36Sopenharmony_ci goto out_destroy_ring; 53762306a36Sopenharmony_ci } 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci if (queue->pi_support) { 54062306a36Sopenharmony_ci ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs, 54162306a36Sopenharmony_ci queue->queue_size, IB_MR_TYPE_INTEGRITY, 54262306a36Sopenharmony_ci pages_per_mr, pages_per_mr); 54362306a36Sopenharmony_ci if (ret) { 54462306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 54562306a36Sopenharmony_ci "failed to initialize PI MR pool sized %d for QID %d\n", 54662306a36Sopenharmony_ci queue->queue_size, nvme_rdma_queue_idx(queue)); 54762306a36Sopenharmony_ci goto out_destroy_mr_pool; 54862306a36Sopenharmony_ci } 54962306a36Sopenharmony_ci } 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci return 0; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ciout_destroy_mr_pool: 55662306a36Sopenharmony_ci ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); 55762306a36Sopenharmony_ciout_destroy_ring: 55862306a36Sopenharmony_ci nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, 55962306a36Sopenharmony_ci sizeof(struct nvme_completion), DMA_FROM_DEVICE); 56062306a36Sopenharmony_ciout_destroy_qp: 56162306a36Sopenharmony_ci rdma_destroy_qp(queue->cm_id); 56262306a36Sopenharmony_ciout_destroy_ib_cq: 56362306a36Sopenharmony_ci nvme_rdma_free_cq(queue); 56462306a36Sopenharmony_ciout_put_dev: 56562306a36Sopenharmony_ci nvme_rdma_dev_put(queue->device); 56662306a36Sopenharmony_ci return ret; 56762306a36Sopenharmony_ci} 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_cistatic int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, 57062306a36Sopenharmony_ci int idx, size_t queue_size) 57162306a36Sopenharmony_ci{ 57262306a36Sopenharmony_ci struct nvme_rdma_queue *queue; 57362306a36Sopenharmony_ci struct sockaddr *src_addr = NULL; 57462306a36Sopenharmony_ci int ret; 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci queue = &ctrl->queues[idx]; 57762306a36Sopenharmony_ci mutex_init(&queue->queue_lock); 57862306a36Sopenharmony_ci queue->ctrl = ctrl; 57962306a36Sopenharmony_ci if (idx && ctrl->ctrl.max_integrity_segments) 58062306a36Sopenharmony_ci queue->pi_support = true; 58162306a36Sopenharmony_ci else 58262306a36Sopenharmony_ci queue->pi_support = false; 58362306a36Sopenharmony_ci init_completion(&queue->cm_done); 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci if (idx > 0) 58662306a36Sopenharmony_ci queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; 58762306a36Sopenharmony_ci else 58862306a36Sopenharmony_ci queue->cmnd_capsule_len = sizeof(struct nvme_command); 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci queue->queue_size = queue_size; 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, 59362306a36Sopenharmony_ci RDMA_PS_TCP, IB_QPT_RC); 59462306a36Sopenharmony_ci if (IS_ERR(queue->cm_id)) { 59562306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, 59662306a36Sopenharmony_ci "failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id)); 59762306a36Sopenharmony_ci ret = PTR_ERR(queue->cm_id); 59862306a36Sopenharmony_ci goto out_destroy_mutex; 59962306a36Sopenharmony_ci } 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) 60262306a36Sopenharmony_ci src_addr = (struct sockaddr *)&ctrl->src_addr; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci queue->cm_error = -ETIMEDOUT; 60562306a36Sopenharmony_ci ret = rdma_resolve_addr(queue->cm_id, src_addr, 60662306a36Sopenharmony_ci (struct sockaddr *)&ctrl->addr, 60762306a36Sopenharmony_ci NVME_RDMA_CM_TIMEOUT_MS); 60862306a36Sopenharmony_ci if (ret) { 60962306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, 61062306a36Sopenharmony_ci "rdma_resolve_addr failed (%d).\n", ret); 61162306a36Sopenharmony_ci goto out_destroy_cm_id; 61262306a36Sopenharmony_ci } 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci ret = nvme_rdma_wait_for_cm(queue); 61562306a36Sopenharmony_ci if (ret) { 61662306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, 61762306a36Sopenharmony_ci "rdma connection establishment failed (%d)\n", ret); 61862306a36Sopenharmony_ci goto out_destroy_cm_id; 61962306a36Sopenharmony_ci } 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags); 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci return 0; 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ciout_destroy_cm_id: 62662306a36Sopenharmony_ci rdma_destroy_id(queue->cm_id); 62762306a36Sopenharmony_ci nvme_rdma_destroy_queue_ib(queue); 62862306a36Sopenharmony_ciout_destroy_mutex: 62962306a36Sopenharmony_ci mutex_destroy(&queue->queue_lock); 63062306a36Sopenharmony_ci return ret; 63162306a36Sopenharmony_ci} 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_cistatic void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) 63462306a36Sopenharmony_ci{ 63562306a36Sopenharmony_ci rdma_disconnect(queue->cm_id); 63662306a36Sopenharmony_ci ib_drain_qp(queue->qp); 63762306a36Sopenharmony_ci} 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_cistatic void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) 64062306a36Sopenharmony_ci{ 64162306a36Sopenharmony_ci if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) 64262306a36Sopenharmony_ci return; 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci mutex_lock(&queue->queue_lock); 64562306a36Sopenharmony_ci if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) 64662306a36Sopenharmony_ci __nvme_rdma_stop_queue(queue); 64762306a36Sopenharmony_ci mutex_unlock(&queue->queue_lock); 64862306a36Sopenharmony_ci} 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_cistatic void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) 65162306a36Sopenharmony_ci{ 65262306a36Sopenharmony_ci if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) 65362306a36Sopenharmony_ci return; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci rdma_destroy_id(queue->cm_id); 65662306a36Sopenharmony_ci nvme_rdma_destroy_queue_ib(queue); 65762306a36Sopenharmony_ci mutex_destroy(&queue->queue_lock); 65862306a36Sopenharmony_ci} 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_cistatic void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) 66162306a36Sopenharmony_ci{ 66262306a36Sopenharmony_ci int i; 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ci for (i = 1; i < ctrl->ctrl.queue_count; i++) 66562306a36Sopenharmony_ci nvme_rdma_free_queue(&ctrl->queues[i]); 66662306a36Sopenharmony_ci} 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_cistatic void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) 66962306a36Sopenharmony_ci{ 67062306a36Sopenharmony_ci int i; 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci for (i = 1; i < ctrl->ctrl.queue_count; i++) 67362306a36Sopenharmony_ci nvme_rdma_stop_queue(&ctrl->queues[i]); 67462306a36Sopenharmony_ci} 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_cistatic int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) 67762306a36Sopenharmony_ci{ 67862306a36Sopenharmony_ci struct nvme_rdma_queue *queue = &ctrl->queues[idx]; 67962306a36Sopenharmony_ci int ret; 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci if (idx) 68262306a36Sopenharmony_ci ret = nvmf_connect_io_queue(&ctrl->ctrl, idx); 68362306a36Sopenharmony_ci else 68462306a36Sopenharmony_ci ret = nvmf_connect_admin_queue(&ctrl->ctrl); 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci if (!ret) { 68762306a36Sopenharmony_ci set_bit(NVME_RDMA_Q_LIVE, &queue->flags); 68862306a36Sopenharmony_ci } else { 68962306a36Sopenharmony_ci if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) 69062306a36Sopenharmony_ci __nvme_rdma_stop_queue(queue); 69162306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, 69262306a36Sopenharmony_ci "failed to connect queue: %d ret=%d\n", idx, ret); 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci return ret; 69562306a36Sopenharmony_ci} 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_cistatic int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl, 69862306a36Sopenharmony_ci int first, int last) 69962306a36Sopenharmony_ci{ 70062306a36Sopenharmony_ci int i, ret = 0; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci for (i = first; i < last; i++) { 70362306a36Sopenharmony_ci ret = nvme_rdma_start_queue(ctrl, i); 70462306a36Sopenharmony_ci if (ret) 70562306a36Sopenharmony_ci goto out_stop_queues; 70662306a36Sopenharmony_ci } 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci return 0; 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ciout_stop_queues: 71162306a36Sopenharmony_ci for (i--; i >= first; i--) 71262306a36Sopenharmony_ci nvme_rdma_stop_queue(&ctrl->queues[i]); 71362306a36Sopenharmony_ci return ret; 71462306a36Sopenharmony_ci} 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_cistatic int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) 71762306a36Sopenharmony_ci{ 71862306a36Sopenharmony_ci struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 71962306a36Sopenharmony_ci unsigned int nr_io_queues; 72062306a36Sopenharmony_ci int i, ret; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci nr_io_queues = nvmf_nr_io_queues(opts); 72362306a36Sopenharmony_ci ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 72462306a36Sopenharmony_ci if (ret) 72562306a36Sopenharmony_ci return ret; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci if (nr_io_queues == 0) { 72862306a36Sopenharmony_ci dev_err(ctrl->ctrl.device, 72962306a36Sopenharmony_ci "unable to set any I/O queues\n"); 73062306a36Sopenharmony_ci return -ENOMEM; 73162306a36Sopenharmony_ci } 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci ctrl->ctrl.queue_count = nr_io_queues + 1; 73462306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, 73562306a36Sopenharmony_ci "creating %d I/O queues.\n", nr_io_queues); 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci nvmf_set_io_queues(opts, nr_io_queues, ctrl->io_queues); 73862306a36Sopenharmony_ci for (i = 1; i < ctrl->ctrl.queue_count; i++) { 73962306a36Sopenharmony_ci ret = nvme_rdma_alloc_queue(ctrl, i, 74062306a36Sopenharmony_ci ctrl->ctrl.sqsize + 1); 74162306a36Sopenharmony_ci if (ret) 74262306a36Sopenharmony_ci goto out_free_queues; 74362306a36Sopenharmony_ci } 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci return 0; 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ciout_free_queues: 74862306a36Sopenharmony_ci for (i--; i >= 1; i--) 74962306a36Sopenharmony_ci nvme_rdma_free_queue(&ctrl->queues[i]); 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci return ret; 75262306a36Sopenharmony_ci} 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_cistatic int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl) 75562306a36Sopenharmony_ci{ 75662306a36Sopenharmony_ci unsigned int cmd_size = sizeof(struct nvme_rdma_request) + 75762306a36Sopenharmony_ci NVME_RDMA_DATA_SGL_SIZE; 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci if (ctrl->max_integrity_segments) 76062306a36Sopenharmony_ci cmd_size += sizeof(struct nvme_rdma_sgl) + 76162306a36Sopenharmony_ci NVME_RDMA_METADATA_SGL_SIZE; 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set, 76462306a36Sopenharmony_ci &nvme_rdma_mq_ops, 76562306a36Sopenharmony_ci ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, 76662306a36Sopenharmony_ci cmd_size); 76762306a36Sopenharmony_ci} 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_cistatic void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) 77062306a36Sopenharmony_ci{ 77162306a36Sopenharmony_ci if (ctrl->async_event_sqe.data) { 77262306a36Sopenharmony_ci cancel_work_sync(&ctrl->ctrl.async_event_work); 77362306a36Sopenharmony_ci nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, 77462306a36Sopenharmony_ci sizeof(struct nvme_command), DMA_TO_DEVICE); 77562306a36Sopenharmony_ci ctrl->async_event_sqe.data = NULL; 77662306a36Sopenharmony_ci } 77762306a36Sopenharmony_ci nvme_rdma_free_queue(&ctrl->queues[0]); 77862306a36Sopenharmony_ci} 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_cistatic int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, 78162306a36Sopenharmony_ci bool new) 78262306a36Sopenharmony_ci{ 78362306a36Sopenharmony_ci bool pi_capable = false; 78462306a36Sopenharmony_ci int error; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); 78762306a36Sopenharmony_ci if (error) 78862306a36Sopenharmony_ci return error; 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci ctrl->device = ctrl->queues[0].device; 79162306a36Sopenharmony_ci ctrl->ctrl.numa_node = ibdev_to_node(ctrl->device->dev); 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci /* T10-PI support */ 79462306a36Sopenharmony_ci if (ctrl->device->dev->attrs.kernel_cap_flags & 79562306a36Sopenharmony_ci IBK_INTEGRITY_HANDOVER) 79662306a36Sopenharmony_ci pi_capable = true; 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev, 79962306a36Sopenharmony_ci pi_capable); 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci /* 80262306a36Sopenharmony_ci * Bind the async event SQE DMA mapping to the admin queue lifetime. 80362306a36Sopenharmony_ci * It's safe, since any chage in the underlying RDMA device will issue 80462306a36Sopenharmony_ci * error recovery and queue re-creation. 80562306a36Sopenharmony_ci */ 80662306a36Sopenharmony_ci error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe, 80762306a36Sopenharmony_ci sizeof(struct nvme_command), DMA_TO_DEVICE); 80862306a36Sopenharmony_ci if (error) 80962306a36Sopenharmony_ci goto out_free_queue; 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci if (new) { 81262306a36Sopenharmony_ci error = nvme_alloc_admin_tag_set(&ctrl->ctrl, 81362306a36Sopenharmony_ci &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops, 81462306a36Sopenharmony_ci sizeof(struct nvme_rdma_request) + 81562306a36Sopenharmony_ci NVME_RDMA_DATA_SGL_SIZE); 81662306a36Sopenharmony_ci if (error) 81762306a36Sopenharmony_ci goto out_free_async_qe; 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci } 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci error = nvme_rdma_start_queue(ctrl, 0); 82262306a36Sopenharmony_ci if (error) 82362306a36Sopenharmony_ci goto out_remove_admin_tag_set; 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci error = nvme_enable_ctrl(&ctrl->ctrl); 82662306a36Sopenharmony_ci if (error) 82762306a36Sopenharmony_ci goto out_stop_queue; 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci ctrl->ctrl.max_segments = ctrl->max_fr_pages; 83062306a36Sopenharmony_ci ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); 83162306a36Sopenharmony_ci if (pi_capable) 83262306a36Sopenharmony_ci ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages; 83362306a36Sopenharmony_ci else 83462306a36Sopenharmony_ci ctrl->ctrl.max_integrity_segments = 0; 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci nvme_unquiesce_admin_queue(&ctrl->ctrl); 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci error = nvme_init_ctrl_finish(&ctrl->ctrl, false); 83962306a36Sopenharmony_ci if (error) 84062306a36Sopenharmony_ci goto out_quiesce_queue; 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci return 0; 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ciout_quiesce_queue: 84562306a36Sopenharmony_ci nvme_quiesce_admin_queue(&ctrl->ctrl); 84662306a36Sopenharmony_ci blk_sync_queue(ctrl->ctrl.admin_q); 84762306a36Sopenharmony_ciout_stop_queue: 84862306a36Sopenharmony_ci nvme_rdma_stop_queue(&ctrl->queues[0]); 84962306a36Sopenharmony_ci nvme_cancel_admin_tagset(&ctrl->ctrl); 85062306a36Sopenharmony_ciout_remove_admin_tag_set: 85162306a36Sopenharmony_ci if (new) 85262306a36Sopenharmony_ci nvme_remove_admin_tag_set(&ctrl->ctrl); 85362306a36Sopenharmony_ciout_free_async_qe: 85462306a36Sopenharmony_ci if (ctrl->async_event_sqe.data) { 85562306a36Sopenharmony_ci nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, 85662306a36Sopenharmony_ci sizeof(struct nvme_command), DMA_TO_DEVICE); 85762306a36Sopenharmony_ci ctrl->async_event_sqe.data = NULL; 85862306a36Sopenharmony_ci } 85962306a36Sopenharmony_ciout_free_queue: 86062306a36Sopenharmony_ci nvme_rdma_free_queue(&ctrl->queues[0]); 86162306a36Sopenharmony_ci return error; 86262306a36Sopenharmony_ci} 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_cistatic int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) 86562306a36Sopenharmony_ci{ 86662306a36Sopenharmony_ci int ret, nr_queues; 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci ret = nvme_rdma_alloc_io_queues(ctrl); 86962306a36Sopenharmony_ci if (ret) 87062306a36Sopenharmony_ci return ret; 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci if (new) { 87362306a36Sopenharmony_ci ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl); 87462306a36Sopenharmony_ci if (ret) 87562306a36Sopenharmony_ci goto out_free_io_queues; 87662306a36Sopenharmony_ci } 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci /* 87962306a36Sopenharmony_ci * Only start IO queues for which we have allocated the tagset 88062306a36Sopenharmony_ci * and limitted it to the available queues. On reconnects, the 88162306a36Sopenharmony_ci * queue number might have changed. 88262306a36Sopenharmony_ci */ 88362306a36Sopenharmony_ci nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count); 88462306a36Sopenharmony_ci ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues); 88562306a36Sopenharmony_ci if (ret) 88662306a36Sopenharmony_ci goto out_cleanup_tagset; 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci if (!new) { 88962306a36Sopenharmony_ci nvme_start_freeze(&ctrl->ctrl); 89062306a36Sopenharmony_ci nvme_unquiesce_io_queues(&ctrl->ctrl); 89162306a36Sopenharmony_ci if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) { 89262306a36Sopenharmony_ci /* 89362306a36Sopenharmony_ci * If we timed out waiting for freeze we are likely to 89462306a36Sopenharmony_ci * be stuck. Fail the controller initialization just 89562306a36Sopenharmony_ci * to be safe. 89662306a36Sopenharmony_ci */ 89762306a36Sopenharmony_ci ret = -ENODEV; 89862306a36Sopenharmony_ci nvme_unfreeze(&ctrl->ctrl); 89962306a36Sopenharmony_ci goto out_wait_freeze_timed_out; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset, 90262306a36Sopenharmony_ci ctrl->ctrl.queue_count - 1); 90362306a36Sopenharmony_ci nvme_unfreeze(&ctrl->ctrl); 90462306a36Sopenharmony_ci } 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci /* 90762306a36Sopenharmony_ci * If the number of queues has increased (reconnect case) 90862306a36Sopenharmony_ci * start all new queues now. 90962306a36Sopenharmony_ci */ 91062306a36Sopenharmony_ci ret = nvme_rdma_start_io_queues(ctrl, nr_queues, 91162306a36Sopenharmony_ci ctrl->tag_set.nr_hw_queues + 1); 91262306a36Sopenharmony_ci if (ret) 91362306a36Sopenharmony_ci goto out_wait_freeze_timed_out; 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci return 0; 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_ciout_wait_freeze_timed_out: 91862306a36Sopenharmony_ci nvme_quiesce_io_queues(&ctrl->ctrl); 91962306a36Sopenharmony_ci nvme_sync_io_queues(&ctrl->ctrl); 92062306a36Sopenharmony_ci nvme_rdma_stop_io_queues(ctrl); 92162306a36Sopenharmony_ciout_cleanup_tagset: 92262306a36Sopenharmony_ci nvme_cancel_tagset(&ctrl->ctrl); 92362306a36Sopenharmony_ci if (new) 92462306a36Sopenharmony_ci nvme_remove_io_tag_set(&ctrl->ctrl); 92562306a36Sopenharmony_ciout_free_io_queues: 92662306a36Sopenharmony_ci nvme_rdma_free_io_queues(ctrl); 92762306a36Sopenharmony_ci return ret; 92862306a36Sopenharmony_ci} 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_cistatic void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, 93162306a36Sopenharmony_ci bool remove) 93262306a36Sopenharmony_ci{ 93362306a36Sopenharmony_ci nvme_quiesce_admin_queue(&ctrl->ctrl); 93462306a36Sopenharmony_ci blk_sync_queue(ctrl->ctrl.admin_q); 93562306a36Sopenharmony_ci nvme_rdma_stop_queue(&ctrl->queues[0]); 93662306a36Sopenharmony_ci nvme_cancel_admin_tagset(&ctrl->ctrl); 93762306a36Sopenharmony_ci if (remove) { 93862306a36Sopenharmony_ci nvme_unquiesce_admin_queue(&ctrl->ctrl); 93962306a36Sopenharmony_ci nvme_remove_admin_tag_set(&ctrl->ctrl); 94062306a36Sopenharmony_ci } 94162306a36Sopenharmony_ci nvme_rdma_destroy_admin_queue(ctrl); 94262306a36Sopenharmony_ci} 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_cistatic void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, 94562306a36Sopenharmony_ci bool remove) 94662306a36Sopenharmony_ci{ 94762306a36Sopenharmony_ci if (ctrl->ctrl.queue_count > 1) { 94862306a36Sopenharmony_ci nvme_quiesce_io_queues(&ctrl->ctrl); 94962306a36Sopenharmony_ci nvme_sync_io_queues(&ctrl->ctrl); 95062306a36Sopenharmony_ci nvme_rdma_stop_io_queues(ctrl); 95162306a36Sopenharmony_ci nvme_cancel_tagset(&ctrl->ctrl); 95262306a36Sopenharmony_ci if (remove) { 95362306a36Sopenharmony_ci nvme_unquiesce_io_queues(&ctrl->ctrl); 95462306a36Sopenharmony_ci nvme_remove_io_tag_set(&ctrl->ctrl); 95562306a36Sopenharmony_ci } 95662306a36Sopenharmony_ci nvme_rdma_free_io_queues(ctrl); 95762306a36Sopenharmony_ci } 95862306a36Sopenharmony_ci} 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_cistatic void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) 96162306a36Sopenharmony_ci{ 96262306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci flush_work(&ctrl->err_work); 96562306a36Sopenharmony_ci cancel_delayed_work_sync(&ctrl->reconnect_work); 96662306a36Sopenharmony_ci} 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_cistatic void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) 96962306a36Sopenharmony_ci{ 97062306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci if (list_empty(&ctrl->list)) 97362306a36Sopenharmony_ci goto free_ctrl; 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci mutex_lock(&nvme_rdma_ctrl_mutex); 97662306a36Sopenharmony_ci list_del(&ctrl->list); 97762306a36Sopenharmony_ci mutex_unlock(&nvme_rdma_ctrl_mutex); 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci nvmf_free_options(nctrl->opts); 98062306a36Sopenharmony_cifree_ctrl: 98162306a36Sopenharmony_ci kfree(ctrl->queues); 98262306a36Sopenharmony_ci kfree(ctrl); 98362306a36Sopenharmony_ci} 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_cistatic void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) 98662306a36Sopenharmony_ci{ 98762306a36Sopenharmony_ci enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci /* If we are resetting/deleting then do nothing */ 99062306a36Sopenharmony_ci if (state != NVME_CTRL_CONNECTING) { 99162306a36Sopenharmony_ci WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE); 99262306a36Sopenharmony_ci return; 99362306a36Sopenharmony_ci } 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci if (nvmf_should_reconnect(&ctrl->ctrl)) { 99662306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n", 99762306a36Sopenharmony_ci ctrl->ctrl.opts->reconnect_delay); 99862306a36Sopenharmony_ci queue_delayed_work(nvme_wq, &ctrl->reconnect_work, 99962306a36Sopenharmony_ci ctrl->ctrl.opts->reconnect_delay * HZ); 100062306a36Sopenharmony_ci } else { 100162306a36Sopenharmony_ci nvme_delete_ctrl(&ctrl->ctrl); 100262306a36Sopenharmony_ci } 100362306a36Sopenharmony_ci} 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_cistatic int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) 100662306a36Sopenharmony_ci{ 100762306a36Sopenharmony_ci int ret; 100862306a36Sopenharmony_ci bool changed; 100962306a36Sopenharmony_ci 101062306a36Sopenharmony_ci ret = nvme_rdma_configure_admin_queue(ctrl, new); 101162306a36Sopenharmony_ci if (ret) 101262306a36Sopenharmony_ci return ret; 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci if (ctrl->ctrl.icdoff) { 101562306a36Sopenharmony_ci ret = -EOPNOTSUPP; 101662306a36Sopenharmony_ci dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); 101762306a36Sopenharmony_ci goto destroy_admin; 101862306a36Sopenharmony_ci } 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_ci if (!(ctrl->ctrl.sgls & (1 << 2))) { 102162306a36Sopenharmony_ci ret = -EOPNOTSUPP; 102262306a36Sopenharmony_ci dev_err(ctrl->ctrl.device, 102362306a36Sopenharmony_ci "Mandatory keyed sgls are not supported!\n"); 102462306a36Sopenharmony_ci goto destroy_admin; 102562306a36Sopenharmony_ci } 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci if (ctrl->ctrl.opts->queue_size > ctrl->ctrl.sqsize + 1) { 102862306a36Sopenharmony_ci dev_warn(ctrl->ctrl.device, 102962306a36Sopenharmony_ci "queue_size %zu > ctrl sqsize %u, clamping down\n", 103062306a36Sopenharmony_ci ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1); 103162306a36Sopenharmony_ci } 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ci if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) { 103462306a36Sopenharmony_ci dev_warn(ctrl->ctrl.device, 103562306a36Sopenharmony_ci "ctrl sqsize %u > max queue size %u, clamping down\n", 103662306a36Sopenharmony_ci ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE); 103762306a36Sopenharmony_ci ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1; 103862306a36Sopenharmony_ci } 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { 104162306a36Sopenharmony_ci dev_warn(ctrl->ctrl.device, 104262306a36Sopenharmony_ci "sqsize %u > ctrl maxcmd %u, clamping down\n", 104362306a36Sopenharmony_ci ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd); 104462306a36Sopenharmony_ci ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1; 104562306a36Sopenharmony_ci } 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci if (ctrl->ctrl.sgls & (1 << 20)) 104862306a36Sopenharmony_ci ctrl->use_inline_data = true; 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci if (ctrl->ctrl.queue_count > 1) { 105162306a36Sopenharmony_ci ret = nvme_rdma_configure_io_queues(ctrl, new); 105262306a36Sopenharmony_ci if (ret) 105362306a36Sopenharmony_ci goto destroy_admin; 105462306a36Sopenharmony_ci } 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 105762306a36Sopenharmony_ci if (!changed) { 105862306a36Sopenharmony_ci /* 105962306a36Sopenharmony_ci * state change failure is ok if we started ctrl delete, 106062306a36Sopenharmony_ci * unless we're during creation of a new controller to 106162306a36Sopenharmony_ci * avoid races with teardown flow. 106262306a36Sopenharmony_ci */ 106362306a36Sopenharmony_ci enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci WARN_ON_ONCE(state != NVME_CTRL_DELETING && 106662306a36Sopenharmony_ci state != NVME_CTRL_DELETING_NOIO); 106762306a36Sopenharmony_ci WARN_ON_ONCE(new); 106862306a36Sopenharmony_ci ret = -EINVAL; 106962306a36Sopenharmony_ci goto destroy_io; 107062306a36Sopenharmony_ci } 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci nvme_start_ctrl(&ctrl->ctrl); 107362306a36Sopenharmony_ci return 0; 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_cidestroy_io: 107662306a36Sopenharmony_ci if (ctrl->ctrl.queue_count > 1) { 107762306a36Sopenharmony_ci nvme_quiesce_io_queues(&ctrl->ctrl); 107862306a36Sopenharmony_ci nvme_sync_io_queues(&ctrl->ctrl); 107962306a36Sopenharmony_ci nvme_rdma_stop_io_queues(ctrl); 108062306a36Sopenharmony_ci nvme_cancel_tagset(&ctrl->ctrl); 108162306a36Sopenharmony_ci if (new) 108262306a36Sopenharmony_ci nvme_remove_io_tag_set(&ctrl->ctrl); 108362306a36Sopenharmony_ci nvme_rdma_free_io_queues(ctrl); 108462306a36Sopenharmony_ci } 108562306a36Sopenharmony_cidestroy_admin: 108662306a36Sopenharmony_ci nvme_quiesce_admin_queue(&ctrl->ctrl); 108762306a36Sopenharmony_ci blk_sync_queue(ctrl->ctrl.admin_q); 108862306a36Sopenharmony_ci nvme_rdma_stop_queue(&ctrl->queues[0]); 108962306a36Sopenharmony_ci nvme_cancel_admin_tagset(&ctrl->ctrl); 109062306a36Sopenharmony_ci if (new) 109162306a36Sopenharmony_ci nvme_remove_admin_tag_set(&ctrl->ctrl); 109262306a36Sopenharmony_ci nvme_rdma_destroy_admin_queue(ctrl); 109362306a36Sopenharmony_ci return ret; 109462306a36Sopenharmony_ci} 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_cistatic void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) 109762306a36Sopenharmony_ci{ 109862306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work), 109962306a36Sopenharmony_ci struct nvme_rdma_ctrl, reconnect_work); 110062306a36Sopenharmony_ci 110162306a36Sopenharmony_ci ++ctrl->ctrl.nr_reconnects; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci if (nvme_rdma_setup_ctrl(ctrl, false)) 110462306a36Sopenharmony_ci goto requeue; 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n", 110762306a36Sopenharmony_ci ctrl->ctrl.nr_reconnects); 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci ctrl->ctrl.nr_reconnects = 0; 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci return; 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_cirequeue: 111462306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", 111562306a36Sopenharmony_ci ctrl->ctrl.nr_reconnects); 111662306a36Sopenharmony_ci nvme_rdma_reconnect_or_remove(ctrl); 111762306a36Sopenharmony_ci} 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_cistatic void nvme_rdma_error_recovery_work(struct work_struct *work) 112062306a36Sopenharmony_ci{ 112162306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = container_of(work, 112262306a36Sopenharmony_ci struct nvme_rdma_ctrl, err_work); 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci nvme_stop_keep_alive(&ctrl->ctrl); 112562306a36Sopenharmony_ci flush_work(&ctrl->ctrl.async_event_work); 112662306a36Sopenharmony_ci nvme_rdma_teardown_io_queues(ctrl, false); 112762306a36Sopenharmony_ci nvme_unquiesce_io_queues(&ctrl->ctrl); 112862306a36Sopenharmony_ci nvme_rdma_teardown_admin_queue(ctrl, false); 112962306a36Sopenharmony_ci nvme_unquiesce_admin_queue(&ctrl->ctrl); 113062306a36Sopenharmony_ci nvme_auth_stop(&ctrl->ctrl); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 113362306a36Sopenharmony_ci /* state change failure is ok if we started ctrl delete */ 113462306a36Sopenharmony_ci enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_ci WARN_ON_ONCE(state != NVME_CTRL_DELETING && 113762306a36Sopenharmony_ci state != NVME_CTRL_DELETING_NOIO); 113862306a36Sopenharmony_ci return; 113962306a36Sopenharmony_ci } 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci nvme_rdma_reconnect_or_remove(ctrl); 114262306a36Sopenharmony_ci} 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_cistatic void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) 114562306a36Sopenharmony_ci{ 114662306a36Sopenharmony_ci if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) 114762306a36Sopenharmony_ci return; 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci dev_warn(ctrl->ctrl.device, "starting error recovery\n"); 115062306a36Sopenharmony_ci queue_work(nvme_reset_wq, &ctrl->err_work); 115162306a36Sopenharmony_ci} 115262306a36Sopenharmony_ci 115362306a36Sopenharmony_cistatic void nvme_rdma_end_request(struct nvme_rdma_request *req) 115462306a36Sopenharmony_ci{ 115562306a36Sopenharmony_ci struct request *rq = blk_mq_rq_from_pdu(req); 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci if (!refcount_dec_and_test(&req->ref)) 115862306a36Sopenharmony_ci return; 115962306a36Sopenharmony_ci if (!nvme_try_complete_req(rq, req->status, req->result)) 116062306a36Sopenharmony_ci nvme_rdma_complete_rq(rq); 116162306a36Sopenharmony_ci} 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_cistatic void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc, 116462306a36Sopenharmony_ci const char *op) 116562306a36Sopenharmony_ci{ 116662306a36Sopenharmony_ci struct nvme_rdma_queue *queue = wc->qp->qp_context; 116762306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = queue->ctrl; 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE) 117062306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, 117162306a36Sopenharmony_ci "%s for CQE 0x%p failed with status %s (%d)\n", 117262306a36Sopenharmony_ci op, wc->wr_cqe, 117362306a36Sopenharmony_ci ib_wc_status_msg(wc->status), wc->status); 117462306a36Sopenharmony_ci nvme_rdma_error_recovery(ctrl); 117562306a36Sopenharmony_ci} 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_cistatic void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc) 117862306a36Sopenharmony_ci{ 117962306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) 118062306a36Sopenharmony_ci nvme_rdma_wr_error(cq, wc, "MEMREG"); 118162306a36Sopenharmony_ci} 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_cistatic void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) 118462306a36Sopenharmony_ci{ 118562306a36Sopenharmony_ci struct nvme_rdma_request *req = 118662306a36Sopenharmony_ci container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe); 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) 118962306a36Sopenharmony_ci nvme_rdma_wr_error(cq, wc, "LOCAL_INV"); 119062306a36Sopenharmony_ci else 119162306a36Sopenharmony_ci nvme_rdma_end_request(req); 119262306a36Sopenharmony_ci} 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_cistatic int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, 119562306a36Sopenharmony_ci struct nvme_rdma_request *req) 119662306a36Sopenharmony_ci{ 119762306a36Sopenharmony_ci struct ib_send_wr wr = { 119862306a36Sopenharmony_ci .opcode = IB_WR_LOCAL_INV, 119962306a36Sopenharmony_ci .next = NULL, 120062306a36Sopenharmony_ci .num_sge = 0, 120162306a36Sopenharmony_ci .send_flags = IB_SEND_SIGNALED, 120262306a36Sopenharmony_ci .ex.invalidate_rkey = req->mr->rkey, 120362306a36Sopenharmony_ci }; 120462306a36Sopenharmony_ci 120562306a36Sopenharmony_ci req->reg_cqe.done = nvme_rdma_inv_rkey_done; 120662306a36Sopenharmony_ci wr.wr_cqe = &req->reg_cqe; 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci return ib_post_send(queue->qp, &wr, NULL); 120962306a36Sopenharmony_ci} 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_cistatic void nvme_rdma_dma_unmap_req(struct ib_device *ibdev, struct request *rq) 121262306a36Sopenharmony_ci{ 121362306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci if (blk_integrity_rq(rq)) { 121662306a36Sopenharmony_ci ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, 121762306a36Sopenharmony_ci req->metadata_sgl->nents, rq_dma_dir(rq)); 121862306a36Sopenharmony_ci sg_free_table_chained(&req->metadata_sgl->sg_table, 121962306a36Sopenharmony_ci NVME_INLINE_METADATA_SG_CNT); 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, 122362306a36Sopenharmony_ci rq_dma_dir(rq)); 122462306a36Sopenharmony_ci sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); 122562306a36Sopenharmony_ci} 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_cistatic void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, 122862306a36Sopenharmony_ci struct request *rq) 122962306a36Sopenharmony_ci{ 123062306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 123162306a36Sopenharmony_ci struct nvme_rdma_device *dev = queue->device; 123262306a36Sopenharmony_ci struct ib_device *ibdev = dev->dev; 123362306a36Sopenharmony_ci struct list_head *pool = &queue->qp->rdma_mrs; 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci if (!blk_rq_nr_phys_segments(rq)) 123662306a36Sopenharmony_ci return; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci if (req->use_sig_mr) 123962306a36Sopenharmony_ci pool = &queue->qp->sig_mrs; 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_ci if (req->mr) { 124262306a36Sopenharmony_ci ib_mr_pool_put(queue->qp, pool, req->mr); 124362306a36Sopenharmony_ci req->mr = NULL; 124462306a36Sopenharmony_ci } 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci nvme_rdma_dma_unmap_req(ibdev, rq); 124762306a36Sopenharmony_ci} 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_cistatic int nvme_rdma_set_sg_null(struct nvme_command *c) 125062306a36Sopenharmony_ci{ 125162306a36Sopenharmony_ci struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci sg->addr = 0; 125462306a36Sopenharmony_ci put_unaligned_le24(0, sg->length); 125562306a36Sopenharmony_ci put_unaligned_le32(0, sg->key); 125662306a36Sopenharmony_ci sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; 125762306a36Sopenharmony_ci return 0; 125862306a36Sopenharmony_ci} 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_cistatic int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, 126162306a36Sopenharmony_ci struct nvme_rdma_request *req, struct nvme_command *c, 126262306a36Sopenharmony_ci int count) 126362306a36Sopenharmony_ci{ 126462306a36Sopenharmony_ci struct nvme_sgl_desc *sg = &c->common.dptr.sgl; 126562306a36Sopenharmony_ci struct ib_sge *sge = &req->sge[1]; 126662306a36Sopenharmony_ci struct scatterlist *sgl; 126762306a36Sopenharmony_ci u32 len = 0; 126862306a36Sopenharmony_ci int i; 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci for_each_sg(req->data_sgl.sg_table.sgl, sgl, count, i) { 127162306a36Sopenharmony_ci sge->addr = sg_dma_address(sgl); 127262306a36Sopenharmony_ci sge->length = sg_dma_len(sgl); 127362306a36Sopenharmony_ci sge->lkey = queue->device->pd->local_dma_lkey; 127462306a36Sopenharmony_ci len += sge->length; 127562306a36Sopenharmony_ci sge++; 127662306a36Sopenharmony_ci } 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_ci sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); 127962306a36Sopenharmony_ci sg->length = cpu_to_le32(len); 128062306a36Sopenharmony_ci sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci req->num_sge += count; 128362306a36Sopenharmony_ci return 0; 128462306a36Sopenharmony_ci} 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_cistatic int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue, 128762306a36Sopenharmony_ci struct nvme_rdma_request *req, struct nvme_command *c) 128862306a36Sopenharmony_ci{ 128962306a36Sopenharmony_ci struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl)); 129262306a36Sopenharmony_ci put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length); 129362306a36Sopenharmony_ci put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); 129462306a36Sopenharmony_ci sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; 129562306a36Sopenharmony_ci return 0; 129662306a36Sopenharmony_ci} 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_cistatic int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, 129962306a36Sopenharmony_ci struct nvme_rdma_request *req, struct nvme_command *c, 130062306a36Sopenharmony_ci int count) 130162306a36Sopenharmony_ci{ 130262306a36Sopenharmony_ci struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; 130362306a36Sopenharmony_ci int nr; 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs); 130662306a36Sopenharmony_ci if (WARN_ON_ONCE(!req->mr)) 130762306a36Sopenharmony_ci return -EAGAIN; 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci /* 131062306a36Sopenharmony_ci * Align the MR to a 4K page size to match the ctrl page size and 131162306a36Sopenharmony_ci * the block virtual boundary. 131262306a36Sopenharmony_ci */ 131362306a36Sopenharmony_ci nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL, 131462306a36Sopenharmony_ci SZ_4K); 131562306a36Sopenharmony_ci if (unlikely(nr < count)) { 131662306a36Sopenharmony_ci ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); 131762306a36Sopenharmony_ci req->mr = NULL; 131862306a36Sopenharmony_ci if (nr < 0) 131962306a36Sopenharmony_ci return nr; 132062306a36Sopenharmony_ci return -EINVAL; 132162306a36Sopenharmony_ci } 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci req->reg_cqe.done = nvme_rdma_memreg_done; 132662306a36Sopenharmony_ci memset(&req->reg_wr, 0, sizeof(req->reg_wr)); 132762306a36Sopenharmony_ci req->reg_wr.wr.opcode = IB_WR_REG_MR; 132862306a36Sopenharmony_ci req->reg_wr.wr.wr_cqe = &req->reg_cqe; 132962306a36Sopenharmony_ci req->reg_wr.wr.num_sge = 0; 133062306a36Sopenharmony_ci req->reg_wr.mr = req->mr; 133162306a36Sopenharmony_ci req->reg_wr.key = req->mr->rkey; 133262306a36Sopenharmony_ci req->reg_wr.access = IB_ACCESS_LOCAL_WRITE | 133362306a36Sopenharmony_ci IB_ACCESS_REMOTE_READ | 133462306a36Sopenharmony_ci IB_ACCESS_REMOTE_WRITE; 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci sg->addr = cpu_to_le64(req->mr->iova); 133762306a36Sopenharmony_ci put_unaligned_le24(req->mr->length, sg->length); 133862306a36Sopenharmony_ci put_unaligned_le32(req->mr->rkey, sg->key); 133962306a36Sopenharmony_ci sg->type = (NVME_KEY_SGL_FMT_DATA_DESC << 4) | 134062306a36Sopenharmony_ci NVME_SGL_FMT_INVALIDATE; 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci return 0; 134362306a36Sopenharmony_ci} 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_cistatic void nvme_rdma_set_sig_domain(struct blk_integrity *bi, 134662306a36Sopenharmony_ci struct nvme_command *cmd, struct ib_sig_domain *domain, 134762306a36Sopenharmony_ci u16 control, u8 pi_type) 134862306a36Sopenharmony_ci{ 134962306a36Sopenharmony_ci domain->sig_type = IB_SIG_TYPE_T10_DIF; 135062306a36Sopenharmony_ci domain->sig.dif.bg_type = IB_T10DIF_CRC; 135162306a36Sopenharmony_ci domain->sig.dif.pi_interval = 1 << bi->interval_exp; 135262306a36Sopenharmony_ci domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); 135362306a36Sopenharmony_ci if (control & NVME_RW_PRINFO_PRCHK_REF) 135462306a36Sopenharmony_ci domain->sig.dif.ref_remap = true; 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); 135762306a36Sopenharmony_ci domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); 135862306a36Sopenharmony_ci domain->sig.dif.app_escape = true; 135962306a36Sopenharmony_ci if (pi_type == NVME_NS_DPS_PI_TYPE3) 136062306a36Sopenharmony_ci domain->sig.dif.ref_escape = true; 136162306a36Sopenharmony_ci} 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_cistatic void nvme_rdma_set_sig_attrs(struct blk_integrity *bi, 136462306a36Sopenharmony_ci struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs, 136562306a36Sopenharmony_ci u8 pi_type) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci u16 control = le16_to_cpu(cmd->rw.control); 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci memset(sig_attrs, 0, sizeof(*sig_attrs)); 137062306a36Sopenharmony_ci if (control & NVME_RW_PRINFO_PRACT) { 137162306a36Sopenharmony_ci /* for WRITE_INSERT/READ_STRIP no memory domain */ 137262306a36Sopenharmony_ci sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; 137362306a36Sopenharmony_ci nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, 137462306a36Sopenharmony_ci pi_type); 137562306a36Sopenharmony_ci /* Clear the PRACT bit since HCA will generate/verify the PI */ 137662306a36Sopenharmony_ci control &= ~NVME_RW_PRINFO_PRACT; 137762306a36Sopenharmony_ci cmd->rw.control = cpu_to_le16(control); 137862306a36Sopenharmony_ci } else { 137962306a36Sopenharmony_ci /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ 138062306a36Sopenharmony_ci nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, 138162306a36Sopenharmony_ci pi_type); 138262306a36Sopenharmony_ci nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, 138362306a36Sopenharmony_ci pi_type); 138462306a36Sopenharmony_ci } 138562306a36Sopenharmony_ci} 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_cistatic void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask) 138862306a36Sopenharmony_ci{ 138962306a36Sopenharmony_ci *mask = 0; 139062306a36Sopenharmony_ci if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF) 139162306a36Sopenharmony_ci *mask |= IB_SIG_CHECK_REFTAG; 139262306a36Sopenharmony_ci if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD) 139362306a36Sopenharmony_ci *mask |= IB_SIG_CHECK_GUARD; 139462306a36Sopenharmony_ci} 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_cistatic void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc) 139762306a36Sopenharmony_ci{ 139862306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) 139962306a36Sopenharmony_ci nvme_rdma_wr_error(cq, wc, "SIG"); 140062306a36Sopenharmony_ci} 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_cistatic int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, 140362306a36Sopenharmony_ci struct nvme_rdma_request *req, struct nvme_command *c, 140462306a36Sopenharmony_ci int count, int pi_count) 140562306a36Sopenharmony_ci{ 140662306a36Sopenharmony_ci struct nvme_rdma_sgl *sgl = &req->data_sgl; 140762306a36Sopenharmony_ci struct ib_reg_wr *wr = &req->reg_wr; 140862306a36Sopenharmony_ci struct request *rq = blk_mq_rq_from_pdu(req); 140962306a36Sopenharmony_ci struct nvme_ns *ns = rq->q->queuedata; 141062306a36Sopenharmony_ci struct bio *bio = rq->bio; 141162306a36Sopenharmony_ci struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; 141262306a36Sopenharmony_ci int nr; 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); 141562306a36Sopenharmony_ci if (WARN_ON_ONCE(!req->mr)) 141662306a36Sopenharmony_ci return -EAGAIN; 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL, 141962306a36Sopenharmony_ci req->metadata_sgl->sg_table.sgl, pi_count, NULL, 142062306a36Sopenharmony_ci SZ_4K); 142162306a36Sopenharmony_ci if (unlikely(nr)) 142262306a36Sopenharmony_ci goto mr_put; 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c, 142562306a36Sopenharmony_ci req->mr->sig_attrs, ns->pi_type); 142662306a36Sopenharmony_ci nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_ci req->reg_cqe.done = nvme_rdma_sig_done; 143162306a36Sopenharmony_ci memset(wr, 0, sizeof(*wr)); 143262306a36Sopenharmony_ci wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; 143362306a36Sopenharmony_ci wr->wr.wr_cqe = &req->reg_cqe; 143462306a36Sopenharmony_ci wr->wr.num_sge = 0; 143562306a36Sopenharmony_ci wr->wr.send_flags = 0; 143662306a36Sopenharmony_ci wr->mr = req->mr; 143762306a36Sopenharmony_ci wr->key = req->mr->rkey; 143862306a36Sopenharmony_ci wr->access = IB_ACCESS_LOCAL_WRITE | 143962306a36Sopenharmony_ci IB_ACCESS_REMOTE_READ | 144062306a36Sopenharmony_ci IB_ACCESS_REMOTE_WRITE; 144162306a36Sopenharmony_ci 144262306a36Sopenharmony_ci sg->addr = cpu_to_le64(req->mr->iova); 144362306a36Sopenharmony_ci put_unaligned_le24(req->mr->length, sg->length); 144462306a36Sopenharmony_ci put_unaligned_le32(req->mr->rkey, sg->key); 144562306a36Sopenharmony_ci sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci return 0; 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_cimr_put: 145062306a36Sopenharmony_ci ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr); 145162306a36Sopenharmony_ci req->mr = NULL; 145262306a36Sopenharmony_ci if (nr < 0) 145362306a36Sopenharmony_ci return nr; 145462306a36Sopenharmony_ci return -EINVAL; 145562306a36Sopenharmony_ci} 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_cistatic int nvme_rdma_dma_map_req(struct ib_device *ibdev, struct request *rq, 145862306a36Sopenharmony_ci int *count, int *pi_count) 145962306a36Sopenharmony_ci{ 146062306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 146162306a36Sopenharmony_ci int ret; 146262306a36Sopenharmony_ci 146362306a36Sopenharmony_ci req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1); 146462306a36Sopenharmony_ci ret = sg_alloc_table_chained(&req->data_sgl.sg_table, 146562306a36Sopenharmony_ci blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl, 146662306a36Sopenharmony_ci NVME_INLINE_SG_CNT); 146762306a36Sopenharmony_ci if (ret) 146862306a36Sopenharmony_ci return -ENOMEM; 146962306a36Sopenharmony_ci 147062306a36Sopenharmony_ci req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, 147162306a36Sopenharmony_ci req->data_sgl.sg_table.sgl); 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci *count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, 147462306a36Sopenharmony_ci req->data_sgl.nents, rq_dma_dir(rq)); 147562306a36Sopenharmony_ci if (unlikely(*count <= 0)) { 147662306a36Sopenharmony_ci ret = -EIO; 147762306a36Sopenharmony_ci goto out_free_table; 147862306a36Sopenharmony_ci } 147962306a36Sopenharmony_ci 148062306a36Sopenharmony_ci if (blk_integrity_rq(rq)) { 148162306a36Sopenharmony_ci req->metadata_sgl->sg_table.sgl = 148262306a36Sopenharmony_ci (struct scatterlist *)(req->metadata_sgl + 1); 148362306a36Sopenharmony_ci ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table, 148462306a36Sopenharmony_ci blk_rq_count_integrity_sg(rq->q, rq->bio), 148562306a36Sopenharmony_ci req->metadata_sgl->sg_table.sgl, 148662306a36Sopenharmony_ci NVME_INLINE_METADATA_SG_CNT); 148762306a36Sopenharmony_ci if (unlikely(ret)) { 148862306a36Sopenharmony_ci ret = -ENOMEM; 148962306a36Sopenharmony_ci goto out_unmap_sg; 149062306a36Sopenharmony_ci } 149162306a36Sopenharmony_ci 149262306a36Sopenharmony_ci req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q, 149362306a36Sopenharmony_ci rq->bio, req->metadata_sgl->sg_table.sgl); 149462306a36Sopenharmony_ci *pi_count = ib_dma_map_sg(ibdev, 149562306a36Sopenharmony_ci req->metadata_sgl->sg_table.sgl, 149662306a36Sopenharmony_ci req->metadata_sgl->nents, 149762306a36Sopenharmony_ci rq_dma_dir(rq)); 149862306a36Sopenharmony_ci if (unlikely(*pi_count <= 0)) { 149962306a36Sopenharmony_ci ret = -EIO; 150062306a36Sopenharmony_ci goto out_free_pi_table; 150162306a36Sopenharmony_ci } 150262306a36Sopenharmony_ci } 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ci return 0; 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ciout_free_pi_table: 150762306a36Sopenharmony_ci sg_free_table_chained(&req->metadata_sgl->sg_table, 150862306a36Sopenharmony_ci NVME_INLINE_METADATA_SG_CNT); 150962306a36Sopenharmony_ciout_unmap_sg: 151062306a36Sopenharmony_ci ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, 151162306a36Sopenharmony_ci rq_dma_dir(rq)); 151262306a36Sopenharmony_ciout_free_table: 151362306a36Sopenharmony_ci sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); 151462306a36Sopenharmony_ci return ret; 151562306a36Sopenharmony_ci} 151662306a36Sopenharmony_ci 151762306a36Sopenharmony_cistatic int nvme_rdma_map_data(struct nvme_rdma_queue *queue, 151862306a36Sopenharmony_ci struct request *rq, struct nvme_command *c) 151962306a36Sopenharmony_ci{ 152062306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 152162306a36Sopenharmony_ci struct nvme_rdma_device *dev = queue->device; 152262306a36Sopenharmony_ci struct ib_device *ibdev = dev->dev; 152362306a36Sopenharmony_ci int pi_count = 0; 152462306a36Sopenharmony_ci int count, ret; 152562306a36Sopenharmony_ci 152662306a36Sopenharmony_ci req->num_sge = 1; 152762306a36Sopenharmony_ci refcount_set(&req->ref, 2); /* send and recv completions */ 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci c->common.flags |= NVME_CMD_SGL_METABUF; 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci if (!blk_rq_nr_phys_segments(rq)) 153262306a36Sopenharmony_ci return nvme_rdma_set_sg_null(c); 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_ci ret = nvme_rdma_dma_map_req(ibdev, rq, &count, &pi_count); 153562306a36Sopenharmony_ci if (unlikely(ret)) 153662306a36Sopenharmony_ci return ret; 153762306a36Sopenharmony_ci 153862306a36Sopenharmony_ci if (req->use_sig_mr) { 153962306a36Sopenharmony_ci ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count); 154062306a36Sopenharmony_ci goto out; 154162306a36Sopenharmony_ci } 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci if (count <= dev->num_inline_segments) { 154462306a36Sopenharmony_ci if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && 154562306a36Sopenharmony_ci queue->ctrl->use_inline_data && 154662306a36Sopenharmony_ci blk_rq_payload_bytes(rq) <= 154762306a36Sopenharmony_ci nvme_rdma_inline_data_size(queue)) { 154862306a36Sopenharmony_ci ret = nvme_rdma_map_sg_inline(queue, req, c, count); 154962306a36Sopenharmony_ci goto out; 155062306a36Sopenharmony_ci } 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci if (count == 1 && dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) { 155362306a36Sopenharmony_ci ret = nvme_rdma_map_sg_single(queue, req, c); 155462306a36Sopenharmony_ci goto out; 155562306a36Sopenharmony_ci } 155662306a36Sopenharmony_ci } 155762306a36Sopenharmony_ci 155862306a36Sopenharmony_ci ret = nvme_rdma_map_sg_fr(queue, req, c, count); 155962306a36Sopenharmony_ciout: 156062306a36Sopenharmony_ci if (unlikely(ret)) 156162306a36Sopenharmony_ci goto out_dma_unmap_req; 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci return 0; 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_ciout_dma_unmap_req: 156662306a36Sopenharmony_ci nvme_rdma_dma_unmap_req(ibdev, rq); 156762306a36Sopenharmony_ci return ret; 156862306a36Sopenharmony_ci} 156962306a36Sopenharmony_ci 157062306a36Sopenharmony_cistatic void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) 157162306a36Sopenharmony_ci{ 157262306a36Sopenharmony_ci struct nvme_rdma_qe *qe = 157362306a36Sopenharmony_ci container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); 157462306a36Sopenharmony_ci struct nvme_rdma_request *req = 157562306a36Sopenharmony_ci container_of(qe, struct nvme_rdma_request, sqe); 157662306a36Sopenharmony_ci 157762306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) 157862306a36Sopenharmony_ci nvme_rdma_wr_error(cq, wc, "SEND"); 157962306a36Sopenharmony_ci else 158062306a36Sopenharmony_ci nvme_rdma_end_request(req); 158162306a36Sopenharmony_ci} 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_cistatic int nvme_rdma_post_send(struct nvme_rdma_queue *queue, 158462306a36Sopenharmony_ci struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, 158562306a36Sopenharmony_ci struct ib_send_wr *first) 158662306a36Sopenharmony_ci{ 158762306a36Sopenharmony_ci struct ib_send_wr wr; 158862306a36Sopenharmony_ci int ret; 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci sge->addr = qe->dma; 159162306a36Sopenharmony_ci sge->length = sizeof(struct nvme_command); 159262306a36Sopenharmony_ci sge->lkey = queue->device->pd->local_dma_lkey; 159362306a36Sopenharmony_ci 159462306a36Sopenharmony_ci wr.next = NULL; 159562306a36Sopenharmony_ci wr.wr_cqe = &qe->cqe; 159662306a36Sopenharmony_ci wr.sg_list = sge; 159762306a36Sopenharmony_ci wr.num_sge = num_sge; 159862306a36Sopenharmony_ci wr.opcode = IB_WR_SEND; 159962306a36Sopenharmony_ci wr.send_flags = IB_SEND_SIGNALED; 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci if (first) 160262306a36Sopenharmony_ci first->next = ≀ 160362306a36Sopenharmony_ci else 160462306a36Sopenharmony_ci first = ≀ 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci ret = ib_post_send(queue->qp, first, NULL); 160762306a36Sopenharmony_ci if (unlikely(ret)) { 160862306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 160962306a36Sopenharmony_ci "%s failed with error code %d\n", __func__, ret); 161062306a36Sopenharmony_ci } 161162306a36Sopenharmony_ci return ret; 161262306a36Sopenharmony_ci} 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_cistatic int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, 161562306a36Sopenharmony_ci struct nvme_rdma_qe *qe) 161662306a36Sopenharmony_ci{ 161762306a36Sopenharmony_ci struct ib_recv_wr wr; 161862306a36Sopenharmony_ci struct ib_sge list; 161962306a36Sopenharmony_ci int ret; 162062306a36Sopenharmony_ci 162162306a36Sopenharmony_ci list.addr = qe->dma; 162262306a36Sopenharmony_ci list.length = sizeof(struct nvme_completion); 162362306a36Sopenharmony_ci list.lkey = queue->device->pd->local_dma_lkey; 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci qe->cqe.done = nvme_rdma_recv_done; 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci wr.next = NULL; 162862306a36Sopenharmony_ci wr.wr_cqe = &qe->cqe; 162962306a36Sopenharmony_ci wr.sg_list = &list; 163062306a36Sopenharmony_ci wr.num_sge = 1; 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci ret = ib_post_recv(queue->qp, &wr, NULL); 163362306a36Sopenharmony_ci if (unlikely(ret)) { 163462306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 163562306a36Sopenharmony_ci "%s failed with error code %d\n", __func__, ret); 163662306a36Sopenharmony_ci } 163762306a36Sopenharmony_ci return ret; 163862306a36Sopenharmony_ci} 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_cistatic struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue) 164162306a36Sopenharmony_ci{ 164262306a36Sopenharmony_ci u32 queue_idx = nvme_rdma_queue_idx(queue); 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_ci if (queue_idx == 0) 164562306a36Sopenharmony_ci return queue->ctrl->admin_tag_set.tags[queue_idx]; 164662306a36Sopenharmony_ci return queue->ctrl->tag_set.tags[queue_idx - 1]; 164762306a36Sopenharmony_ci} 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_cistatic void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc) 165062306a36Sopenharmony_ci{ 165162306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) 165262306a36Sopenharmony_ci nvme_rdma_wr_error(cq, wc, "ASYNC"); 165362306a36Sopenharmony_ci} 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_cistatic void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) 165662306a36Sopenharmony_ci{ 165762306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg); 165862306a36Sopenharmony_ci struct nvme_rdma_queue *queue = &ctrl->queues[0]; 165962306a36Sopenharmony_ci struct ib_device *dev = queue->device->dev; 166062306a36Sopenharmony_ci struct nvme_rdma_qe *sqe = &ctrl->async_event_sqe; 166162306a36Sopenharmony_ci struct nvme_command *cmd = sqe->data; 166262306a36Sopenharmony_ci struct ib_sge sge; 166362306a36Sopenharmony_ci int ret; 166462306a36Sopenharmony_ci 166562306a36Sopenharmony_ci ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(*cmd), DMA_TO_DEVICE); 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_ci memset(cmd, 0, sizeof(*cmd)); 166862306a36Sopenharmony_ci cmd->common.opcode = nvme_admin_async_event; 166962306a36Sopenharmony_ci cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH; 167062306a36Sopenharmony_ci cmd->common.flags |= NVME_CMD_SGL_METABUF; 167162306a36Sopenharmony_ci nvme_rdma_set_sg_null(cmd); 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_ci sqe->cqe.done = nvme_rdma_async_done; 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_ci ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd), 167662306a36Sopenharmony_ci DMA_TO_DEVICE); 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL); 167962306a36Sopenharmony_ci WARN_ON_ONCE(ret); 168062306a36Sopenharmony_ci} 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_cistatic void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, 168362306a36Sopenharmony_ci struct nvme_completion *cqe, struct ib_wc *wc) 168462306a36Sopenharmony_ci{ 168562306a36Sopenharmony_ci struct request *rq; 168662306a36Sopenharmony_ci struct nvme_rdma_request *req; 168762306a36Sopenharmony_ci 168862306a36Sopenharmony_ci rq = nvme_find_rq(nvme_rdma_tagset(queue), cqe->command_id); 168962306a36Sopenharmony_ci if (!rq) { 169062306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 169162306a36Sopenharmony_ci "got bad command_id %#x on QP %#x\n", 169262306a36Sopenharmony_ci cqe->command_id, queue->qp->qp_num); 169362306a36Sopenharmony_ci nvme_rdma_error_recovery(queue->ctrl); 169462306a36Sopenharmony_ci return; 169562306a36Sopenharmony_ci } 169662306a36Sopenharmony_ci req = blk_mq_rq_to_pdu(rq); 169762306a36Sopenharmony_ci 169862306a36Sopenharmony_ci req->status = cqe->status; 169962306a36Sopenharmony_ci req->result = cqe->result; 170062306a36Sopenharmony_ci 170162306a36Sopenharmony_ci if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { 170262306a36Sopenharmony_ci if (unlikely(!req->mr || 170362306a36Sopenharmony_ci wc->ex.invalidate_rkey != req->mr->rkey)) { 170462306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 170562306a36Sopenharmony_ci "Bogus remote invalidation for rkey %#x\n", 170662306a36Sopenharmony_ci req->mr ? req->mr->rkey : 0); 170762306a36Sopenharmony_ci nvme_rdma_error_recovery(queue->ctrl); 170862306a36Sopenharmony_ci } 170962306a36Sopenharmony_ci } else if (req->mr) { 171062306a36Sopenharmony_ci int ret; 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci ret = nvme_rdma_inv_rkey(queue, req); 171362306a36Sopenharmony_ci if (unlikely(ret < 0)) { 171462306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 171562306a36Sopenharmony_ci "Queueing INV WR for rkey %#x failed (%d)\n", 171662306a36Sopenharmony_ci req->mr->rkey, ret); 171762306a36Sopenharmony_ci nvme_rdma_error_recovery(queue->ctrl); 171862306a36Sopenharmony_ci } 171962306a36Sopenharmony_ci /* the local invalidation completion will end the request */ 172062306a36Sopenharmony_ci return; 172162306a36Sopenharmony_ci } 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_ci nvme_rdma_end_request(req); 172462306a36Sopenharmony_ci} 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_cistatic void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) 172762306a36Sopenharmony_ci{ 172862306a36Sopenharmony_ci struct nvme_rdma_qe *qe = 172962306a36Sopenharmony_ci container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); 173062306a36Sopenharmony_ci struct nvme_rdma_queue *queue = wc->qp->qp_context; 173162306a36Sopenharmony_ci struct ib_device *ibdev = queue->device->dev; 173262306a36Sopenharmony_ci struct nvme_completion *cqe = qe->data; 173362306a36Sopenharmony_ci const size_t len = sizeof(struct nvme_completion); 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) { 173662306a36Sopenharmony_ci nvme_rdma_wr_error(cq, wc, "RECV"); 173762306a36Sopenharmony_ci return; 173862306a36Sopenharmony_ci } 173962306a36Sopenharmony_ci 174062306a36Sopenharmony_ci /* sanity checking for received data length */ 174162306a36Sopenharmony_ci if (unlikely(wc->byte_len < len)) { 174262306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 174362306a36Sopenharmony_ci "Unexpected nvme completion length(%d)\n", wc->byte_len); 174462306a36Sopenharmony_ci nvme_rdma_error_recovery(queue->ctrl); 174562306a36Sopenharmony_ci return; 174662306a36Sopenharmony_ci } 174762306a36Sopenharmony_ci 174862306a36Sopenharmony_ci ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); 174962306a36Sopenharmony_ci /* 175062306a36Sopenharmony_ci * AEN requests are special as they don't time out and can 175162306a36Sopenharmony_ci * survive any kind of queue freeze and often don't respond to 175262306a36Sopenharmony_ci * aborts. We don't even bother to allocate a struct request 175362306a36Sopenharmony_ci * for them but rather special case them here. 175462306a36Sopenharmony_ci */ 175562306a36Sopenharmony_ci if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue), 175662306a36Sopenharmony_ci cqe->command_id))) 175762306a36Sopenharmony_ci nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, 175862306a36Sopenharmony_ci &cqe->result); 175962306a36Sopenharmony_ci else 176062306a36Sopenharmony_ci nvme_rdma_process_nvme_rsp(queue, cqe, wc); 176162306a36Sopenharmony_ci ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE); 176262306a36Sopenharmony_ci 176362306a36Sopenharmony_ci nvme_rdma_post_recv(queue, qe); 176462306a36Sopenharmony_ci} 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_cistatic int nvme_rdma_conn_established(struct nvme_rdma_queue *queue) 176762306a36Sopenharmony_ci{ 176862306a36Sopenharmony_ci int ret, i; 176962306a36Sopenharmony_ci 177062306a36Sopenharmony_ci for (i = 0; i < queue->queue_size; i++) { 177162306a36Sopenharmony_ci ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]); 177262306a36Sopenharmony_ci if (ret) 177362306a36Sopenharmony_ci return ret; 177462306a36Sopenharmony_ci } 177562306a36Sopenharmony_ci 177662306a36Sopenharmony_ci return 0; 177762306a36Sopenharmony_ci} 177862306a36Sopenharmony_ci 177962306a36Sopenharmony_cistatic int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue, 178062306a36Sopenharmony_ci struct rdma_cm_event *ev) 178162306a36Sopenharmony_ci{ 178262306a36Sopenharmony_ci struct rdma_cm_id *cm_id = queue->cm_id; 178362306a36Sopenharmony_ci int status = ev->status; 178462306a36Sopenharmony_ci const char *rej_msg; 178562306a36Sopenharmony_ci const struct nvme_rdma_cm_rej *rej_data; 178662306a36Sopenharmony_ci u8 rej_data_len; 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_ci rej_msg = rdma_reject_msg(cm_id, status); 178962306a36Sopenharmony_ci rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len); 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_ci if (rej_data && rej_data_len >= sizeof(u16)) { 179262306a36Sopenharmony_ci u16 sts = le16_to_cpu(rej_data->sts); 179362306a36Sopenharmony_ci 179462306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 179562306a36Sopenharmony_ci "Connect rejected: status %d (%s) nvme status %d (%s).\n", 179662306a36Sopenharmony_ci status, rej_msg, sts, nvme_rdma_cm_msg(sts)); 179762306a36Sopenharmony_ci } else { 179862306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 179962306a36Sopenharmony_ci "Connect rejected: status %d (%s).\n", status, rej_msg); 180062306a36Sopenharmony_ci } 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci return -ECONNRESET; 180362306a36Sopenharmony_ci} 180462306a36Sopenharmony_ci 180562306a36Sopenharmony_cistatic int nvme_rdma_addr_resolved(struct nvme_rdma_queue *queue) 180662306a36Sopenharmony_ci{ 180762306a36Sopenharmony_ci struct nvme_ctrl *ctrl = &queue->ctrl->ctrl; 180862306a36Sopenharmony_ci int ret; 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ci ret = nvme_rdma_create_queue_ib(queue); 181162306a36Sopenharmony_ci if (ret) 181262306a36Sopenharmony_ci return ret; 181362306a36Sopenharmony_ci 181462306a36Sopenharmony_ci if (ctrl->opts->tos >= 0) 181562306a36Sopenharmony_ci rdma_set_service_type(queue->cm_id, ctrl->opts->tos); 181662306a36Sopenharmony_ci ret = rdma_resolve_route(queue->cm_id, NVME_RDMA_CM_TIMEOUT_MS); 181762306a36Sopenharmony_ci if (ret) { 181862306a36Sopenharmony_ci dev_err(ctrl->device, "rdma_resolve_route failed (%d).\n", 181962306a36Sopenharmony_ci queue->cm_error); 182062306a36Sopenharmony_ci goto out_destroy_queue; 182162306a36Sopenharmony_ci } 182262306a36Sopenharmony_ci 182362306a36Sopenharmony_ci return 0; 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ciout_destroy_queue: 182662306a36Sopenharmony_ci nvme_rdma_destroy_queue_ib(queue); 182762306a36Sopenharmony_ci return ret; 182862306a36Sopenharmony_ci} 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_cistatic int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) 183162306a36Sopenharmony_ci{ 183262306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = queue->ctrl; 183362306a36Sopenharmony_ci struct rdma_conn_param param = { }; 183462306a36Sopenharmony_ci struct nvme_rdma_cm_req priv = { }; 183562306a36Sopenharmony_ci int ret; 183662306a36Sopenharmony_ci 183762306a36Sopenharmony_ci param.qp_num = queue->qp->qp_num; 183862306a36Sopenharmony_ci param.flow_control = 1; 183962306a36Sopenharmony_ci 184062306a36Sopenharmony_ci param.responder_resources = queue->device->dev->attrs.max_qp_rd_atom; 184162306a36Sopenharmony_ci /* maximum retry count */ 184262306a36Sopenharmony_ci param.retry_count = 7; 184362306a36Sopenharmony_ci param.rnr_retry_count = 7; 184462306a36Sopenharmony_ci param.private_data = &priv; 184562306a36Sopenharmony_ci param.private_data_len = sizeof(priv); 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); 184862306a36Sopenharmony_ci priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue)); 184962306a36Sopenharmony_ci /* 185062306a36Sopenharmony_ci * set the admin queue depth to the minimum size 185162306a36Sopenharmony_ci * specified by the Fabrics standard. 185262306a36Sopenharmony_ci */ 185362306a36Sopenharmony_ci if (priv.qid == 0) { 185462306a36Sopenharmony_ci priv.hrqsize = cpu_to_le16(NVME_AQ_DEPTH); 185562306a36Sopenharmony_ci priv.hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1); 185662306a36Sopenharmony_ci } else { 185762306a36Sopenharmony_ci /* 185862306a36Sopenharmony_ci * current interpretation of the fabrics spec 185962306a36Sopenharmony_ci * is at minimum you make hrqsize sqsize+1, or a 186062306a36Sopenharmony_ci * 1's based representation of sqsize. 186162306a36Sopenharmony_ci */ 186262306a36Sopenharmony_ci priv.hrqsize = cpu_to_le16(queue->queue_size); 186362306a36Sopenharmony_ci priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize); 186462306a36Sopenharmony_ci } 186562306a36Sopenharmony_ci 186662306a36Sopenharmony_ci ret = rdma_connect_locked(queue->cm_id, ¶m); 186762306a36Sopenharmony_ci if (ret) { 186862306a36Sopenharmony_ci dev_err(ctrl->ctrl.device, 186962306a36Sopenharmony_ci "rdma_connect_locked failed (%d).\n", ret); 187062306a36Sopenharmony_ci return ret; 187162306a36Sopenharmony_ci } 187262306a36Sopenharmony_ci 187362306a36Sopenharmony_ci return 0; 187462306a36Sopenharmony_ci} 187562306a36Sopenharmony_ci 187662306a36Sopenharmony_cistatic int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, 187762306a36Sopenharmony_ci struct rdma_cm_event *ev) 187862306a36Sopenharmony_ci{ 187962306a36Sopenharmony_ci struct nvme_rdma_queue *queue = cm_id->context; 188062306a36Sopenharmony_ci int cm_error = 0; 188162306a36Sopenharmony_ci 188262306a36Sopenharmony_ci dev_dbg(queue->ctrl->ctrl.device, "%s (%d): status %d id %p\n", 188362306a36Sopenharmony_ci rdma_event_msg(ev->event), ev->event, 188462306a36Sopenharmony_ci ev->status, cm_id); 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci switch (ev->event) { 188762306a36Sopenharmony_ci case RDMA_CM_EVENT_ADDR_RESOLVED: 188862306a36Sopenharmony_ci cm_error = nvme_rdma_addr_resolved(queue); 188962306a36Sopenharmony_ci break; 189062306a36Sopenharmony_ci case RDMA_CM_EVENT_ROUTE_RESOLVED: 189162306a36Sopenharmony_ci cm_error = nvme_rdma_route_resolved(queue); 189262306a36Sopenharmony_ci break; 189362306a36Sopenharmony_ci case RDMA_CM_EVENT_ESTABLISHED: 189462306a36Sopenharmony_ci queue->cm_error = nvme_rdma_conn_established(queue); 189562306a36Sopenharmony_ci /* complete cm_done regardless of success/failure */ 189662306a36Sopenharmony_ci complete(&queue->cm_done); 189762306a36Sopenharmony_ci return 0; 189862306a36Sopenharmony_ci case RDMA_CM_EVENT_REJECTED: 189962306a36Sopenharmony_ci cm_error = nvme_rdma_conn_rejected(queue, ev); 190062306a36Sopenharmony_ci break; 190162306a36Sopenharmony_ci case RDMA_CM_EVENT_ROUTE_ERROR: 190262306a36Sopenharmony_ci case RDMA_CM_EVENT_CONNECT_ERROR: 190362306a36Sopenharmony_ci case RDMA_CM_EVENT_UNREACHABLE: 190462306a36Sopenharmony_ci case RDMA_CM_EVENT_ADDR_ERROR: 190562306a36Sopenharmony_ci dev_dbg(queue->ctrl->ctrl.device, 190662306a36Sopenharmony_ci "CM error event %d\n", ev->event); 190762306a36Sopenharmony_ci cm_error = -ECONNRESET; 190862306a36Sopenharmony_ci break; 190962306a36Sopenharmony_ci case RDMA_CM_EVENT_DISCONNECTED: 191062306a36Sopenharmony_ci case RDMA_CM_EVENT_ADDR_CHANGE: 191162306a36Sopenharmony_ci case RDMA_CM_EVENT_TIMEWAIT_EXIT: 191262306a36Sopenharmony_ci dev_dbg(queue->ctrl->ctrl.device, 191362306a36Sopenharmony_ci "disconnect received - connection closed\n"); 191462306a36Sopenharmony_ci nvme_rdma_error_recovery(queue->ctrl); 191562306a36Sopenharmony_ci break; 191662306a36Sopenharmony_ci case RDMA_CM_EVENT_DEVICE_REMOVAL: 191762306a36Sopenharmony_ci /* device removal is handled via the ib_client API */ 191862306a36Sopenharmony_ci break; 191962306a36Sopenharmony_ci default: 192062306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 192162306a36Sopenharmony_ci "Unexpected RDMA CM event (%d)\n", ev->event); 192262306a36Sopenharmony_ci nvme_rdma_error_recovery(queue->ctrl); 192362306a36Sopenharmony_ci break; 192462306a36Sopenharmony_ci } 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ci if (cm_error) { 192762306a36Sopenharmony_ci queue->cm_error = cm_error; 192862306a36Sopenharmony_ci complete(&queue->cm_done); 192962306a36Sopenharmony_ci } 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci return 0; 193262306a36Sopenharmony_ci} 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_cistatic void nvme_rdma_complete_timed_out(struct request *rq) 193562306a36Sopenharmony_ci{ 193662306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 193762306a36Sopenharmony_ci struct nvme_rdma_queue *queue = req->queue; 193862306a36Sopenharmony_ci 193962306a36Sopenharmony_ci nvme_rdma_stop_queue(queue); 194062306a36Sopenharmony_ci nvmf_complete_timed_out_request(rq); 194162306a36Sopenharmony_ci} 194262306a36Sopenharmony_ci 194362306a36Sopenharmony_cistatic enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) 194462306a36Sopenharmony_ci{ 194562306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 194662306a36Sopenharmony_ci struct nvme_rdma_queue *queue = req->queue; 194762306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = queue->ctrl; 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_ci dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", 195062306a36Sopenharmony_ci rq->tag, nvme_rdma_queue_idx(queue)); 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ci if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) { 195362306a36Sopenharmony_ci /* 195462306a36Sopenharmony_ci * If we are resetting, connecting or deleting we should 195562306a36Sopenharmony_ci * complete immediately because we may block controller 195662306a36Sopenharmony_ci * teardown or setup sequence 195762306a36Sopenharmony_ci * - ctrl disable/shutdown fabrics requests 195862306a36Sopenharmony_ci * - connect requests 195962306a36Sopenharmony_ci * - initialization admin requests 196062306a36Sopenharmony_ci * - I/O requests that entered after unquiescing and 196162306a36Sopenharmony_ci * the controller stopped responding 196262306a36Sopenharmony_ci * 196362306a36Sopenharmony_ci * All other requests should be cancelled by the error 196462306a36Sopenharmony_ci * recovery work, so it's fine that we fail it here. 196562306a36Sopenharmony_ci */ 196662306a36Sopenharmony_ci nvme_rdma_complete_timed_out(rq); 196762306a36Sopenharmony_ci return BLK_EH_DONE; 196862306a36Sopenharmony_ci } 196962306a36Sopenharmony_ci 197062306a36Sopenharmony_ci /* 197162306a36Sopenharmony_ci * LIVE state should trigger the normal error recovery which will 197262306a36Sopenharmony_ci * handle completing this request. 197362306a36Sopenharmony_ci */ 197462306a36Sopenharmony_ci nvme_rdma_error_recovery(ctrl); 197562306a36Sopenharmony_ci return BLK_EH_RESET_TIMER; 197662306a36Sopenharmony_ci} 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_cistatic blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, 197962306a36Sopenharmony_ci const struct blk_mq_queue_data *bd) 198062306a36Sopenharmony_ci{ 198162306a36Sopenharmony_ci struct nvme_ns *ns = hctx->queue->queuedata; 198262306a36Sopenharmony_ci struct nvme_rdma_queue *queue = hctx->driver_data; 198362306a36Sopenharmony_ci struct request *rq = bd->rq; 198462306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 198562306a36Sopenharmony_ci struct nvme_rdma_qe *sqe = &req->sqe; 198662306a36Sopenharmony_ci struct nvme_command *c = nvme_req(rq)->cmd; 198762306a36Sopenharmony_ci struct ib_device *dev; 198862306a36Sopenharmony_ci bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags); 198962306a36Sopenharmony_ci blk_status_t ret; 199062306a36Sopenharmony_ci int err; 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci WARN_ON_ONCE(rq->tag < 0); 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_ci if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 199562306a36Sopenharmony_ci return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci dev = queue->device->dev; 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_ci req->sqe.dma = ib_dma_map_single(dev, req->sqe.data, 200062306a36Sopenharmony_ci sizeof(struct nvme_command), 200162306a36Sopenharmony_ci DMA_TO_DEVICE); 200262306a36Sopenharmony_ci err = ib_dma_mapping_error(dev, req->sqe.dma); 200362306a36Sopenharmony_ci if (unlikely(err)) 200462306a36Sopenharmony_ci return BLK_STS_RESOURCE; 200562306a36Sopenharmony_ci 200662306a36Sopenharmony_ci ib_dma_sync_single_for_cpu(dev, sqe->dma, 200762306a36Sopenharmony_ci sizeof(struct nvme_command), DMA_TO_DEVICE); 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci ret = nvme_setup_cmd(ns, rq); 201062306a36Sopenharmony_ci if (ret) 201162306a36Sopenharmony_ci goto unmap_qe; 201262306a36Sopenharmony_ci 201362306a36Sopenharmony_ci nvme_start_request(rq); 201462306a36Sopenharmony_ci 201562306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && 201662306a36Sopenharmony_ci queue->pi_support && 201762306a36Sopenharmony_ci (c->common.opcode == nvme_cmd_write || 201862306a36Sopenharmony_ci c->common.opcode == nvme_cmd_read) && 201962306a36Sopenharmony_ci nvme_ns_has_pi(ns)) 202062306a36Sopenharmony_ci req->use_sig_mr = true; 202162306a36Sopenharmony_ci else 202262306a36Sopenharmony_ci req->use_sig_mr = false; 202362306a36Sopenharmony_ci 202462306a36Sopenharmony_ci err = nvme_rdma_map_data(queue, rq, c); 202562306a36Sopenharmony_ci if (unlikely(err < 0)) { 202662306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 202762306a36Sopenharmony_ci "Failed to map data (%d)\n", err); 202862306a36Sopenharmony_ci goto err; 202962306a36Sopenharmony_ci } 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_ci sqe->cqe.done = nvme_rdma_send_done; 203262306a36Sopenharmony_ci 203362306a36Sopenharmony_ci ib_dma_sync_single_for_device(dev, sqe->dma, 203462306a36Sopenharmony_ci sizeof(struct nvme_command), DMA_TO_DEVICE); 203562306a36Sopenharmony_ci 203662306a36Sopenharmony_ci err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, 203762306a36Sopenharmony_ci req->mr ? &req->reg_wr.wr : NULL); 203862306a36Sopenharmony_ci if (unlikely(err)) 203962306a36Sopenharmony_ci goto err_unmap; 204062306a36Sopenharmony_ci 204162306a36Sopenharmony_ci return BLK_STS_OK; 204262306a36Sopenharmony_ci 204362306a36Sopenharmony_cierr_unmap: 204462306a36Sopenharmony_ci nvme_rdma_unmap_data(queue, rq); 204562306a36Sopenharmony_cierr: 204662306a36Sopenharmony_ci if (err == -EIO) 204762306a36Sopenharmony_ci ret = nvme_host_path_error(rq); 204862306a36Sopenharmony_ci else if (err == -ENOMEM || err == -EAGAIN) 204962306a36Sopenharmony_ci ret = BLK_STS_RESOURCE; 205062306a36Sopenharmony_ci else 205162306a36Sopenharmony_ci ret = BLK_STS_IOERR; 205262306a36Sopenharmony_ci nvme_cleanup_cmd(rq); 205362306a36Sopenharmony_ciunmap_qe: 205462306a36Sopenharmony_ci ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command), 205562306a36Sopenharmony_ci DMA_TO_DEVICE); 205662306a36Sopenharmony_ci return ret; 205762306a36Sopenharmony_ci} 205862306a36Sopenharmony_ci 205962306a36Sopenharmony_cistatic int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) 206062306a36Sopenharmony_ci{ 206162306a36Sopenharmony_ci struct nvme_rdma_queue *queue = hctx->driver_data; 206262306a36Sopenharmony_ci 206362306a36Sopenharmony_ci return ib_process_cq_direct(queue->ib_cq, -1); 206462306a36Sopenharmony_ci} 206562306a36Sopenharmony_ci 206662306a36Sopenharmony_cistatic void nvme_rdma_check_pi_status(struct nvme_rdma_request *req) 206762306a36Sopenharmony_ci{ 206862306a36Sopenharmony_ci struct request *rq = blk_mq_rq_from_pdu(req); 206962306a36Sopenharmony_ci struct ib_mr_status mr_status; 207062306a36Sopenharmony_ci int ret; 207162306a36Sopenharmony_ci 207262306a36Sopenharmony_ci ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status); 207362306a36Sopenharmony_ci if (ret) { 207462306a36Sopenharmony_ci pr_err("ib_check_mr_status failed, ret %d\n", ret); 207562306a36Sopenharmony_ci nvme_req(rq)->status = NVME_SC_INVALID_PI; 207662306a36Sopenharmony_ci return; 207762306a36Sopenharmony_ci } 207862306a36Sopenharmony_ci 207962306a36Sopenharmony_ci if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { 208062306a36Sopenharmony_ci switch (mr_status.sig_err.err_type) { 208162306a36Sopenharmony_ci case IB_SIG_BAD_GUARD: 208262306a36Sopenharmony_ci nvme_req(rq)->status = NVME_SC_GUARD_CHECK; 208362306a36Sopenharmony_ci break; 208462306a36Sopenharmony_ci case IB_SIG_BAD_REFTAG: 208562306a36Sopenharmony_ci nvme_req(rq)->status = NVME_SC_REFTAG_CHECK; 208662306a36Sopenharmony_ci break; 208762306a36Sopenharmony_ci case IB_SIG_BAD_APPTAG: 208862306a36Sopenharmony_ci nvme_req(rq)->status = NVME_SC_APPTAG_CHECK; 208962306a36Sopenharmony_ci break; 209062306a36Sopenharmony_ci } 209162306a36Sopenharmony_ci pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", 209262306a36Sopenharmony_ci mr_status.sig_err.err_type, mr_status.sig_err.expected, 209362306a36Sopenharmony_ci mr_status.sig_err.actual); 209462306a36Sopenharmony_ci } 209562306a36Sopenharmony_ci} 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_cistatic void nvme_rdma_complete_rq(struct request *rq) 209862306a36Sopenharmony_ci{ 209962306a36Sopenharmony_ci struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 210062306a36Sopenharmony_ci struct nvme_rdma_queue *queue = req->queue; 210162306a36Sopenharmony_ci struct ib_device *ibdev = queue->device->dev; 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci if (req->use_sig_mr) 210462306a36Sopenharmony_ci nvme_rdma_check_pi_status(req); 210562306a36Sopenharmony_ci 210662306a36Sopenharmony_ci nvme_rdma_unmap_data(queue, rq); 210762306a36Sopenharmony_ci ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command), 210862306a36Sopenharmony_ci DMA_TO_DEVICE); 210962306a36Sopenharmony_ci nvme_complete_rq(rq); 211062306a36Sopenharmony_ci} 211162306a36Sopenharmony_ci 211262306a36Sopenharmony_cistatic void nvme_rdma_map_queues(struct blk_mq_tag_set *set) 211362306a36Sopenharmony_ci{ 211462306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); 211562306a36Sopenharmony_ci 211662306a36Sopenharmony_ci nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues); 211762306a36Sopenharmony_ci} 211862306a36Sopenharmony_ci 211962306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_rdma_mq_ops = { 212062306a36Sopenharmony_ci .queue_rq = nvme_rdma_queue_rq, 212162306a36Sopenharmony_ci .complete = nvme_rdma_complete_rq, 212262306a36Sopenharmony_ci .init_request = nvme_rdma_init_request, 212362306a36Sopenharmony_ci .exit_request = nvme_rdma_exit_request, 212462306a36Sopenharmony_ci .init_hctx = nvme_rdma_init_hctx, 212562306a36Sopenharmony_ci .timeout = nvme_rdma_timeout, 212662306a36Sopenharmony_ci .map_queues = nvme_rdma_map_queues, 212762306a36Sopenharmony_ci .poll = nvme_rdma_poll, 212862306a36Sopenharmony_ci}; 212962306a36Sopenharmony_ci 213062306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_rdma_admin_mq_ops = { 213162306a36Sopenharmony_ci .queue_rq = nvme_rdma_queue_rq, 213262306a36Sopenharmony_ci .complete = nvme_rdma_complete_rq, 213362306a36Sopenharmony_ci .init_request = nvme_rdma_init_request, 213462306a36Sopenharmony_ci .exit_request = nvme_rdma_exit_request, 213562306a36Sopenharmony_ci .init_hctx = nvme_rdma_init_admin_hctx, 213662306a36Sopenharmony_ci .timeout = nvme_rdma_timeout, 213762306a36Sopenharmony_ci}; 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_cistatic void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) 214062306a36Sopenharmony_ci{ 214162306a36Sopenharmony_ci nvme_rdma_teardown_io_queues(ctrl, shutdown); 214262306a36Sopenharmony_ci nvme_quiesce_admin_queue(&ctrl->ctrl); 214362306a36Sopenharmony_ci nvme_disable_ctrl(&ctrl->ctrl, shutdown); 214462306a36Sopenharmony_ci nvme_rdma_teardown_admin_queue(ctrl, shutdown); 214562306a36Sopenharmony_ci} 214662306a36Sopenharmony_ci 214762306a36Sopenharmony_cistatic void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl) 214862306a36Sopenharmony_ci{ 214962306a36Sopenharmony_ci nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true); 215062306a36Sopenharmony_ci} 215162306a36Sopenharmony_ci 215262306a36Sopenharmony_cistatic void nvme_rdma_reset_ctrl_work(struct work_struct *work) 215362306a36Sopenharmony_ci{ 215462306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl = 215562306a36Sopenharmony_ci container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work); 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ci nvme_stop_ctrl(&ctrl->ctrl); 215862306a36Sopenharmony_ci nvme_rdma_shutdown_ctrl(ctrl, false); 215962306a36Sopenharmony_ci 216062306a36Sopenharmony_ci if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 216162306a36Sopenharmony_ci /* state change failure should never happen */ 216262306a36Sopenharmony_ci WARN_ON_ONCE(1); 216362306a36Sopenharmony_ci return; 216462306a36Sopenharmony_ci } 216562306a36Sopenharmony_ci 216662306a36Sopenharmony_ci if (nvme_rdma_setup_ctrl(ctrl, false)) 216762306a36Sopenharmony_ci goto out_fail; 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_ci return; 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ciout_fail: 217262306a36Sopenharmony_ci ++ctrl->ctrl.nr_reconnects; 217362306a36Sopenharmony_ci nvme_rdma_reconnect_or_remove(ctrl); 217462306a36Sopenharmony_ci} 217562306a36Sopenharmony_ci 217662306a36Sopenharmony_cistatic const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { 217762306a36Sopenharmony_ci .name = "rdma", 217862306a36Sopenharmony_ci .module = THIS_MODULE, 217962306a36Sopenharmony_ci .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED, 218062306a36Sopenharmony_ci .reg_read32 = nvmf_reg_read32, 218162306a36Sopenharmony_ci .reg_read64 = nvmf_reg_read64, 218262306a36Sopenharmony_ci .reg_write32 = nvmf_reg_write32, 218362306a36Sopenharmony_ci .free_ctrl = nvme_rdma_free_ctrl, 218462306a36Sopenharmony_ci .submit_async_event = nvme_rdma_submit_async_event, 218562306a36Sopenharmony_ci .delete_ctrl = nvme_rdma_delete_ctrl, 218662306a36Sopenharmony_ci .get_address = nvmf_get_address, 218762306a36Sopenharmony_ci .stop_ctrl = nvme_rdma_stop_ctrl, 218862306a36Sopenharmony_ci}; 218962306a36Sopenharmony_ci 219062306a36Sopenharmony_ci/* 219162306a36Sopenharmony_ci * Fails a connection request if it matches an existing controller 219262306a36Sopenharmony_ci * (association) with the same tuple: 219362306a36Sopenharmony_ci * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN> 219462306a36Sopenharmony_ci * 219562306a36Sopenharmony_ci * if local address is not specified in the request, it will match an 219662306a36Sopenharmony_ci * existing controller with all the other parameters the same and no 219762306a36Sopenharmony_ci * local port address specified as well. 219862306a36Sopenharmony_ci * 219962306a36Sopenharmony_ci * The ports don't need to be compared as they are intrinsically 220062306a36Sopenharmony_ci * already matched by the port pointers supplied. 220162306a36Sopenharmony_ci */ 220262306a36Sopenharmony_cistatic bool 220362306a36Sopenharmony_cinvme_rdma_existing_controller(struct nvmf_ctrl_options *opts) 220462306a36Sopenharmony_ci{ 220562306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl; 220662306a36Sopenharmony_ci bool found = false; 220762306a36Sopenharmony_ci 220862306a36Sopenharmony_ci mutex_lock(&nvme_rdma_ctrl_mutex); 220962306a36Sopenharmony_ci list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { 221062306a36Sopenharmony_ci found = nvmf_ip_options_match(&ctrl->ctrl, opts); 221162306a36Sopenharmony_ci if (found) 221262306a36Sopenharmony_ci break; 221362306a36Sopenharmony_ci } 221462306a36Sopenharmony_ci mutex_unlock(&nvme_rdma_ctrl_mutex); 221562306a36Sopenharmony_ci 221662306a36Sopenharmony_ci return found; 221762306a36Sopenharmony_ci} 221862306a36Sopenharmony_ci 221962306a36Sopenharmony_cistatic struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, 222062306a36Sopenharmony_ci struct nvmf_ctrl_options *opts) 222162306a36Sopenharmony_ci{ 222262306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl; 222362306a36Sopenharmony_ci int ret; 222462306a36Sopenharmony_ci bool changed; 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 222762306a36Sopenharmony_ci if (!ctrl) 222862306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 222962306a36Sopenharmony_ci ctrl->ctrl.opts = opts; 223062306a36Sopenharmony_ci INIT_LIST_HEAD(&ctrl->list); 223162306a36Sopenharmony_ci 223262306a36Sopenharmony_ci if (!(opts->mask & NVMF_OPT_TRSVCID)) { 223362306a36Sopenharmony_ci opts->trsvcid = 223462306a36Sopenharmony_ci kstrdup(__stringify(NVME_RDMA_IP_PORT), GFP_KERNEL); 223562306a36Sopenharmony_ci if (!opts->trsvcid) { 223662306a36Sopenharmony_ci ret = -ENOMEM; 223762306a36Sopenharmony_ci goto out_free_ctrl; 223862306a36Sopenharmony_ci } 223962306a36Sopenharmony_ci opts->mask |= NVMF_OPT_TRSVCID; 224062306a36Sopenharmony_ci } 224162306a36Sopenharmony_ci 224262306a36Sopenharmony_ci ret = inet_pton_with_scope(&init_net, AF_UNSPEC, 224362306a36Sopenharmony_ci opts->traddr, opts->trsvcid, &ctrl->addr); 224462306a36Sopenharmony_ci if (ret) { 224562306a36Sopenharmony_ci pr_err("malformed address passed: %s:%s\n", 224662306a36Sopenharmony_ci opts->traddr, opts->trsvcid); 224762306a36Sopenharmony_ci goto out_free_ctrl; 224862306a36Sopenharmony_ci } 224962306a36Sopenharmony_ci 225062306a36Sopenharmony_ci if (opts->mask & NVMF_OPT_HOST_TRADDR) { 225162306a36Sopenharmony_ci ret = inet_pton_with_scope(&init_net, AF_UNSPEC, 225262306a36Sopenharmony_ci opts->host_traddr, NULL, &ctrl->src_addr); 225362306a36Sopenharmony_ci if (ret) { 225462306a36Sopenharmony_ci pr_err("malformed src address passed: %s\n", 225562306a36Sopenharmony_ci opts->host_traddr); 225662306a36Sopenharmony_ci goto out_free_ctrl; 225762306a36Sopenharmony_ci } 225862306a36Sopenharmony_ci } 225962306a36Sopenharmony_ci 226062306a36Sopenharmony_ci if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) { 226162306a36Sopenharmony_ci ret = -EALREADY; 226262306a36Sopenharmony_ci goto out_free_ctrl; 226362306a36Sopenharmony_ci } 226462306a36Sopenharmony_ci 226562306a36Sopenharmony_ci INIT_DELAYED_WORK(&ctrl->reconnect_work, 226662306a36Sopenharmony_ci nvme_rdma_reconnect_ctrl_work); 226762306a36Sopenharmony_ci INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); 226862306a36Sopenharmony_ci INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); 226962306a36Sopenharmony_ci 227062306a36Sopenharmony_ci ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 227162306a36Sopenharmony_ci opts->nr_poll_queues + 1; 227262306a36Sopenharmony_ci ctrl->ctrl.sqsize = opts->queue_size - 1; 227362306a36Sopenharmony_ci ctrl->ctrl.kato = opts->kato; 227462306a36Sopenharmony_ci 227562306a36Sopenharmony_ci ret = -ENOMEM; 227662306a36Sopenharmony_ci ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), 227762306a36Sopenharmony_ci GFP_KERNEL); 227862306a36Sopenharmony_ci if (!ctrl->queues) 227962306a36Sopenharmony_ci goto out_free_ctrl; 228062306a36Sopenharmony_ci 228162306a36Sopenharmony_ci ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, 228262306a36Sopenharmony_ci 0 /* no quirks, we're perfect! */); 228362306a36Sopenharmony_ci if (ret) 228462306a36Sopenharmony_ci goto out_kfree_queues; 228562306a36Sopenharmony_ci 228662306a36Sopenharmony_ci changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING); 228762306a36Sopenharmony_ci WARN_ON_ONCE(!changed); 228862306a36Sopenharmony_ci 228962306a36Sopenharmony_ci ret = nvme_rdma_setup_ctrl(ctrl, true); 229062306a36Sopenharmony_ci if (ret) 229162306a36Sopenharmony_ci goto out_uninit_ctrl; 229262306a36Sopenharmony_ci 229362306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", 229462306a36Sopenharmony_ci nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); 229562306a36Sopenharmony_ci 229662306a36Sopenharmony_ci mutex_lock(&nvme_rdma_ctrl_mutex); 229762306a36Sopenharmony_ci list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); 229862306a36Sopenharmony_ci mutex_unlock(&nvme_rdma_ctrl_mutex); 229962306a36Sopenharmony_ci 230062306a36Sopenharmony_ci return &ctrl->ctrl; 230162306a36Sopenharmony_ci 230262306a36Sopenharmony_ciout_uninit_ctrl: 230362306a36Sopenharmony_ci nvme_uninit_ctrl(&ctrl->ctrl); 230462306a36Sopenharmony_ci nvme_put_ctrl(&ctrl->ctrl); 230562306a36Sopenharmony_ci if (ret > 0) 230662306a36Sopenharmony_ci ret = -EIO; 230762306a36Sopenharmony_ci return ERR_PTR(ret); 230862306a36Sopenharmony_ciout_kfree_queues: 230962306a36Sopenharmony_ci kfree(ctrl->queues); 231062306a36Sopenharmony_ciout_free_ctrl: 231162306a36Sopenharmony_ci kfree(ctrl); 231262306a36Sopenharmony_ci return ERR_PTR(ret); 231362306a36Sopenharmony_ci} 231462306a36Sopenharmony_ci 231562306a36Sopenharmony_cistatic struct nvmf_transport_ops nvme_rdma_transport = { 231662306a36Sopenharmony_ci .name = "rdma", 231762306a36Sopenharmony_ci .module = THIS_MODULE, 231862306a36Sopenharmony_ci .required_opts = NVMF_OPT_TRADDR, 231962306a36Sopenharmony_ci .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | 232062306a36Sopenharmony_ci NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | 232162306a36Sopenharmony_ci NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | 232262306a36Sopenharmony_ci NVMF_OPT_TOS, 232362306a36Sopenharmony_ci .create_ctrl = nvme_rdma_create_ctrl, 232462306a36Sopenharmony_ci}; 232562306a36Sopenharmony_ci 232662306a36Sopenharmony_cistatic void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) 232762306a36Sopenharmony_ci{ 232862306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl; 232962306a36Sopenharmony_ci struct nvme_rdma_device *ndev; 233062306a36Sopenharmony_ci bool found = false; 233162306a36Sopenharmony_ci 233262306a36Sopenharmony_ci mutex_lock(&device_list_mutex); 233362306a36Sopenharmony_ci list_for_each_entry(ndev, &device_list, entry) { 233462306a36Sopenharmony_ci if (ndev->dev == ib_device) { 233562306a36Sopenharmony_ci found = true; 233662306a36Sopenharmony_ci break; 233762306a36Sopenharmony_ci } 233862306a36Sopenharmony_ci } 233962306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 234062306a36Sopenharmony_ci 234162306a36Sopenharmony_ci if (!found) 234262306a36Sopenharmony_ci return; 234362306a36Sopenharmony_ci 234462306a36Sopenharmony_ci /* Delete all controllers using this device */ 234562306a36Sopenharmony_ci mutex_lock(&nvme_rdma_ctrl_mutex); 234662306a36Sopenharmony_ci list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { 234762306a36Sopenharmony_ci if (ctrl->device->dev != ib_device) 234862306a36Sopenharmony_ci continue; 234962306a36Sopenharmony_ci nvme_delete_ctrl(&ctrl->ctrl); 235062306a36Sopenharmony_ci } 235162306a36Sopenharmony_ci mutex_unlock(&nvme_rdma_ctrl_mutex); 235262306a36Sopenharmony_ci 235362306a36Sopenharmony_ci flush_workqueue(nvme_delete_wq); 235462306a36Sopenharmony_ci} 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_cistatic struct ib_client nvme_rdma_ib_client = { 235762306a36Sopenharmony_ci .name = "nvme_rdma", 235862306a36Sopenharmony_ci .remove = nvme_rdma_remove_one 235962306a36Sopenharmony_ci}; 236062306a36Sopenharmony_ci 236162306a36Sopenharmony_cistatic int __init nvme_rdma_init_module(void) 236262306a36Sopenharmony_ci{ 236362306a36Sopenharmony_ci int ret; 236462306a36Sopenharmony_ci 236562306a36Sopenharmony_ci ret = ib_register_client(&nvme_rdma_ib_client); 236662306a36Sopenharmony_ci if (ret) 236762306a36Sopenharmony_ci return ret; 236862306a36Sopenharmony_ci 236962306a36Sopenharmony_ci ret = nvmf_register_transport(&nvme_rdma_transport); 237062306a36Sopenharmony_ci if (ret) 237162306a36Sopenharmony_ci goto err_unreg_client; 237262306a36Sopenharmony_ci 237362306a36Sopenharmony_ci return 0; 237462306a36Sopenharmony_ci 237562306a36Sopenharmony_cierr_unreg_client: 237662306a36Sopenharmony_ci ib_unregister_client(&nvme_rdma_ib_client); 237762306a36Sopenharmony_ci return ret; 237862306a36Sopenharmony_ci} 237962306a36Sopenharmony_ci 238062306a36Sopenharmony_cistatic void __exit nvme_rdma_cleanup_module(void) 238162306a36Sopenharmony_ci{ 238262306a36Sopenharmony_ci struct nvme_rdma_ctrl *ctrl; 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ci nvmf_unregister_transport(&nvme_rdma_transport); 238562306a36Sopenharmony_ci ib_unregister_client(&nvme_rdma_ib_client); 238662306a36Sopenharmony_ci 238762306a36Sopenharmony_ci mutex_lock(&nvme_rdma_ctrl_mutex); 238862306a36Sopenharmony_ci list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) 238962306a36Sopenharmony_ci nvme_delete_ctrl(&ctrl->ctrl); 239062306a36Sopenharmony_ci mutex_unlock(&nvme_rdma_ctrl_mutex); 239162306a36Sopenharmony_ci flush_workqueue(nvme_delete_wq); 239262306a36Sopenharmony_ci} 239362306a36Sopenharmony_ci 239462306a36Sopenharmony_cimodule_init(nvme_rdma_init_module); 239562306a36Sopenharmony_cimodule_exit(nvme_rdma_cleanup_module); 239662306a36Sopenharmony_ci 239762306a36Sopenharmony_ciMODULE_LICENSE("GPL v2"); 2398