// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics RDMA target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
#include <linux/atomic.h>
#include <linux/blk-integrity.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/nvme.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/wait.h>
#include <linux/inet.h>
#include <asm/unaligned.h>

#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
#include <rdma/ib_cm.h>

#include <linux/nvme-rdma.h>
#include "nvmet.h"

/*
 * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
 */
#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE	PAGE_SIZE
#define NVMET_RDMA_MAX_INLINE_SGE		4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE		max_t(int, SZ_16K, PAGE_SIZE)

/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS			8
#define NVMET_RDMA_MAX_METADATA_MDTS		5

struct nvmet_rdma_srq;

struct nvmet_rdma_cmd {
	struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
	struct ib_cqe cqe;
	struct ib_recv_wr wr;
	struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
	struct nvme_command *nvme_cmd;
	struct nvmet_rdma_queue *queue;
	struct nvmet_rdma_srq *nsrq;
};

enum {
	NVMET_RDMA_REQ_INLINE_DATA	= (1 << 0),
	NVMET_RDMA_REQ_INVALIDATE_RKEY	= (1 << 1),
};

struct nvmet_rdma_rsp {
	struct ib_sge send_sge;
	struct ib_cqe send_cqe;
	struct ib_send_wr send_wr;

	struct nvmet_rdma_cmd *cmd;
	struct nvmet_rdma_queue *queue;

	struct ib_cqe read_cqe;
	struct ib_cqe write_cqe;
	struct rdma_rw_ctx rw;

	struct nvmet_req req;

	bool allocated;
	u8 n_rdma;
	u32 flags;
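	/*
	 * Remote key to invalidate via SEND_WITH_INV when the host's keyed
	 * SGL asked for remote invalidation (NVMET_RDMA_REQ_INVALIDATE_RKEY).
	 */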
	u32 invalidate_rkey;

	struct list_head wait_list;
	struct list_head free_list;
};

enum nvmet_rdma_queue_state {
	NVMET_RDMA_Q_CONNECTING,
	NVMET_RDMA_Q_LIVE,
	NVMET_RDMA_Q_DISCONNECTING,
};

struct nvmet_rdma_queue {
	struct rdma_cm_id *cm_id;
	struct ib_qp *qp;
	struct nvmet_port *port;
	struct ib_cq *cq;
	atomic_t sq_wr_avail;
	struct nvmet_rdma_device *dev;
	struct nvmet_rdma_srq *nsrq;
	spinlock_t state_lock;
	enum nvmet_rdma_queue_state state;
	struct nvmet_cq nvme_cq;
	struct nvmet_sq nvme_sq;

	struct nvmet_rdma_rsp *rsps;
	struct list_head free_rsps;
	spinlock_t rsps_lock;
	struct nvmet_rdma_cmd *cmds;

	struct work_struct release_work;
	struct list_head rsp_wait_list;
	struct list_head rsp_wr_wait_list;
	spinlock_t rsp_wr_wait_lock;

	int idx;
	int host_qid;
	int comp_vector;
	int recv_queue_size;
	int send_queue_size;

	struct list_head queue_list;
};

struct nvmet_rdma_port {
	struct nvmet_port *nport;
	struct sockaddr_storage addr;
	struct rdma_cm_id *cm_id;
	struct delayed_work repair_work;
};

struct nvmet_rdma_srq {
	struct ib_srq *srq;
	struct nvmet_rdma_cmd *cmds;
	struct nvmet_rdma_device *ndev;
};

struct nvmet_rdma_device {
	struct ib_device *device;
	struct ib_pd *pd;
	struct nvmet_rdma_srq **srqs;
	int srq_count;
	size_t srq_size;
	struct kref ref;
	struct list_head entry;
	int inline_data_size;
	int inline_page_count;
};

static bool nvmet_rdma_use_srq;
module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
MODULE_PARM_DESC(use_srq, "Use shared receive queue.");

static int srq_size_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops srq_size_ops = {
	.set = srq_size_set,
	.get = param_get_int,
};

static int nvmet_rdma_srq_size = 1024;
module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)");

static DEFINE_IDA(nvmet_rdma_queue_ida);
static LIST_HEAD(nvmet_rdma_queue_list);
static DEFINE_MUTEX(nvmet_rdma_queue_mutex);

static LIST_HEAD(device_list);
static DEFINE_MUTEX(device_list_mutex);

static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_rsp *r);
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_rsp *r);

static const struct nvmet_fabrics_ops nvmet_rdma_ops;

static int srq_size_set(const char *val, const struct kernel_param *kp)
{
	int n = 0, ret;

	ret = kstrtoint(val, 10, &n);
	if (ret != 0 || n < 256)
		return -EINVAL;

	return param_set_int(val, kp);
}

static int num_pages(int len)
{
	return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
}

static inline bool nvmet_rdma_need_data_in(struct nvmet_rdma_rsp *rsp)
{
	return nvme_is_write(rsp->req.cmd) &&
		rsp->req.transfer_len &&
		!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
}

static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
{
	return !nvme_is_write(rsp->req.cmd) &&
		rsp->req.transfer_len &&
		!rsp->req.cqe->status &&
		!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
}
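/*
 * Responses normally come from the queue's preallocated free list; under
 * memory pressure one is allocated on demand (marked via rsp->allocated)
 * and freed again in nvmet_rdma_put_rsp().
 */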
static inline struct nvmet_rdma_rsp *
nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
{
	struct nvmet_rdma_rsp *rsp;
	unsigned long flags;

	spin_lock_irqsave(&queue->rsps_lock, flags);
	rsp = list_first_entry_or_null(&queue->free_rsps,
			struct nvmet_rdma_rsp, free_list);
	if (likely(rsp))
		list_del(&rsp->free_list);
	spin_unlock_irqrestore(&queue->rsps_lock, flags);

	if (unlikely(!rsp)) {
		int ret;

		rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
		if (unlikely(!rsp))
			return NULL;
		ret = nvmet_rdma_alloc_rsp(queue->dev, rsp);
		if (unlikely(ret)) {
			kfree(rsp);
			return NULL;
		}

		rsp->allocated = true;
	}

	return rsp;
}

static inline void
nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{
	unsigned long flags;

	if (unlikely(rsp->allocated)) {
		nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
		kfree(rsp);
		return;
	}

	spin_lock_irqsave(&rsp->queue->rsps_lock, flags);
	list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
	spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
}

static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *c)
{
	struct scatterlist *sg;
	struct ib_sge *sge;
	int i;

	if (!ndev->inline_data_size)
		return;

	sg = c->inline_sg;
	sge = &c->sge[1];

	for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
		if (sge->length)
			ib_dma_unmap_page(ndev->device, sge->addr,
					sge->length, DMA_FROM_DEVICE);
		if (sg_page(sg))
			__free_page(sg_page(sg));
	}
}

static int nvmet_rdma_alloc_inline_pages(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *c)
{
	struct scatterlist *sg;
	struct ib_sge *sge;
	struct page *pg;
	int len;
	int i;
	if (!ndev->inline_data_size)
		return 0;

	sg = c->inline_sg;
	sg_init_table(sg, ndev->inline_page_count);
	sge = &c->sge[1];
	len = ndev->inline_data_size;

	for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) {
		pg = alloc_page(GFP_KERNEL);
		if (!pg)
			goto out_err;
		sg_assign_page(sg, pg);
		sge->addr = ib_dma_map_page(ndev->device,
				pg, 0, PAGE_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(ndev->device, sge->addr))
			goto out_err;
		sge->length = min_t(int, len, PAGE_SIZE);
		sge->lkey = ndev->pd->local_dma_lkey;
		len -= sge->length;
	}

	return 0;
out_err:
	for (; i >= 0; i--, sg--, sge--) {
		if (sge->length)
			ib_dma_unmap_page(ndev->device, sge->addr,
					sge->length, DMA_FROM_DEVICE);
		if (sg_page(sg))
			__free_page(sg_page(sg));
	}
	return -ENOMEM;
}

static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *c, bool admin)
{
	/* NVMe command / RDMA RECV */
	c->nvme_cmd = kmalloc(sizeof(*c->nvme_cmd), GFP_KERNEL);
	if (!c->nvme_cmd)
		goto out;

	c->sge[0].addr = ib_dma_map_single(ndev->device, c->nvme_cmd,
			sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
	if (ib_dma_mapping_error(ndev->device, c->sge[0].addr))
		goto out_free_cmd;

	c->sge[0].length = sizeof(*c->nvme_cmd);
	c->sge[0].lkey = ndev->pd->local_dma_lkey;

	if (!admin && nvmet_rdma_alloc_inline_pages(ndev, c))
		goto out_unmap_cmd;

	c->cqe.done = nvmet_rdma_recv_done;

	c->wr.wr_cqe = &c->cqe;
	c->wr.sg_list = c->sge;
	c->wr.num_sge = admin ? 1 : ndev->inline_page_count + 1;
	return 0;

out_unmap_cmd:
	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
			sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
out_free_cmd:
	kfree(c->nvme_cmd);

out:
	return -ENOMEM;
}

static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *c, bool admin)
{
	if (!admin)
		nvmet_rdma_free_inline_pages(ndev, c);
	ib_dma_unmap_single(ndev->device, c->sge[0].addr,
			sizeof(*c->nvme_cmd), DMA_FROM_DEVICE);
	kfree(c->nvme_cmd);
}

static struct nvmet_rdma_cmd *
nvmet_rdma_alloc_cmds(struct nvmet_rdma_device *ndev,
		int nr_cmds, bool admin)
{
	struct nvmet_rdma_cmd *cmds;
	int ret = -EINVAL, i;

	cmds = kcalloc(nr_cmds, sizeof(struct nvmet_rdma_cmd), GFP_KERNEL);
	if (!cmds)
		goto out;

	for (i = 0; i < nr_cmds; i++) {
		ret = nvmet_rdma_alloc_cmd(ndev, cmds + i, admin);
		if (ret)
			goto out_free;
	}

	return cmds;

out_free:
	while (--i >= 0)
		nvmet_rdma_free_cmd(ndev, cmds + i, admin);
	kfree(cmds);
out:
	return ERR_PTR(ret);
}

static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *cmds, int nr_cmds, bool admin)
{
	int i;

	for (i = 0; i < nr_cmds; i++)
		nvmet_rdma_free_cmd(ndev, cmds + i, admin);
	kfree(cmds);
}

static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_rsp *r)
{
	/* NVMe CQE / RDMA SEND */
	r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
	if (!r->req.cqe)
		goto out;

	r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe,
			sizeof(*r->req.cqe), DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
		goto out_free_rsp;
	if (ib_dma_pci_p2p_dma_supported(ndev->device))
		r->req.p2p_client = &ndev->device->dev;
	r->send_sge.length = sizeof(*r->req.cqe);
	r->send_sge.lkey = ndev->pd->local_dma_lkey;

	r->send_cqe.done = nvmet_rdma_send_done;

	r->send_wr.wr_cqe = &r->send_cqe;
	r->send_wr.sg_list = &r->send_sge;
	r->send_wr.num_sge = 1;
	r->send_wr.send_flags = IB_SEND_SIGNALED;

	/* Data In / RDMA READ */
	r->read_cqe.done = nvmet_rdma_read_data_done;
	/* Data Out / RDMA WRITE */
	r->write_cqe.done = nvmet_rdma_write_data_done;

	return 0;

out_free_rsp:
	kfree(r->req.cqe);
out:
	return -ENOMEM;
}

static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_rsp *r)
{
	ib_dma_unmap_single(ndev->device, r->send_sge.addr,
			sizeof(*r->req.cqe), DMA_TO_DEVICE);
	kfree(r->req.cqe);
}

static int
nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
{
	struct nvmet_rdma_device *ndev = queue->dev;
	int nr_rsps = queue->recv_queue_size * 2;
	int ret = -EINVAL, i;

	queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
			GFP_KERNEL);
	if (!queue->rsps)
		goto out;

	for (i = 0; i < nr_rsps; i++) {
		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];

		ret = nvmet_rdma_alloc_rsp(ndev, rsp);
		if (ret)
			goto out_free;

		list_add_tail(&rsp->free_list, &queue->free_rsps);
	}

	return 0;

out_free:
	while (--i >= 0) {
		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];

		list_del(&rsp->free_list);
		nvmet_rdma_free_rsp(ndev, rsp);
	}
	kfree(queue->rsps);
out:
	return ret;
}

static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
{
	struct nvmet_rdma_device *ndev = queue->dev;
	int i, nr_rsps = queue->recv_queue_size * 2;
	for (i = 0; i < nr_rsps; i++) {
		struct nvmet_rdma_rsp *rsp = &queue->rsps[i];

		list_del(&rsp->free_list);
		nvmet_rdma_free_rsp(ndev, rsp);
	}
	kfree(queue->rsps);
}

static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
		struct nvmet_rdma_cmd *cmd)
{
	int ret;

	ib_dma_sync_single_for_device(ndev->device,
		cmd->sge[0].addr, cmd->sge[0].length,
		DMA_FROM_DEVICE);

	if (cmd->nsrq)
		ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
	else
		ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);

	if (unlikely(ret))
		pr_err("post_recv cmd failed\n");

	return ret;
}

static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
{
	spin_lock(&queue->rsp_wr_wait_lock);
	while (!list_empty(&queue->rsp_wr_wait_list)) {
		struct nvmet_rdma_rsp *rsp;
		bool ret;

		rsp = list_entry(queue->rsp_wr_wait_list.next,
				struct nvmet_rdma_rsp, wait_list);
		list_del(&rsp->wait_list);

		spin_unlock(&queue->rsp_wr_wait_lock);
		ret = nvmet_rdma_execute_command(rsp);
		spin_lock(&queue->rsp_wr_wait_lock);

		if (!ret) {
			list_add(&rsp->wait_list, &queue->rsp_wr_wait_list);
			break;
		}
	}
	spin_unlock(&queue->rsp_wr_wait_lock);
}

static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
{
	struct ib_mr_status mr_status;
	int ret;
	u16 status = 0;

	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret) {
		pr_err("ib_check_mr_status failed, ret %d\n", ret);
		return NVME_SC_INVALID_PI;
	}

	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
		switch (mr_status.sig_err.err_type) {
		case IB_SIG_BAD_GUARD:
			status = NVME_SC_GUARD_CHECK;
			break;
		case IB_SIG_BAD_REFTAG:
			status = NVME_SC_REFTAG_CHECK;
			break;
		case IB_SIG_BAD_APPTAG:
			status = NVME_SC_APPTAG_CHECK;
			break;
		}
		pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
		       mr_status.sig_err.err_type,
		       mr_status.sig_err.expected,
		       mr_status.sig_err.actual);
	}

	return status;
}

static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
		struct nvme_command *cmd, struct ib_sig_domain *domain,
		u16 control, u8 pi_type)
{
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.bg_type = IB_T10DIF_CRC;
	domain->sig.dif.pi_interval = 1 << bi->interval_exp;
	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
	if (control & NVME_RW_PRINFO_PRCHK_REF)
		domain->sig.dif.ref_remap = true;

	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
	domain->sig.dif.app_escape = true;
	if (pi_type == NVME_NS_DPS_PI_TYPE3)
		domain->sig.dif.ref_escape = true;
}

static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
		struct ib_sig_attrs *sig_attrs)
{
	struct nvme_command *cmd = req->cmd;
	u16 control = le16_to_cpu(cmd->rw.control);
	u8 pi_type = req->ns->pi_type;
	struct blk_integrity *bi;

	bi = bdev_get_integrity(req->ns->bdev);

	memset(sig_attrs, 0, sizeof(*sig_attrs));

	if (control & NVME_RW_PRINFO_PRACT) {
		/* for WRITE_INSERT/READ_STRIP no wire domain */
		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
					  pi_type);
		/* Clear the PRACT bit since HCA will generate/verify the PI */
		control &= ~NVME_RW_PRINFO_PRACT;
		cmd->rw.control = cpu_to_le16(control);
		/* PI is added by the HW */
		req->transfer_len += req->metadata_len;
	} else {
		/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
					  pi_type);
		nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
					  pi_type);
	}
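	/* Translate the NVMe PRCHK bits into the verbs signature check mask. */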
	if (control & NVME_RW_PRINFO_PRCHK_REF)
		sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
	if (control & NVME_RW_PRINFO_PRCHK_GUARD)
		sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
	if (control & NVME_RW_PRINFO_PRCHK_APP)
		sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
}

static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
		struct ib_sig_attrs *sig_attrs)
{
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct nvmet_req *req = &rsp->req;
	int ret;

	if (req->metadata_len)
		ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
			cm_id->port_num, req->sg, req->sg_cnt,
			req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
			addr, key, nvmet_data_dir(req));
	else
		ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
				       req->sg, req->sg_cnt, 0, addr, key,
				       nvmet_data_dir(req));

	return ret;
}

static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
{
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct nvmet_req *req = &rsp->req;

	if (req->metadata_len)
		rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
			cm_id->port_num, req->sg, req->sg_cnt,
			req->metadata_sg, req->metadata_sg_cnt,
			nvmet_data_dir(req));
	else
		rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
				    req->sg, req->sg_cnt, nvmet_data_dir(req));
}

static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
	struct nvmet_rdma_queue *queue = rsp->queue;

	atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);

	if (rsp->n_rdma)
		nvmet_rdma_rw_ctx_destroy(rsp);

	if (rsp->req.sg != rsp->cmd->inline_sg)
		nvmet_req_free_sgls(&rsp->req);

	if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
		nvmet_rdma_process_wr_wait_list(queue);

	nvmet_rdma_put_rsp(rsp);
}

static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)
{
	if (queue->nvme_sq.ctrl) {
		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
	} else {
		/*
		 * we didn't setup the controller yet in case
		 * of admin connect error, just disconnect and
		 * cleanup the queue
		 */
		nvmet_rdma_queue_disconnect(queue);
	}
}

static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;

	nvmet_rdma_release_rsp(rsp);

	if (unlikely(wc->status != IB_WC_SUCCESS &&
		     wc->status != IB_WC_WR_FLUSH_ERR)) {
		pr_err("SEND for CQE 0x%p failed with status %s (%d).\n",
			wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
		nvmet_rdma_error_comp(queue);
	}
}

static void nvmet_rdma_queue_response(struct nvmet_req *req)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(req, struct nvmet_rdma_rsp, req);
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct ib_send_wr *first_wr;

	if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
		rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
		rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey;
	} else {
		rsp->send_wr.opcode = IB_WR_SEND;
	}

	if (nvmet_rdma_need_data_out(rsp)) {
		if (rsp->req.metadata_len)
			first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
					cm_id->port_num, &rsp->write_cqe, NULL);
		else
			first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
					cm_id->port_num, NULL, &rsp->send_wr);
	} else {
		first_wr = &rsp->send_wr;
	}

	nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);

	ib_dma_sync_single_for_device(rsp->queue->dev->device,
		rsp->send_sge.addr, rsp->send_sge.length,
		DMA_TO_DEVICE);

	if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
		pr_err("sending cmd response failed\n");
		nvmet_rdma_release_rsp(rsp);
	}
}

static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
	u16 status = 0;

	WARN_ON(rsp->n_rdma <= 0);
	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
	rsp->n_rdma = 0;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		nvmet_rdma_rw_ctx_destroy(rsp);
		nvmet_req_uninit(&rsp->req);
		nvmet_rdma_release_rsp(rsp);
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
				wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
			nvmet_rdma_error_comp(queue);
		}
		return;
	}

	if (rsp->req.metadata_len)
		status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
	nvmet_rdma_rw_ctx_destroy(rsp);

	if (unlikely(status))
		nvmet_req_complete(&rsp->req, status);
	else
		rsp->req.execute(&rsp->req);
}

static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_rsp *rsp =
		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	u16 status;

	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
		return;

	WARN_ON(rsp->n_rdma <= 0);
	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
	rsp->n_rdma = 0;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		nvmet_rdma_rw_ctx_destroy(rsp);
		nvmet_req_uninit(&rsp->req);
		nvmet_rdma_release_rsp(rsp);
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_info("RDMA WRITE for CQE failed with status %s (%d).\n",
				ib_wc_status_msg(wc->status), wc->status);
			nvmet_rdma_error_comp(queue);
		}
		return;
	}

	/*
	 * Upon RDMA completion check the signature status
	 * - if succeeded send good NVMe response
	 * - if failed send bad NVMe response with appropriate error
	 */
	status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
	if (unlikely(status))
		rsp->req.cqe->status = cpu_to_le16(status << 1);
	nvmet_rdma_rw_ctx_destroy(rsp);

	if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
		pr_err("sending cmd response failed\n");
		nvmet_rdma_release_rsp(rsp);
	}
}

static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
		u64 off)
{
	int sg_count = num_pages(len);
	struct scatterlist *sg;
	int i;

	sg = rsp->cmd->inline_sg;
	for (i = 0; i < sg_count; i++, sg++) {
		if (i < sg_count - 1)
			sg_unmark_end(sg);
		else
			sg_mark_end(sg);
		sg->offset = off;
		sg->length = min_t(int, len, PAGE_SIZE - off);
		len -= sg->length;
		if (!i)
			off = 0;
	}

	rsp->req.sg = rsp->cmd->inline_sg;
	rsp->req.sg_cnt = sg_count;
}

static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
{
	struct nvme_sgl_desc *sgl = &rsp->req.cmd->common.dptr.sgl;
	u64 off = le64_to_cpu(sgl->addr);
	u32 len = le32_to_cpu(sgl->length);

	if (!nvme_is_write(rsp->req.cmd)) {
		rsp->req.error_loc =
			offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
	}

	if (off + len > rsp->queue->dev->inline_data_size) {
		pr_err("invalid inline data offset!\n");
		return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
	}

	/* no data command? */
	if (!len)
		return 0;

	nvmet_rdma_use_inline_sg(rsp, len, off);
	rsp->flags |= NVMET_RDMA_REQ_INLINE_DATA;
	rsp->req.transfer_len += len;
	return 0;
}

static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
		struct nvme_keyed_sgl_desc *sgl, bool invalidate)
{
	u64 addr = le64_to_cpu(sgl->addr);
	u32 key = get_unaligned_le32(sgl->key);
	struct ib_sig_attrs sig_attrs;
	int ret;

	rsp->req.transfer_len = get_unaligned_le24(sgl->length);

	/* no data command? */
	if (!rsp->req.transfer_len)
		return 0;

	if (rsp->req.metadata_len)
		nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);

	ret = nvmet_req_alloc_sgls(&rsp->req);
	if (unlikely(ret < 0))
		goto error_out;

	ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
	if (unlikely(ret < 0))
		goto error_out;
	rsp->n_rdma += ret;

	if (invalidate) {
		rsp->invalidate_rkey = key;
		rsp->flags |= NVMET_RDMA_REQ_INVALIDATE_RKEY;
	}

	return 0;

error_out:
	rsp->req.transfer_len = 0;
	return NVME_SC_INTERNAL;
}

static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
{
	struct nvme_keyed_sgl_desc *sgl = &rsp->req.cmd->common.dptr.ksgl;

	switch (sgl->type >> 4) {
	case NVME_SGL_FMT_DATA_DESC:
		switch (sgl->type & 0xf) {
		case NVME_SGL_FMT_OFFSET:
			return nvmet_rdma_map_sgl_inline(rsp);
		default:
			pr_err("invalid SGL subtype: %#x\n", sgl->type);
			rsp->req.error_loc =
				offsetof(struct nvme_common_command, dptr);
			return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		}
	case NVME_KEY_SGL_FMT_DATA_DESC:
		switch (sgl->type & 0xf) {
		case NVME_SGL_FMT_ADDRESS | NVME_SGL_FMT_INVALIDATE:
			return nvmet_rdma_map_sgl_keyed(rsp, sgl, true);
		case NVME_SGL_FMT_ADDRESS:
			return nvmet_rdma_map_sgl_keyed(rsp, sgl, false);
		default:
			pr_err("invalid SGL subtype: %#x\n", sgl->type);
			rsp->req.error_loc =
				offsetof(struct nvme_common_command, dptr);
			return NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		}
	default:
		pr_err("invalid SGL type: %#x\n", sgl->type);
		rsp->req.error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_TYPE | NVME_SC_DNR;
	}
}

static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
{
	struct nvmet_rdma_queue *queue = rsp->queue;
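	/*
	 * A command needs one SEND plus n_rdma RDMA READ/WRITE work requests;
	 * if the send queue cannot cover that, give the slots back and return
	 * false so the caller parks the request on rsp_wr_wait_list.
	 */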
	if (unlikely(atomic_sub_return(1 + rsp->n_rdma,
			&queue->sq_wr_avail) < 0)) {
		pr_debug("IB send queue full (needed %d): queue %u cntlid %u\n",
				1 + rsp->n_rdma, queue->idx,
				queue->nvme_sq.ctrl->cntlid);
		atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
		return false;
	}

	if (nvmet_rdma_need_data_in(rsp)) {
		if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
				queue->cm_id->port_num, &rsp->read_cqe, NULL))
			nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
	} else {
		rsp->req.execute(&rsp->req);
	}

	return true;
}

static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
		struct nvmet_rdma_rsp *cmd)
{
	u16 status;

	ib_dma_sync_single_for_cpu(queue->dev->device,
		cmd->cmd->sge[0].addr, cmd->cmd->sge[0].length,
		DMA_FROM_DEVICE);
	ib_dma_sync_single_for_cpu(queue->dev->device,
		cmd->send_sge.addr, cmd->send_sge.length,
		DMA_TO_DEVICE);

	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
			&queue->nvme_sq, &nvmet_rdma_ops))
		return;

	status = nvmet_rdma_map_sgl(cmd);
	if (status)
		goto out_err;

	if (unlikely(!nvmet_rdma_execute_command(cmd))) {
		spin_lock(&queue->rsp_wr_wait_lock);
		list_add_tail(&cmd->wait_list, &queue->rsp_wr_wait_list);
		spin_unlock(&queue->rsp_wr_wait_lock);
	}

	return;

out_err:
	nvmet_req_complete(&cmd->req, status);
}

static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvmet_rdma_cmd *cmd =
		container_of(wc->wr_cqe, struct nvmet_rdma_cmd, cqe);
	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
	struct nvmet_rdma_rsp *rsp;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_err("RECV for CQE 0x%p failed with status %s (%d)\n",
				wc->wr_cqe, ib_wc_status_msg(wc->status),
				wc->status);
			nvmet_rdma_error_comp(queue);
		}
		return;
	}

	if (unlikely(wc->byte_len < sizeof(struct nvme_command))) {
		pr_err("Ctrl Fatal Error: capsule size less than 64 bytes\n");
		nvmet_rdma_error_comp(queue);
		return;
	}

	cmd->queue = queue;
	rsp = nvmet_rdma_get_rsp(queue);
	if (unlikely(!rsp)) {
		/*
		 * we get here only under memory pressure,
		 * silently drop and have the host retry
		 * as we can't even fail it.
		 */
		nvmet_rdma_post_recv(queue->dev, cmd);
		return;
	}
	rsp->queue = queue;
	rsp->cmd = cmd;
	rsp->flags = 0;
	rsp->req.cmd = cmd->nvme_cmd;
	rsp->req.port = queue->port;
	rsp->n_rdma = 0;

	if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
		unsigned long flags;

		spin_lock_irqsave(&queue->state_lock, flags);
		if (queue->state == NVMET_RDMA_Q_CONNECTING)
			list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
		else
			nvmet_rdma_put_rsp(rsp);
		spin_unlock_irqrestore(&queue->state_lock, flags);
		return;
	}

	nvmet_rdma_handle_command(queue, rsp);
}

static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
{
	nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
			     false);
	ib_destroy_srq(nsrq->srq);

	kfree(nsrq);
}

static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
{
	int i;

	if (!ndev->srqs)
		return;

	for (i = 0; i < ndev->srq_count; i++)
		nvmet_rdma_destroy_srq(ndev->srqs[i]);

	kfree(ndev->srqs);
}

static struct nvmet_rdma_srq *
nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
{
	struct ib_srq_init_attr srq_attr = { NULL, };
	size_t srq_size = ndev->srq_size;
	struct nvmet_rdma_srq *nsrq;
	struct ib_srq *srq;
	int ret, i;

	nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
	if (!nsrq)
		return ERR_PTR(-ENOMEM);

	srq_attr.attr.max_wr = srq_size;
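	/* One SGE for the command buffer plus one per inline data page. */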
	srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
	srq_attr.attr.srq_limit = 0;
	srq_attr.srq_type = IB_SRQT_BASIC;
	srq = ib_create_srq(ndev->pd, &srq_attr);
	if (IS_ERR(srq)) {
		ret = PTR_ERR(srq);
		goto out_free;
	}

	nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
	if (IS_ERR(nsrq->cmds)) {
		ret = PTR_ERR(nsrq->cmds);
		goto out_destroy_srq;
	}

	nsrq->srq = srq;
	nsrq->ndev = ndev;

	for (i = 0; i < srq_size; i++) {
		nsrq->cmds[i].nsrq = nsrq;
		ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
		if (ret)
			goto out_free_cmds;
	}

	return nsrq;

out_free_cmds:
	nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
out_destroy_srq:
	ib_destroy_srq(srq);
out_free:
	kfree(nsrq);
	return ERR_PTR(ret);
}

static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev)
{
	int i, ret;

	if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) {
		/*
		 * If SRQs aren't supported we just go ahead and use normal
		 * non-shared receive queues.
114462306a36Sopenharmony_ci */ 114562306a36Sopenharmony_ci pr_info("SRQ requested but not supported.\n"); 114662306a36Sopenharmony_ci return 0; 114762306a36Sopenharmony_ci } 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci ndev->srq_size = min(ndev->device->attrs.max_srq_wr, 115062306a36Sopenharmony_ci nvmet_rdma_srq_size); 115162306a36Sopenharmony_ci ndev->srq_count = min(ndev->device->num_comp_vectors, 115262306a36Sopenharmony_ci ndev->device->attrs.max_srq); 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL); 115562306a36Sopenharmony_ci if (!ndev->srqs) 115662306a36Sopenharmony_ci return -ENOMEM; 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci for (i = 0; i < ndev->srq_count; i++) { 115962306a36Sopenharmony_ci ndev->srqs[i] = nvmet_rdma_init_srq(ndev); 116062306a36Sopenharmony_ci if (IS_ERR(ndev->srqs[i])) { 116162306a36Sopenharmony_ci ret = PTR_ERR(ndev->srqs[i]); 116262306a36Sopenharmony_ci goto err_srq; 116362306a36Sopenharmony_ci } 116462306a36Sopenharmony_ci } 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci return 0; 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_cierr_srq: 116962306a36Sopenharmony_ci while (--i >= 0) 117062306a36Sopenharmony_ci nvmet_rdma_destroy_srq(ndev->srqs[i]); 117162306a36Sopenharmony_ci kfree(ndev->srqs); 117262306a36Sopenharmony_ci return ret; 117362306a36Sopenharmony_ci} 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_cistatic void nvmet_rdma_free_dev(struct kref *ref) 117662306a36Sopenharmony_ci{ 117762306a36Sopenharmony_ci struct nvmet_rdma_device *ndev = 117862306a36Sopenharmony_ci container_of(ref, struct nvmet_rdma_device, ref); 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci mutex_lock(&device_list_mutex); 118162306a36Sopenharmony_ci list_del(&ndev->entry); 118262306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci nvmet_rdma_destroy_srqs(ndev); 118562306a36Sopenharmony_ci ib_dealloc_pd(ndev->pd); 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_ci kfree(ndev); 118862306a36Sopenharmony_ci} 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_cistatic struct nvmet_rdma_device * 119162306a36Sopenharmony_cinvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) 119262306a36Sopenharmony_ci{ 119362306a36Sopenharmony_ci struct nvmet_rdma_port *port = cm_id->context; 119462306a36Sopenharmony_ci struct nvmet_port *nport = port->nport; 119562306a36Sopenharmony_ci struct nvmet_rdma_device *ndev; 119662306a36Sopenharmony_ci int inline_page_count; 119762306a36Sopenharmony_ci int inline_sge_count; 119862306a36Sopenharmony_ci int ret; 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci mutex_lock(&device_list_mutex); 120162306a36Sopenharmony_ci list_for_each_entry(ndev, &device_list, entry) { 120262306a36Sopenharmony_ci if (ndev->device->node_guid == cm_id->device->node_guid && 120362306a36Sopenharmony_ci kref_get_unless_zero(&ndev->ref)) 120462306a36Sopenharmony_ci goto out_unlock; 120562306a36Sopenharmony_ci } 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci ndev = kzalloc(sizeof(*ndev), GFP_KERNEL); 120862306a36Sopenharmony_ci if (!ndev) 120962306a36Sopenharmony_ci goto out_err; 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_ci inline_page_count = num_pages(nport->inline_data_size); 121262306a36Sopenharmony_ci inline_sge_count = max(cm_id->device->attrs.max_sge_rd, 121362306a36Sopenharmony_ci cm_id->device->attrs.max_recv_sge) - 1; 121462306a36Sopenharmony_ci if 
(inline_page_count > inline_sge_count) { 121562306a36Sopenharmony_ci pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n", 121662306a36Sopenharmony_ci nport->inline_data_size, cm_id->device->name, 121762306a36Sopenharmony_ci inline_sge_count * PAGE_SIZE); 121862306a36Sopenharmony_ci nport->inline_data_size = inline_sge_count * PAGE_SIZE; 121962306a36Sopenharmony_ci inline_page_count = inline_sge_count; 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci ndev->inline_data_size = nport->inline_data_size; 122262306a36Sopenharmony_ci ndev->inline_page_count = inline_page_count; 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_ci if (nport->pi_enable && !(cm_id->device->attrs.kernel_cap_flags & 122562306a36Sopenharmony_ci IBK_INTEGRITY_HANDOVER)) { 122662306a36Sopenharmony_ci pr_warn("T10-PI is not supported by device %s. Disabling it\n", 122762306a36Sopenharmony_ci cm_id->device->name); 122862306a36Sopenharmony_ci nport->pi_enable = false; 122962306a36Sopenharmony_ci } 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_ci ndev->device = cm_id->device; 123262306a36Sopenharmony_ci kref_init(&ndev->ref); 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci ndev->pd = ib_alloc_pd(ndev->device, 0); 123562306a36Sopenharmony_ci if (IS_ERR(ndev->pd)) 123662306a36Sopenharmony_ci goto out_free_dev; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci if (nvmet_rdma_use_srq) { 123962306a36Sopenharmony_ci ret = nvmet_rdma_init_srqs(ndev); 124062306a36Sopenharmony_ci if (ret) 124162306a36Sopenharmony_ci goto out_free_pd; 124262306a36Sopenharmony_ci } 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci list_add(&ndev->entry, &device_list); 124562306a36Sopenharmony_ciout_unlock: 124662306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 124762306a36Sopenharmony_ci pr_debug("added %s.\n", ndev->device->name); 124862306a36Sopenharmony_ci return ndev; 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ciout_free_pd: 125162306a36Sopenharmony_ci ib_dealloc_pd(ndev->pd); 125262306a36Sopenharmony_ciout_free_dev: 125362306a36Sopenharmony_ci kfree(ndev); 125462306a36Sopenharmony_ciout_err: 125562306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 125662306a36Sopenharmony_ci return NULL; 125762306a36Sopenharmony_ci} 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_cistatic int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) 126062306a36Sopenharmony_ci{ 126162306a36Sopenharmony_ci struct ib_qp_init_attr qp_attr = { }; 126262306a36Sopenharmony_ci struct nvmet_rdma_device *ndev = queue->dev; 126362306a36Sopenharmony_ci int nr_cqe, ret, i, factor; 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci /* 126662306a36Sopenharmony_ci * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND. 
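	 * Each command needs one completion for its RECV plus up to two on
	 * the send side (the RDMA READ/WRITE context and the response SEND),
	 * hence recv_queue_size + 2 * send_queue_size below.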
126762306a36Sopenharmony_ci */ 126862306a36Sopenharmony_ci nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size; 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci queue->cq = ib_cq_pool_get(ndev->device, nr_cqe + 1, 127162306a36Sopenharmony_ci queue->comp_vector, IB_POLL_WORKQUEUE); 127262306a36Sopenharmony_ci if (IS_ERR(queue->cq)) { 127362306a36Sopenharmony_ci ret = PTR_ERR(queue->cq); 127462306a36Sopenharmony_ci pr_err("failed to create CQ cqe= %d ret= %d\n", 127562306a36Sopenharmony_ci nr_cqe + 1, ret); 127662306a36Sopenharmony_ci goto out; 127762306a36Sopenharmony_ci } 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_ci qp_attr.qp_context = queue; 128062306a36Sopenharmony_ci qp_attr.event_handler = nvmet_rdma_qp_event; 128162306a36Sopenharmony_ci qp_attr.send_cq = queue->cq; 128262306a36Sopenharmony_ci qp_attr.recv_cq = queue->cq; 128362306a36Sopenharmony_ci qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 128462306a36Sopenharmony_ci qp_attr.qp_type = IB_QPT_RC; 128562306a36Sopenharmony_ci /* +1 for drain */ 128662306a36Sopenharmony_ci qp_attr.cap.max_send_wr = queue->send_queue_size + 1; 128762306a36Sopenharmony_ci factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num, 128862306a36Sopenharmony_ci 1 << NVMET_RDMA_MAX_MDTS); 128962306a36Sopenharmony_ci qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor; 129062306a36Sopenharmony_ci qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, 129162306a36Sopenharmony_ci ndev->device->attrs.max_send_sge); 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ci if (queue->nsrq) { 129462306a36Sopenharmony_ci qp_attr.srq = queue->nsrq->srq; 129562306a36Sopenharmony_ci } else { 129662306a36Sopenharmony_ci /* +1 for drain */ 129762306a36Sopenharmony_ci qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; 129862306a36Sopenharmony_ci qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; 129962306a36Sopenharmony_ci } 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_ci if (queue->port->pi_enable && queue->host_qid) 130262306a36Sopenharmony_ci qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); 130562306a36Sopenharmony_ci if (ret) { 130662306a36Sopenharmony_ci pr_err("failed to create_qp ret= %d\n", ret); 130762306a36Sopenharmony_ci goto err_destroy_cq; 130862306a36Sopenharmony_ci } 130962306a36Sopenharmony_ci queue->qp = queue->cm_id->qp; 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr); 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", 131462306a36Sopenharmony_ci __func__, queue->cq->cqe, qp_attr.cap.max_send_sge, 131562306a36Sopenharmony_ci qp_attr.cap.max_send_wr, queue->cm_id); 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci if (!queue->nsrq) { 131862306a36Sopenharmony_ci for (i = 0; i < queue->recv_queue_size; i++) { 131962306a36Sopenharmony_ci queue->cmds[i].queue = queue; 132062306a36Sopenharmony_ci ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); 132162306a36Sopenharmony_ci if (ret) 132262306a36Sopenharmony_ci goto err_destroy_qp; 132362306a36Sopenharmony_ci } 132462306a36Sopenharmony_ci } 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_ciout: 132762306a36Sopenharmony_ci return ret; 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_cierr_destroy_qp: 133062306a36Sopenharmony_ci rdma_destroy_qp(queue->cm_id); 
133162306a36Sopenharmony_cierr_destroy_cq: 133262306a36Sopenharmony_ci ib_cq_pool_put(queue->cq, nr_cqe + 1); 133362306a36Sopenharmony_ci goto out; 133462306a36Sopenharmony_ci} 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_cistatic void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) 133762306a36Sopenharmony_ci{ 133862306a36Sopenharmony_ci ib_drain_qp(queue->qp); 133962306a36Sopenharmony_ci if (queue->cm_id) 134062306a36Sopenharmony_ci rdma_destroy_id(queue->cm_id); 134162306a36Sopenharmony_ci ib_destroy_qp(queue->qp); 134262306a36Sopenharmony_ci ib_cq_pool_put(queue->cq, queue->recv_queue_size + 2 * 134362306a36Sopenharmony_ci queue->send_queue_size + 1); 134462306a36Sopenharmony_ci} 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_cistatic void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) 134762306a36Sopenharmony_ci{ 134862306a36Sopenharmony_ci pr_debug("freeing queue %d\n", queue->idx); 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci nvmet_sq_destroy(&queue->nvme_sq); 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci nvmet_rdma_destroy_queue_ib(queue); 135362306a36Sopenharmony_ci if (!queue->nsrq) { 135462306a36Sopenharmony_ci nvmet_rdma_free_cmds(queue->dev, queue->cmds, 135562306a36Sopenharmony_ci queue->recv_queue_size, 135662306a36Sopenharmony_ci !queue->host_qid); 135762306a36Sopenharmony_ci } 135862306a36Sopenharmony_ci nvmet_rdma_free_rsps(queue); 135962306a36Sopenharmony_ci ida_free(&nvmet_rdma_queue_ida, queue->idx); 136062306a36Sopenharmony_ci kfree(queue); 136162306a36Sopenharmony_ci} 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_cistatic void nvmet_rdma_release_queue_work(struct work_struct *w) 136462306a36Sopenharmony_ci{ 136562306a36Sopenharmony_ci struct nvmet_rdma_queue *queue = 136662306a36Sopenharmony_ci container_of(w, struct nvmet_rdma_queue, release_work); 136762306a36Sopenharmony_ci struct nvmet_rdma_device *dev = queue->dev; 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci nvmet_rdma_free_queue(queue); 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci kref_put(&dev->ref, nvmet_rdma_free_dev); 137262306a36Sopenharmony_ci} 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_cistatic int 137562306a36Sopenharmony_cinvmet_rdma_parse_cm_connect_req(struct rdma_conn_param *conn, 137662306a36Sopenharmony_ci struct nvmet_rdma_queue *queue) 137762306a36Sopenharmony_ci{ 137862306a36Sopenharmony_ci struct nvme_rdma_cm_req *req; 137962306a36Sopenharmony_ci 138062306a36Sopenharmony_ci req = (struct nvme_rdma_cm_req *)conn->private_data; 138162306a36Sopenharmony_ci if (!req || conn->private_data_len == 0) 138262306a36Sopenharmony_ci return NVME_RDMA_CM_INVALID_LEN; 138362306a36Sopenharmony_ci 138462306a36Sopenharmony_ci if (le16_to_cpu(req->recfmt) != NVME_RDMA_CM_FMT_1_0) 138562306a36Sopenharmony_ci return NVME_RDMA_CM_INVALID_RECFMT; 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_ci queue->host_qid = le16_to_cpu(req->qid); 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_ci /* 139062306a36Sopenharmony_ci * req->hsqsize corresponds to our recv queue size plus 1 139162306a36Sopenharmony_ci * req->hrqsize corresponds to our send queue size 139262306a36Sopenharmony_ci */ 139362306a36Sopenharmony_ci queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1; 139462306a36Sopenharmony_ci queue->send_queue_size = le16_to_cpu(req->hrqsize); 139562306a36Sopenharmony_ci 139662306a36Sopenharmony_ci if (!queue->host_qid && queue->recv_queue_size > NVME_AQ_DEPTH) 139762306a36Sopenharmony_ci return 
NVME_RDMA_CM_INVALID_HSQSIZE; 139862306a36Sopenharmony_ci 139962306a36Sopenharmony_ci /* XXX: Should we enforce some kind of max for IO queues? */ 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci return 0; 140262306a36Sopenharmony_ci} 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_cistatic int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id, 140562306a36Sopenharmony_ci enum nvme_rdma_cm_status status) 140662306a36Sopenharmony_ci{ 140762306a36Sopenharmony_ci struct nvme_rdma_cm_rej rej; 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci pr_debug("rejecting connect request: status %d (%s)\n", 141062306a36Sopenharmony_ci status, nvme_rdma_cm_msg(status)); 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0); 141362306a36Sopenharmony_ci rej.sts = cpu_to_le16(status); 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci return rdma_reject(cm_id, (void *)&rej, sizeof(rej), 141662306a36Sopenharmony_ci IB_CM_REJ_CONSUMER_DEFINED); 141762306a36Sopenharmony_ci} 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_cistatic struct nvmet_rdma_queue * 142062306a36Sopenharmony_cinvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, 142162306a36Sopenharmony_ci struct rdma_cm_id *cm_id, 142262306a36Sopenharmony_ci struct rdma_cm_event *event) 142362306a36Sopenharmony_ci{ 142462306a36Sopenharmony_ci struct nvmet_rdma_port *port = cm_id->context; 142562306a36Sopenharmony_ci struct nvmet_rdma_queue *queue; 142662306a36Sopenharmony_ci int ret; 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci queue = kzalloc(sizeof(*queue), GFP_KERNEL); 142962306a36Sopenharmony_ci if (!queue) { 143062306a36Sopenharmony_ci ret = NVME_RDMA_CM_NO_RSC; 143162306a36Sopenharmony_ci goto out_reject; 143262306a36Sopenharmony_ci } 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci ret = nvmet_sq_init(&queue->nvme_sq); 143562306a36Sopenharmony_ci if (ret) { 143662306a36Sopenharmony_ci ret = NVME_RDMA_CM_NO_RSC; 143762306a36Sopenharmony_ci goto out_free_queue; 143862306a36Sopenharmony_ci } 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci ret = nvmet_rdma_parse_cm_connect_req(&event->param.conn, queue); 144162306a36Sopenharmony_ci if (ret) 144262306a36Sopenharmony_ci goto out_destroy_sq; 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci /* 144562306a36Sopenharmony_ci * Schedules the actual release because calling rdma_destroy_id from 144662306a36Sopenharmony_ci * inside a CM callback would trigger a deadlock. (great API design..) 
144762306a36Sopenharmony_ci */ 144862306a36Sopenharmony_ci INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); 144962306a36Sopenharmony_ci queue->dev = ndev; 145062306a36Sopenharmony_ci queue->cm_id = cm_id; 145162306a36Sopenharmony_ci queue->port = port->nport; 145262306a36Sopenharmony_ci 145362306a36Sopenharmony_ci spin_lock_init(&queue->state_lock); 145462306a36Sopenharmony_ci queue->state = NVMET_RDMA_Q_CONNECTING; 145562306a36Sopenharmony_ci INIT_LIST_HEAD(&queue->rsp_wait_list); 145662306a36Sopenharmony_ci INIT_LIST_HEAD(&queue->rsp_wr_wait_list); 145762306a36Sopenharmony_ci spin_lock_init(&queue->rsp_wr_wait_lock); 145862306a36Sopenharmony_ci INIT_LIST_HEAD(&queue->free_rsps); 145962306a36Sopenharmony_ci spin_lock_init(&queue->rsps_lock); 146062306a36Sopenharmony_ci INIT_LIST_HEAD(&queue->queue_list); 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL); 146362306a36Sopenharmony_ci if (queue->idx < 0) { 146462306a36Sopenharmony_ci ret = NVME_RDMA_CM_NO_RSC; 146562306a36Sopenharmony_ci goto out_destroy_sq; 146662306a36Sopenharmony_ci } 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci /* 146962306a36Sopenharmony_ci * Spread the io queues across completion vectors, 147062306a36Sopenharmony_ci * but still keep all admin queues on vector 0. 147162306a36Sopenharmony_ci */ 147262306a36Sopenharmony_ci queue->comp_vector = !queue->host_qid ? 0 : 147362306a36Sopenharmony_ci queue->idx % ndev->device->num_comp_vectors; 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_ci ret = nvmet_rdma_alloc_rsps(queue); 147762306a36Sopenharmony_ci if (ret) { 147862306a36Sopenharmony_ci ret = NVME_RDMA_CM_NO_RSC; 147962306a36Sopenharmony_ci goto out_ida_remove; 148062306a36Sopenharmony_ci } 148162306a36Sopenharmony_ci 148262306a36Sopenharmony_ci if (ndev->srqs) { 148362306a36Sopenharmony_ci queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count]; 148462306a36Sopenharmony_ci } else { 148562306a36Sopenharmony_ci queue->cmds = nvmet_rdma_alloc_cmds(ndev, 148662306a36Sopenharmony_ci queue->recv_queue_size, 148762306a36Sopenharmony_ci !queue->host_qid); 148862306a36Sopenharmony_ci if (IS_ERR(queue->cmds)) { 148962306a36Sopenharmony_ci ret = NVME_RDMA_CM_NO_RSC; 149062306a36Sopenharmony_ci goto out_free_responses; 149162306a36Sopenharmony_ci } 149262306a36Sopenharmony_ci } 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci ret = nvmet_rdma_create_queue_ib(queue); 149562306a36Sopenharmony_ci if (ret) { 149662306a36Sopenharmony_ci pr_err("%s: creating RDMA queue failed (%d).\n", 149762306a36Sopenharmony_ci __func__, ret); 149862306a36Sopenharmony_ci ret = NVME_RDMA_CM_NO_RSC; 149962306a36Sopenharmony_ci goto out_free_cmds; 150062306a36Sopenharmony_ci } 150162306a36Sopenharmony_ci 150262306a36Sopenharmony_ci return queue; 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ciout_free_cmds: 150562306a36Sopenharmony_ci if (!queue->nsrq) { 150662306a36Sopenharmony_ci nvmet_rdma_free_cmds(queue->dev, queue->cmds, 150762306a36Sopenharmony_ci queue->recv_queue_size, 150862306a36Sopenharmony_ci !queue->host_qid); 150962306a36Sopenharmony_ci } 151062306a36Sopenharmony_ciout_free_responses: 151162306a36Sopenharmony_ci nvmet_rdma_free_rsps(queue); 151262306a36Sopenharmony_ciout_ida_remove: 151362306a36Sopenharmony_ci ida_free(&nvmet_rdma_queue_ida, queue->idx); 151462306a36Sopenharmony_ciout_destroy_sq: 151562306a36Sopenharmony_ci nvmet_sq_destroy(&queue->nvme_sq); 
out_free_queue:
	kfree(queue);
out_reject:
	nvmet_rdma_cm_reject(cm_id, ret);
	return NULL;
}

static void nvmet_rdma_qp_event(struct ib_event *event, void *priv)
{
	struct nvmet_rdma_queue *queue = priv;

	switch (event->event) {
	case IB_EVENT_COMM_EST:
		rdma_notify(queue->cm_id, event->event);
		break;
	case IB_EVENT_QP_LAST_WQE_REACHED:
		pr_debug("received last WQE reached event for queue=0x%p\n",
			 queue);
		break;
	default:
		pr_err("received IB QP event: %s (%d)\n",
		       ib_event_msg(event->event), event->event);
		break;
	}
}

static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
		struct nvmet_rdma_queue *queue,
		struct rdma_conn_param *p)
{
	struct rdma_conn_param param = { };
	struct nvme_rdma_cm_rep priv = { };
	int ret = -ENOMEM;

	param.rnr_retry_count = 7;
	param.flow_control = 1;
	param.initiator_depth = min_t(u8, p->initiator_depth,
		queue->dev->device->attrs.max_qp_init_rd_atom);
	param.private_data = &priv;
	param.private_data_len = sizeof(priv);
	priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
	priv.crqsize = cpu_to_le16(queue->recv_queue_size);

	ret = rdma_accept(cm_id, &param);
	if (ret)
		pr_err("rdma_accept failed (error code = %d)\n", ret);

	return ret;
}

static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *event)
{
	struct nvmet_rdma_device *ndev;
	struct nvmet_rdma_queue *queue;
	int ret = -EINVAL;

	ndev = nvmet_rdma_find_get_device(cm_id);
	if (!ndev) {
		nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
		return -ECONNREFUSED;
	}

	queue = nvmet_rdma_alloc_queue(ndev, cm_id, event);
	if (!queue) {
		ret = -ENOMEM;
		goto put_device;
	}

	if (queue->host_qid == 0) {
158662306a36Sopenharmony_ci /* Let inflight controller teardown complete */ 158762306a36Sopenharmony_ci flush_workqueue(nvmet_wq); 158862306a36Sopenharmony_ci } 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); 159162306a36Sopenharmony_ci if (ret) { 159262306a36Sopenharmony_ci /* 159362306a36Sopenharmony_ci * Don't destroy the cm_id in free path, as we implicitly 159462306a36Sopenharmony_ci * destroy the cm_id here with non-zero ret code. 159562306a36Sopenharmony_ci */ 159662306a36Sopenharmony_ci queue->cm_id = NULL; 159762306a36Sopenharmony_ci goto free_queue; 159862306a36Sopenharmony_ci } 159962306a36Sopenharmony_ci 160062306a36Sopenharmony_ci mutex_lock(&nvmet_rdma_queue_mutex); 160162306a36Sopenharmony_ci list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list); 160262306a36Sopenharmony_ci mutex_unlock(&nvmet_rdma_queue_mutex); 160362306a36Sopenharmony_ci 160462306a36Sopenharmony_ci return 0; 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_cifree_queue: 160762306a36Sopenharmony_ci nvmet_rdma_free_queue(queue); 160862306a36Sopenharmony_ciput_device: 160962306a36Sopenharmony_ci kref_put(&ndev->ref, nvmet_rdma_free_dev); 161062306a36Sopenharmony_ci 161162306a36Sopenharmony_ci return ret; 161262306a36Sopenharmony_ci} 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_cistatic void nvmet_rdma_queue_established(struct nvmet_rdma_queue *queue) 161562306a36Sopenharmony_ci{ 161662306a36Sopenharmony_ci unsigned long flags; 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_ci spin_lock_irqsave(&queue->state_lock, flags); 161962306a36Sopenharmony_ci if (queue->state != NVMET_RDMA_Q_CONNECTING) { 162062306a36Sopenharmony_ci pr_warn("trying to establish a connected queue\n"); 162162306a36Sopenharmony_ci goto out_unlock; 162262306a36Sopenharmony_ci } 162362306a36Sopenharmony_ci queue->state = NVMET_RDMA_Q_LIVE; 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci while (!list_empty(&queue->rsp_wait_list)) { 162662306a36Sopenharmony_ci struct nvmet_rdma_rsp *cmd; 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci cmd = list_first_entry(&queue->rsp_wait_list, 162962306a36Sopenharmony_ci struct nvmet_rdma_rsp, wait_list); 163062306a36Sopenharmony_ci list_del(&cmd->wait_list); 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci spin_unlock_irqrestore(&queue->state_lock, flags); 163362306a36Sopenharmony_ci nvmet_rdma_handle_command(queue, cmd); 163462306a36Sopenharmony_ci spin_lock_irqsave(&queue->state_lock, flags); 163562306a36Sopenharmony_ci } 163662306a36Sopenharmony_ci 163762306a36Sopenharmony_ciout_unlock: 163862306a36Sopenharmony_ci spin_unlock_irqrestore(&queue->state_lock, flags); 163962306a36Sopenharmony_ci} 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_cistatic void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) 164262306a36Sopenharmony_ci{ 164362306a36Sopenharmony_ci bool disconnect = false; 164462306a36Sopenharmony_ci unsigned long flags; 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state); 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_ci spin_lock_irqsave(&queue->state_lock, flags); 164962306a36Sopenharmony_ci switch (queue->state) { 165062306a36Sopenharmony_ci case NVMET_RDMA_Q_CONNECTING: 165162306a36Sopenharmony_ci while (!list_empty(&queue->rsp_wait_list)) { 165262306a36Sopenharmony_ci struct nvmet_rdma_rsp *rsp; 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci rsp = 
list_first_entry(&queue->rsp_wait_list, 165562306a36Sopenharmony_ci struct nvmet_rdma_rsp, 165662306a36Sopenharmony_ci wait_list); 165762306a36Sopenharmony_ci list_del(&rsp->wait_list); 165862306a36Sopenharmony_ci nvmet_rdma_put_rsp(rsp); 165962306a36Sopenharmony_ci } 166062306a36Sopenharmony_ci fallthrough; 166162306a36Sopenharmony_ci case NVMET_RDMA_Q_LIVE: 166262306a36Sopenharmony_ci queue->state = NVMET_RDMA_Q_DISCONNECTING; 166362306a36Sopenharmony_ci disconnect = true; 166462306a36Sopenharmony_ci break; 166562306a36Sopenharmony_ci case NVMET_RDMA_Q_DISCONNECTING: 166662306a36Sopenharmony_ci break; 166762306a36Sopenharmony_ci } 166862306a36Sopenharmony_ci spin_unlock_irqrestore(&queue->state_lock, flags); 166962306a36Sopenharmony_ci 167062306a36Sopenharmony_ci if (disconnect) { 167162306a36Sopenharmony_ci rdma_disconnect(queue->cm_id); 167262306a36Sopenharmony_ci queue_work(nvmet_wq, &queue->release_work); 167362306a36Sopenharmony_ci } 167462306a36Sopenharmony_ci} 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_cistatic void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) 167762306a36Sopenharmony_ci{ 167862306a36Sopenharmony_ci bool disconnect = false; 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci mutex_lock(&nvmet_rdma_queue_mutex); 168162306a36Sopenharmony_ci if (!list_empty(&queue->queue_list)) { 168262306a36Sopenharmony_ci list_del_init(&queue->queue_list); 168362306a36Sopenharmony_ci disconnect = true; 168462306a36Sopenharmony_ci } 168562306a36Sopenharmony_ci mutex_unlock(&nvmet_rdma_queue_mutex); 168662306a36Sopenharmony_ci 168762306a36Sopenharmony_ci if (disconnect) 168862306a36Sopenharmony_ci __nvmet_rdma_queue_disconnect(queue); 168962306a36Sopenharmony_ci} 169062306a36Sopenharmony_ci 169162306a36Sopenharmony_cistatic void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, 169262306a36Sopenharmony_ci struct nvmet_rdma_queue *queue) 169362306a36Sopenharmony_ci{ 169462306a36Sopenharmony_ci WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING); 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_ci mutex_lock(&nvmet_rdma_queue_mutex); 169762306a36Sopenharmony_ci if (!list_empty(&queue->queue_list)) 169862306a36Sopenharmony_ci list_del_init(&queue->queue_list); 169962306a36Sopenharmony_ci mutex_unlock(&nvmet_rdma_queue_mutex); 170062306a36Sopenharmony_ci 170162306a36Sopenharmony_ci pr_err("failed to connect queue %d\n", queue->idx); 170262306a36Sopenharmony_ci queue_work(nvmet_wq, &queue->release_work); 170362306a36Sopenharmony_ci} 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_ci/** 170662306a36Sopenharmony_ci * nvmet_rdma_device_removal() - Handle RDMA device removal 170762306a36Sopenharmony_ci * @cm_id: rdma_cm id, used for nvmet port 170862306a36Sopenharmony_ci * @queue: nvmet rdma queue (cm id qp_context) 170962306a36Sopenharmony_ci * 171062306a36Sopenharmony_ci * DEVICE_REMOVAL event notifies us that the RDMA device is about 171162306a36Sopenharmony_ci * to unplug. Note that this event can be generated on a normal 171262306a36Sopenharmony_ci * queue cm_id and/or a device bound listener cm_id (where in this 171362306a36Sopenharmony_ci * case queue will be null). 171462306a36Sopenharmony_ci * 171562306a36Sopenharmony_ci * We registered an ib_client to handle device removal for queues, 171662306a36Sopenharmony_ci * so we only need to handle the listening port cm_ids. 
In this case
 * we nullify the priv to prevent double cm_id destruction and destroying
 * the cm_id implicitly by returning a non-zero rc to the callout.
 */
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
		struct nvmet_rdma_queue *queue)
{
	struct nvmet_rdma_port *port;

	if (queue) {
		/*
		 * This is a queue cm_id. We have registered
		 * an ib_client to handle queue removal,
		 * so don't interfere and just return.
		 */
		return 0;
	}

	port = cm_id->context;

	/*
	 * This is a listener cm_id. Make sure that
	 * future remove_port won't invoke a double
	 * cm_id destroy. Use atomic xchg to make sure
	 * we don't compete with remove_port.
	 */
	if (xchg(&port->cm_id, NULL) != cm_id)
		return 0;

	/*
	 * We need to return 1 so that the core will destroy
	 * its own ID. What a great API design..
	 */
	return 1;
}

static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
		struct rdma_cm_event *event)
{
	struct nvmet_rdma_queue *queue = NULL;
	int ret = 0;

	if (cm_id->qp)
		queue = cm_id->qp->qp_context;

	pr_debug("%s (%d): status %d id %p\n",
		rdma_event_msg(event->event), event->event,
		event->status, cm_id);

	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		ret = nvmet_rdma_queue_connect(cm_id, event);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		nvmet_rdma_queue_established(queue);
		break;
	case RDMA_CM_EVENT_ADDR_CHANGE:
		if (!queue) {
			struct nvmet_rdma_port *port = cm_id->context;

			queue_delayed_work(nvmet_wq, &port->repair_work, 0);
			break;
		}
		fallthrough;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		nvmet_rdma_queue_disconnect(queue);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		ret = nvmet_rdma_device_removal(cm_id, queue);
		break;
	case RDMA_CM_EVENT_REJECTED:
		pr_debug("Connection rejected: %s\n",
			 rdma_reject_msg(cm_id, event->status));
		fallthrough;
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_CONNECT_ERROR:
		nvmet_rdma_queue_connect_fail(cm_id, queue);
		break;
	default:
		pr_err("received unrecognized RDMA CM event %d\n",
			event->event);
		break;
	}

	return ret;
}

static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl)
{
	struct nvmet_rdma_queue *queue;

restart:
	mutex_lock(&nvmet_rdma_queue_mutex);
	list_for_each_entry(queue, &nvmet_rdma_queue_list, queue_list) {
		if (queue->nvme_sq.ctrl == ctrl) {
			list_del_init(&queue->queue_list);
			mutex_unlock(&nvmet_rdma_queue_mutex);

			__nvmet_rdma_queue_disconnect(queue);
			goto restart;
		}
	}
	mutex_unlock(&nvmet_rdma_queue_mutex);
}

static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port)
{
	struct nvmet_rdma_queue *queue, *tmp;
	struct nvmet_port *nport = port->nport;

	mutex_lock(&nvmet_rdma_queue_mutex);
	list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list,
				 queue_list) {
		if (queue->port != nport)
			continue;

		list_del_init(&queue->queue_list);
		__nvmet_rdma_queue_disconnect(queue);
	}
	mutex_unlock(&nvmet_rdma_queue_mutex);
}

static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
{
	struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);

	if (cm_id)
		rdma_destroy_id(cm_id);

	/*
	 * Destroy the remaining queues, which do not belong to any
	 * controller yet. Doing this after the RDMA-CM ID has been
	 * destroyed guarantees that no new queue will be created.
185062306a36Sopenharmony_ci */ 185162306a36Sopenharmony_ci nvmet_rdma_destroy_port_queues(port); 185262306a36Sopenharmony_ci} 185362306a36Sopenharmony_ci 185462306a36Sopenharmony_cistatic int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) 185562306a36Sopenharmony_ci{ 185662306a36Sopenharmony_ci struct sockaddr *addr = (struct sockaddr *)&port->addr; 185762306a36Sopenharmony_ci struct rdma_cm_id *cm_id; 185862306a36Sopenharmony_ci int ret; 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, 186162306a36Sopenharmony_ci RDMA_PS_TCP, IB_QPT_RC); 186262306a36Sopenharmony_ci if (IS_ERR(cm_id)) { 186362306a36Sopenharmony_ci pr_err("CM ID creation failed\n"); 186462306a36Sopenharmony_ci return PTR_ERR(cm_id); 186562306a36Sopenharmony_ci } 186662306a36Sopenharmony_ci 186762306a36Sopenharmony_ci /* 186862306a36Sopenharmony_ci * Allow both IPv4 and IPv6 sockets to bind a single port 186962306a36Sopenharmony_ci * at the same time. 187062306a36Sopenharmony_ci */ 187162306a36Sopenharmony_ci ret = rdma_set_afonly(cm_id, 1); 187262306a36Sopenharmony_ci if (ret) { 187362306a36Sopenharmony_ci pr_err("rdma_set_afonly failed (%d)\n", ret); 187462306a36Sopenharmony_ci goto out_destroy_id; 187562306a36Sopenharmony_ci } 187662306a36Sopenharmony_ci 187762306a36Sopenharmony_ci ret = rdma_bind_addr(cm_id, addr); 187862306a36Sopenharmony_ci if (ret) { 187962306a36Sopenharmony_ci pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret); 188062306a36Sopenharmony_ci goto out_destroy_id; 188162306a36Sopenharmony_ci } 188262306a36Sopenharmony_ci 188362306a36Sopenharmony_ci ret = rdma_listen(cm_id, 128); 188462306a36Sopenharmony_ci if (ret) { 188562306a36Sopenharmony_ci pr_err("listening to %pISpcs failed (%d)\n", addr, ret); 188662306a36Sopenharmony_ci goto out_destroy_id; 188762306a36Sopenharmony_ci } 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_ci port->cm_id = cm_id; 189062306a36Sopenharmony_ci return 0; 189162306a36Sopenharmony_ci 189262306a36Sopenharmony_ciout_destroy_id: 189362306a36Sopenharmony_ci rdma_destroy_id(cm_id); 189462306a36Sopenharmony_ci return ret; 189562306a36Sopenharmony_ci} 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_cistatic void nvmet_rdma_repair_port_work(struct work_struct *w) 189862306a36Sopenharmony_ci{ 189962306a36Sopenharmony_ci struct nvmet_rdma_port *port = container_of(to_delayed_work(w), 190062306a36Sopenharmony_ci struct nvmet_rdma_port, repair_work); 190162306a36Sopenharmony_ci int ret; 190262306a36Sopenharmony_ci 190362306a36Sopenharmony_ci nvmet_rdma_disable_port(port); 190462306a36Sopenharmony_ci ret = nvmet_rdma_enable_port(port); 190562306a36Sopenharmony_ci if (ret) 190662306a36Sopenharmony_ci queue_delayed_work(nvmet_wq, &port->repair_work, 5 * HZ); 190762306a36Sopenharmony_ci} 190862306a36Sopenharmony_ci 190962306a36Sopenharmony_cistatic int nvmet_rdma_add_port(struct nvmet_port *nport) 191062306a36Sopenharmony_ci{ 191162306a36Sopenharmony_ci struct nvmet_rdma_port *port; 191262306a36Sopenharmony_ci __kernel_sa_family_t af; 191362306a36Sopenharmony_ci int ret; 191462306a36Sopenharmony_ci 191562306a36Sopenharmony_ci port = kzalloc(sizeof(*port), GFP_KERNEL); 191662306a36Sopenharmony_ci if (!port) 191762306a36Sopenharmony_ci return -ENOMEM; 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci nport->priv = port; 192062306a36Sopenharmony_ci port->nport = nport; 192162306a36Sopenharmony_ci INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work); 192262306a36Sopenharmony_ci 
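	/*
	 * Map the configfs address family to a socket family so that
	 * inet_pton_with_scope() below can turn traddr/trsvcid into the
	 * sockaddr we bind the listening cm_id to.
	 */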
192362306a36Sopenharmony_ci switch (nport->disc_addr.adrfam) { 192462306a36Sopenharmony_ci case NVMF_ADDR_FAMILY_IP4: 192562306a36Sopenharmony_ci af = AF_INET; 192662306a36Sopenharmony_ci break; 192762306a36Sopenharmony_ci case NVMF_ADDR_FAMILY_IP6: 192862306a36Sopenharmony_ci af = AF_INET6; 192962306a36Sopenharmony_ci break; 193062306a36Sopenharmony_ci default: 193162306a36Sopenharmony_ci pr_err("address family %d not supported\n", 193262306a36Sopenharmony_ci nport->disc_addr.adrfam); 193362306a36Sopenharmony_ci ret = -EINVAL; 193462306a36Sopenharmony_ci goto out_free_port; 193562306a36Sopenharmony_ci } 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci if (nport->inline_data_size < 0) { 193862306a36Sopenharmony_ci nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; 193962306a36Sopenharmony_ci } else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { 194062306a36Sopenharmony_ci pr_warn("inline_data_size %u is too large, reducing to %u\n", 194162306a36Sopenharmony_ci nport->inline_data_size, 194262306a36Sopenharmony_ci NVMET_RDMA_MAX_INLINE_DATA_SIZE); 194362306a36Sopenharmony_ci nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; 194462306a36Sopenharmony_ci } 194562306a36Sopenharmony_ci 194662306a36Sopenharmony_ci ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, 194762306a36Sopenharmony_ci nport->disc_addr.trsvcid, &port->addr); 194862306a36Sopenharmony_ci if (ret) { 194962306a36Sopenharmony_ci pr_err("malformed ip/port passed: %s:%s\n", 195062306a36Sopenharmony_ci nport->disc_addr.traddr, nport->disc_addr.trsvcid); 195162306a36Sopenharmony_ci goto out_free_port; 195262306a36Sopenharmony_ci } 195362306a36Sopenharmony_ci 195462306a36Sopenharmony_ci ret = nvmet_rdma_enable_port(port); 195562306a36Sopenharmony_ci if (ret) 195662306a36Sopenharmony_ci goto out_free_port; 195762306a36Sopenharmony_ci 195862306a36Sopenharmony_ci pr_info("enabling port %d (%pISpcs)\n", 195962306a36Sopenharmony_ci le16_to_cpu(nport->disc_addr.portid), 196062306a36Sopenharmony_ci (struct sockaddr *)&port->addr); 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci return 0; 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_ciout_free_port: 196562306a36Sopenharmony_ci kfree(port); 196662306a36Sopenharmony_ci return ret; 196762306a36Sopenharmony_ci} 196862306a36Sopenharmony_ci 196962306a36Sopenharmony_cistatic void nvmet_rdma_remove_port(struct nvmet_port *nport) 197062306a36Sopenharmony_ci{ 197162306a36Sopenharmony_ci struct nvmet_rdma_port *port = nport->priv; 197262306a36Sopenharmony_ci 197362306a36Sopenharmony_ci cancel_delayed_work_sync(&port->repair_work); 197462306a36Sopenharmony_ci nvmet_rdma_disable_port(port); 197562306a36Sopenharmony_ci kfree(port); 197662306a36Sopenharmony_ci} 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_cistatic void nvmet_rdma_disc_port_addr(struct nvmet_req *req, 197962306a36Sopenharmony_ci struct nvmet_port *nport, char *traddr) 198062306a36Sopenharmony_ci{ 198162306a36Sopenharmony_ci struct nvmet_rdma_port *port = nport->priv; 198262306a36Sopenharmony_ci struct rdma_cm_id *cm_id = port->cm_id; 198362306a36Sopenharmony_ci 198462306a36Sopenharmony_ci if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { 198562306a36Sopenharmony_ci struct nvmet_rdma_rsp *rsp = 198662306a36Sopenharmony_ci container_of(req, struct nvmet_rdma_rsp, req); 198762306a36Sopenharmony_ci struct rdma_cm_id *req_cm_id = rsp->queue->cm_id; 198862306a36Sopenharmony_ci struct sockaddr *addr = (void *)&req_cm_id->route.addr.src_addr; 
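		/*
		 * The port is listening on a wildcard address, so report
		 * the address the host actually connected to instead of
		 * the configured one.
		 */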
198962306a36Sopenharmony_ci 199062306a36Sopenharmony_ci sprintf(traddr, "%pISc", addr); 199162306a36Sopenharmony_ci } else { 199262306a36Sopenharmony_ci memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE); 199362306a36Sopenharmony_ci } 199462306a36Sopenharmony_ci} 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_cistatic u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) 199762306a36Sopenharmony_ci{ 199862306a36Sopenharmony_ci if (ctrl->pi_support) 199962306a36Sopenharmony_ci return NVMET_RDMA_MAX_METADATA_MDTS; 200062306a36Sopenharmony_ci return NVMET_RDMA_MAX_MDTS; 200162306a36Sopenharmony_ci} 200262306a36Sopenharmony_ci 200362306a36Sopenharmony_cistatic u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl) 200462306a36Sopenharmony_ci{ 200562306a36Sopenharmony_ci return NVME_RDMA_MAX_QUEUE_SIZE; 200662306a36Sopenharmony_ci} 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_cistatic const struct nvmet_fabrics_ops nvmet_rdma_ops = { 200962306a36Sopenharmony_ci .owner = THIS_MODULE, 201062306a36Sopenharmony_ci .type = NVMF_TRTYPE_RDMA, 201162306a36Sopenharmony_ci .msdbd = 1, 201262306a36Sopenharmony_ci .flags = NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED, 201362306a36Sopenharmony_ci .add_port = nvmet_rdma_add_port, 201462306a36Sopenharmony_ci .remove_port = nvmet_rdma_remove_port, 201562306a36Sopenharmony_ci .queue_response = nvmet_rdma_queue_response, 201662306a36Sopenharmony_ci .delete_ctrl = nvmet_rdma_delete_ctrl, 201762306a36Sopenharmony_ci .disc_traddr = nvmet_rdma_disc_port_addr, 201862306a36Sopenharmony_ci .get_mdts = nvmet_rdma_get_mdts, 201962306a36Sopenharmony_ci .get_max_queue_size = nvmet_rdma_get_max_queue_size, 202062306a36Sopenharmony_ci}; 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_cistatic void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) 202362306a36Sopenharmony_ci{ 202462306a36Sopenharmony_ci struct nvmet_rdma_queue *queue, *tmp; 202562306a36Sopenharmony_ci struct nvmet_rdma_device *ndev; 202662306a36Sopenharmony_ci bool found = false; 202762306a36Sopenharmony_ci 202862306a36Sopenharmony_ci mutex_lock(&device_list_mutex); 202962306a36Sopenharmony_ci list_for_each_entry(ndev, &device_list, entry) { 203062306a36Sopenharmony_ci if (ndev->device == ib_device) { 203162306a36Sopenharmony_ci found = true; 203262306a36Sopenharmony_ci break; 203362306a36Sopenharmony_ci } 203462306a36Sopenharmony_ci } 203562306a36Sopenharmony_ci mutex_unlock(&device_list_mutex); 203662306a36Sopenharmony_ci 203762306a36Sopenharmony_ci if (!found) 203862306a36Sopenharmony_ci return; 203962306a36Sopenharmony_ci 204062306a36Sopenharmony_ci /* 204162306a36Sopenharmony_ci * IB Device that is used by nvmet controllers is being removed, 204262306a36Sopenharmony_ci * delete all queues using this device. 
204362306a36Sopenharmony_ci */ 204462306a36Sopenharmony_ci mutex_lock(&nvmet_rdma_queue_mutex); 204562306a36Sopenharmony_ci list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, 204662306a36Sopenharmony_ci queue_list) { 204762306a36Sopenharmony_ci if (queue->dev->device != ib_device) 204862306a36Sopenharmony_ci continue; 204962306a36Sopenharmony_ci 205062306a36Sopenharmony_ci pr_info("Removing queue %d\n", queue->idx); 205162306a36Sopenharmony_ci list_del_init(&queue->queue_list); 205262306a36Sopenharmony_ci __nvmet_rdma_queue_disconnect(queue); 205362306a36Sopenharmony_ci } 205462306a36Sopenharmony_ci mutex_unlock(&nvmet_rdma_queue_mutex); 205562306a36Sopenharmony_ci 205662306a36Sopenharmony_ci flush_workqueue(nvmet_wq); 205762306a36Sopenharmony_ci} 205862306a36Sopenharmony_ci 205962306a36Sopenharmony_cistatic struct ib_client nvmet_rdma_ib_client = { 206062306a36Sopenharmony_ci .name = "nvmet_rdma", 206162306a36Sopenharmony_ci .remove = nvmet_rdma_remove_one 206262306a36Sopenharmony_ci}; 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_cistatic int __init nvmet_rdma_init(void) 206562306a36Sopenharmony_ci{ 206662306a36Sopenharmony_ci int ret; 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci ret = ib_register_client(&nvmet_rdma_ib_client); 206962306a36Sopenharmony_ci if (ret) 207062306a36Sopenharmony_ci return ret; 207162306a36Sopenharmony_ci 207262306a36Sopenharmony_ci ret = nvmet_register_transport(&nvmet_rdma_ops); 207362306a36Sopenharmony_ci if (ret) 207462306a36Sopenharmony_ci goto err_ib_client; 207562306a36Sopenharmony_ci 207662306a36Sopenharmony_ci return 0; 207762306a36Sopenharmony_ci 207862306a36Sopenharmony_cierr_ib_client: 207962306a36Sopenharmony_ci ib_unregister_client(&nvmet_rdma_ib_client); 208062306a36Sopenharmony_ci return ret; 208162306a36Sopenharmony_ci} 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_cistatic void __exit nvmet_rdma_exit(void) 208462306a36Sopenharmony_ci{ 208562306a36Sopenharmony_ci nvmet_unregister_transport(&nvmet_rdma_ops); 208662306a36Sopenharmony_ci ib_unregister_client(&nvmet_rdma_ib_client); 208762306a36Sopenharmony_ci WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list)); 208862306a36Sopenharmony_ci ida_destroy(&nvmet_rdma_queue_ida); 208962306a36Sopenharmony_ci} 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_cimodule_init(nvmet_rdma_init); 209262306a36Sopenharmony_cimodule_exit(nvmet_rdma_exit); 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_ciMODULE_LICENSE("GPL v2"); 209562306a36Sopenharmony_ciMODULE_ALIAS("nvmet-transport-1"); /* 1 == NVMF_TRTYPE_RDMA */ 2096