162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2016-2018 Oracle. All rights reserved. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Use the core R/W API to move RPC-over-RDMA Read and Write chunks. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci#include <rdma/rw.h> 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/sunrpc/xdr.h> 1162306a36Sopenharmony_ci#include <linux/sunrpc/rpc_rdma.h> 1262306a36Sopenharmony_ci#include <linux/sunrpc/svc_rdma.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include "xprt_rdma.h" 1562306a36Sopenharmony_ci#include <trace/events/rpcrdma.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_cistatic void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc); 1862306a36Sopenharmony_cistatic void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc); 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci/* Each R/W context contains state for one chain of RDMA Read or 2162306a36Sopenharmony_ci * Write Work Requests. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * Each WR chain handles a single contiguous server-side buffer, 2462306a36Sopenharmony_ci * because scatterlist entries after the first have to start on 2562306a36Sopenharmony_ci * page alignment. xdr_buf iovecs cannot guarantee alignment. 2662306a36Sopenharmony_ci * 2762306a36Sopenharmony_ci * Each WR chain handles only one R_key. Each RPC-over-RDMA segment 2862306a36Sopenharmony_ci * from a client may contain a unique R_key, so each WR chain moves 2962306a36Sopenharmony_ci * up to one segment at a time. 3062306a36Sopenharmony_ci * 3162306a36Sopenharmony_ci * The scatterlist makes this data structure over 4KB in size. 
To 3262306a36Sopenharmony_ci * make it less likely to fail, and to handle the allocation for 3362306a36Sopenharmony_ci * smaller I/O requests without disabling bottom-halves, these 3462306a36Sopenharmony_ci * contexts are created on demand, but cached and reused until the 3562306a36Sopenharmony_ci * controlling svcxprt_rdma is destroyed. 3662306a36Sopenharmony_ci */ 3762306a36Sopenharmony_cistruct svc_rdma_rw_ctxt { 3862306a36Sopenharmony_ci struct llist_node rw_node; 3962306a36Sopenharmony_ci struct list_head rw_list; 4062306a36Sopenharmony_ci struct rdma_rw_ctx rw_ctx; 4162306a36Sopenharmony_ci unsigned int rw_nents; 4262306a36Sopenharmony_ci struct sg_table rw_sg_table; 4362306a36Sopenharmony_ci struct scatterlist rw_first_sgl[]; 4462306a36Sopenharmony_ci}; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic inline struct svc_rdma_rw_ctxt * 4762306a36Sopenharmony_cisvc_rdma_next_ctxt(struct list_head *list) 4862306a36Sopenharmony_ci{ 4962306a36Sopenharmony_ci return list_first_entry_or_null(list, struct svc_rdma_rw_ctxt, 5062306a36Sopenharmony_ci rw_list); 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic struct svc_rdma_rw_ctxt * 5462306a36Sopenharmony_cisvc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) 5562306a36Sopenharmony_ci{ 5662306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt; 5762306a36Sopenharmony_ci struct llist_node *node; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci spin_lock(&rdma->sc_rw_ctxt_lock); 6062306a36Sopenharmony_ci node = llist_del_first(&rdma->sc_rw_ctxts); 6162306a36Sopenharmony_ci spin_unlock(&rdma->sc_rw_ctxt_lock); 6262306a36Sopenharmony_ci if (node) { 6362306a36Sopenharmony_ci ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node); 6462306a36Sopenharmony_ci } else { 6562306a36Sopenharmony_ci ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), 6662306a36Sopenharmony_ci GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device)); 6762306a36Sopenharmony_ci if 
(!ctxt) 6862306a36Sopenharmony_ci goto out_noctx; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci INIT_LIST_HEAD(&ctxt->rw_list); 7162306a36Sopenharmony_ci } 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl; 7462306a36Sopenharmony_ci if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges, 7562306a36Sopenharmony_ci ctxt->rw_sg_table.sgl, 7662306a36Sopenharmony_ci SG_CHUNK_SIZE)) 7762306a36Sopenharmony_ci goto out_free; 7862306a36Sopenharmony_ci return ctxt; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ciout_free: 8162306a36Sopenharmony_ci kfree(ctxt); 8262306a36Sopenharmony_ciout_noctx: 8362306a36Sopenharmony_ci trace_svcrdma_no_rwctx_err(rdma, sges); 8462306a36Sopenharmony_ci return NULL; 8562306a36Sopenharmony_ci} 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistatic void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt, 8862306a36Sopenharmony_ci struct llist_head *list) 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE); 9162306a36Sopenharmony_ci llist_add(&ctxt->rw_node, list); 9262306a36Sopenharmony_ci} 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_cistatic void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, 9562306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt) 9662306a36Sopenharmony_ci{ 9762306a36Sopenharmony_ci __svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts); 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci/** 10162306a36Sopenharmony_ci * svc_rdma_destroy_rw_ctxts - Free accumulated R/W contexts 10262306a36Sopenharmony_ci * @rdma: transport about to be destroyed 10362306a36Sopenharmony_ci * 10462306a36Sopenharmony_ci */ 10562306a36Sopenharmony_civoid svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt; 10862306a36Sopenharmony_ci struct llist_node *node; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci while ((node = 
llist_del_first(&rdma->sc_rw_ctxts)) != NULL) { 11162306a36Sopenharmony_ci ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node); 11262306a36Sopenharmony_ci kfree(ctxt); 11362306a36Sopenharmony_ci } 11462306a36Sopenharmony_ci} 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci/** 11762306a36Sopenharmony_ci * svc_rdma_rw_ctx_init - Prepare a R/W context for I/O 11862306a36Sopenharmony_ci * @rdma: controlling transport instance 11962306a36Sopenharmony_ci * @ctxt: R/W context to prepare 12062306a36Sopenharmony_ci * @offset: RDMA offset 12162306a36Sopenharmony_ci * @handle: RDMA tag/handle 12262306a36Sopenharmony_ci * @direction: I/O direction 12362306a36Sopenharmony_ci * 12462306a36Sopenharmony_ci * Returns on success, the number of WQEs that will be needed 12562306a36Sopenharmony_ci * on the workqueue, or a negative errno. 12662306a36Sopenharmony_ci */ 12762306a36Sopenharmony_cistatic int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, 12862306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt, 12962306a36Sopenharmony_ci u64 offset, u32 handle, 13062306a36Sopenharmony_ci enum dma_data_direction direction) 13162306a36Sopenharmony_ci{ 13262306a36Sopenharmony_ci int ret; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num, 13562306a36Sopenharmony_ci ctxt->rw_sg_table.sgl, ctxt->rw_nents, 13662306a36Sopenharmony_ci 0, offset, handle, direction); 13762306a36Sopenharmony_ci if (unlikely(ret < 0)) { 13862306a36Sopenharmony_ci svc_rdma_put_rw_ctxt(rdma, ctxt); 13962306a36Sopenharmony_ci trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret); 14062306a36Sopenharmony_ci } 14162306a36Sopenharmony_ci return ret; 14262306a36Sopenharmony_ci} 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci/* A chunk context tracks all I/O for moving one Read or Write 14562306a36Sopenharmony_ci * chunk. This is a set of rdma_rw's that handle data movement 14662306a36Sopenharmony_ci * for all segments of one chunk. 
14762306a36Sopenharmony_ci * 14862306a36Sopenharmony_ci * These are small, acquired with a single allocator call, and 14962306a36Sopenharmony_ci * no more than one is needed per chunk. They are allocated on 15062306a36Sopenharmony_ci * demand, and not cached. 15162306a36Sopenharmony_ci */ 15262306a36Sopenharmony_cistruct svc_rdma_chunk_ctxt { 15362306a36Sopenharmony_ci struct rpc_rdma_cid cc_cid; 15462306a36Sopenharmony_ci struct ib_cqe cc_cqe; 15562306a36Sopenharmony_ci struct svcxprt_rdma *cc_rdma; 15662306a36Sopenharmony_ci struct list_head cc_rwctxts; 15762306a36Sopenharmony_ci ktime_t cc_posttime; 15862306a36Sopenharmony_ci int cc_sqecount; 15962306a36Sopenharmony_ci enum ib_wc_status cc_status; 16062306a36Sopenharmony_ci struct completion cc_done; 16162306a36Sopenharmony_ci}; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_cistatic void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma, 16462306a36Sopenharmony_ci struct rpc_rdma_cid *cid) 16562306a36Sopenharmony_ci{ 16662306a36Sopenharmony_ci cid->ci_queue_id = rdma->sc_sq_cq->res.id; 16762306a36Sopenharmony_ci cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 16862306a36Sopenharmony_ci} 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_cistatic void svc_rdma_cc_init(struct svcxprt_rdma *rdma, 17162306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt *cc) 17262306a36Sopenharmony_ci{ 17362306a36Sopenharmony_ci svc_rdma_cc_cid_init(rdma, &cc->cc_cid); 17462306a36Sopenharmony_ci cc->cc_rdma = rdma; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci INIT_LIST_HEAD(&cc->cc_rwctxts); 17762306a36Sopenharmony_ci cc->cc_sqecount = 0; 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci/* 18162306a36Sopenharmony_ci * The consumed rw_ctx's are cleaned and placed on a local llist so 18262306a36Sopenharmony_ci * that only one atomic llist operation is needed to put them all 18362306a36Sopenharmony_ci * back on the free list. 
18462306a36Sopenharmony_ci */ 18562306a36Sopenharmony_cistatic void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc, 18662306a36Sopenharmony_ci enum dma_data_direction dir) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci struct svcxprt_rdma *rdma = cc->cc_rdma; 18962306a36Sopenharmony_ci struct llist_node *first, *last; 19062306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt; 19162306a36Sopenharmony_ci LLIST_HEAD(free); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci first = last = NULL; 19662306a36Sopenharmony_ci while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) { 19762306a36Sopenharmony_ci list_del(&ctxt->rw_list); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp, 20062306a36Sopenharmony_ci rdma->sc_port_num, ctxt->rw_sg_table.sgl, 20162306a36Sopenharmony_ci ctxt->rw_nents, dir); 20262306a36Sopenharmony_ci __svc_rdma_put_rw_ctxt(ctxt, &free); 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci ctxt->rw_node.next = first; 20562306a36Sopenharmony_ci first = &ctxt->rw_node; 20662306a36Sopenharmony_ci if (!last) 20762306a36Sopenharmony_ci last = first; 20862306a36Sopenharmony_ci } 20962306a36Sopenharmony_ci if (first) 21062306a36Sopenharmony_ci llist_add_batch(first, last, &rdma->sc_rw_ctxts); 21162306a36Sopenharmony_ci} 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci/* State for sending a Write or Reply chunk. 
21462306a36Sopenharmony_ci * - Tracks progress of writing one chunk over all its segments 21562306a36Sopenharmony_ci * - Stores arguments for the SGL constructor functions 21662306a36Sopenharmony_ci */ 21762306a36Sopenharmony_cistruct svc_rdma_write_info { 21862306a36Sopenharmony_ci const struct svc_rdma_chunk *wi_chunk; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci /* write state of this chunk */ 22162306a36Sopenharmony_ci unsigned int wi_seg_off; 22262306a36Sopenharmony_ci unsigned int wi_seg_no; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci /* SGL constructor arguments */ 22562306a36Sopenharmony_ci const struct xdr_buf *wi_xdr; 22662306a36Sopenharmony_ci unsigned char *wi_base; 22762306a36Sopenharmony_ci unsigned int wi_next_off; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt wi_cc; 23062306a36Sopenharmony_ci}; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_cistatic struct svc_rdma_write_info * 23362306a36Sopenharmony_cisvc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, 23462306a36Sopenharmony_ci const struct svc_rdma_chunk *chunk) 23562306a36Sopenharmony_ci{ 23662306a36Sopenharmony_ci struct svc_rdma_write_info *info; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci info = kmalloc_node(sizeof(*info), GFP_KERNEL, 23962306a36Sopenharmony_ci ibdev_to_node(rdma->sc_cm_id->device)); 24062306a36Sopenharmony_ci if (!info) 24162306a36Sopenharmony_ci return info; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci info->wi_chunk = chunk; 24462306a36Sopenharmony_ci info->wi_seg_off = 0; 24562306a36Sopenharmony_ci info->wi_seg_no = 0; 24662306a36Sopenharmony_ci svc_rdma_cc_init(rdma, &info->wi_cc); 24762306a36Sopenharmony_ci info->wi_cc.cc_cqe.done = svc_rdma_write_done; 24862306a36Sopenharmony_ci return info; 24962306a36Sopenharmony_ci} 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_cistatic void svc_rdma_write_info_free(struct svc_rdma_write_info *info) 25262306a36Sopenharmony_ci{ 
25362306a36Sopenharmony_ci svc_rdma_cc_release(&info->wi_cc, DMA_TO_DEVICE); 25462306a36Sopenharmony_ci kfree(info); 25562306a36Sopenharmony_ci} 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci/** 25862306a36Sopenharmony_ci * svc_rdma_write_done - Write chunk completion 25962306a36Sopenharmony_ci * @cq: controlling Completion Queue 26062306a36Sopenharmony_ci * @wc: Work Completion 26162306a36Sopenharmony_ci * 26262306a36Sopenharmony_ci * Pages under I/O are freed by a subsequent Send completion. 26362306a36Sopenharmony_ci */ 26462306a36Sopenharmony_cistatic void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci struct ib_cqe *cqe = wc->wr_cqe; 26762306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt *cc = 26862306a36Sopenharmony_ci container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); 26962306a36Sopenharmony_ci struct svcxprt_rdma *rdma = cc->cc_rdma; 27062306a36Sopenharmony_ci struct svc_rdma_write_info *info = 27162306a36Sopenharmony_ci container_of(cc, struct svc_rdma_write_info, wi_cc); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci switch (wc->status) { 27462306a36Sopenharmony_ci case IB_WC_SUCCESS: 27562306a36Sopenharmony_ci trace_svcrdma_wc_write(wc, &cc->cc_cid); 27662306a36Sopenharmony_ci break; 27762306a36Sopenharmony_ci case IB_WC_WR_FLUSH_ERR: 27862306a36Sopenharmony_ci trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); 27962306a36Sopenharmony_ci break; 28062306a36Sopenharmony_ci default: 28162306a36Sopenharmony_ci trace_svcrdma_wc_write_err(wc, &cc->cc_cid); 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) 28762306a36Sopenharmony_ci svc_xprt_deferred_close(&rdma->sc_xprt); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci svc_rdma_write_info_free(info); 29062306a36Sopenharmony_ci} 
29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci/* State for pulling a Read chunk. 29362306a36Sopenharmony_ci */ 29462306a36Sopenharmony_cistruct svc_rdma_read_info { 29562306a36Sopenharmony_ci struct svc_rqst *ri_rqst; 29662306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ri_readctxt; 29762306a36Sopenharmony_ci unsigned int ri_pageno; 29862306a36Sopenharmony_ci unsigned int ri_pageoff; 29962306a36Sopenharmony_ci unsigned int ri_totalbytes; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt ri_cc; 30262306a36Sopenharmony_ci}; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_cistatic struct svc_rdma_read_info * 30562306a36Sopenharmony_cisvc_rdma_read_info_alloc(struct svcxprt_rdma *rdma) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci struct svc_rdma_read_info *info; 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci info = kmalloc_node(sizeof(*info), GFP_KERNEL, 31062306a36Sopenharmony_ci ibdev_to_node(rdma->sc_cm_id->device)); 31162306a36Sopenharmony_ci if (!info) 31262306a36Sopenharmony_ci return info; 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci svc_rdma_cc_init(rdma, &info->ri_cc); 31562306a36Sopenharmony_ci info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done; 31662306a36Sopenharmony_ci return info; 31762306a36Sopenharmony_ci} 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_cistatic void svc_rdma_read_info_free(struct svc_rdma_read_info *info) 32062306a36Sopenharmony_ci{ 32162306a36Sopenharmony_ci svc_rdma_cc_release(&info->ri_cc, DMA_FROM_DEVICE); 32262306a36Sopenharmony_ci kfree(info); 32362306a36Sopenharmony_ci} 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci/** 32662306a36Sopenharmony_ci * svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx 32762306a36Sopenharmony_ci * @cq: controlling Completion Queue 32862306a36Sopenharmony_ci * @wc: Work Completion 32962306a36Sopenharmony_ci * 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_cistatic void svc_rdma_wc_read_done(struct ib_cq *cq, 
struct ib_wc *wc) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci struct ib_cqe *cqe = wc->wr_cqe; 33462306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt *cc = 33562306a36Sopenharmony_ci container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); 33662306a36Sopenharmony_ci struct svc_rdma_read_info *info; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci switch (wc->status) { 33962306a36Sopenharmony_ci case IB_WC_SUCCESS: 34062306a36Sopenharmony_ci info = container_of(cc, struct svc_rdma_read_info, ri_cc); 34162306a36Sopenharmony_ci trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes, 34262306a36Sopenharmony_ci cc->cc_posttime); 34362306a36Sopenharmony_ci break; 34462306a36Sopenharmony_ci case IB_WC_WR_FLUSH_ERR: 34562306a36Sopenharmony_ci trace_svcrdma_wc_read_flush(wc, &cc->cc_cid); 34662306a36Sopenharmony_ci break; 34762306a36Sopenharmony_ci default: 34862306a36Sopenharmony_ci trace_svcrdma_wc_read_err(wc, &cc->cc_cid); 34962306a36Sopenharmony_ci } 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount); 35262306a36Sopenharmony_ci cc->cc_status = wc->status; 35362306a36Sopenharmony_ci complete(&cc->cc_done); 35462306a36Sopenharmony_ci return; 35562306a36Sopenharmony_ci} 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci/* 35862306a36Sopenharmony_ci * Assumptions: 35962306a36Sopenharmony_ci * - If ib_post_send() succeeds, only one completion is expected, 36062306a36Sopenharmony_ci * even if one or more WRs are flushed. This is true when posting 36162306a36Sopenharmony_ci * an rdma_rw_ctx or when posting a single signaled WR. 
36262306a36Sopenharmony_ci */ 36362306a36Sopenharmony_cistatic int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) 36462306a36Sopenharmony_ci{ 36562306a36Sopenharmony_ci struct svcxprt_rdma *rdma = cc->cc_rdma; 36662306a36Sopenharmony_ci struct ib_send_wr *first_wr; 36762306a36Sopenharmony_ci const struct ib_send_wr *bad_wr; 36862306a36Sopenharmony_ci struct list_head *tmp; 36962306a36Sopenharmony_ci struct ib_cqe *cqe; 37062306a36Sopenharmony_ci int ret; 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci might_sleep(); 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci if (cc->cc_sqecount > rdma->sc_sq_depth) 37562306a36Sopenharmony_ci return -EINVAL; 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci first_wr = NULL; 37862306a36Sopenharmony_ci cqe = &cc->cc_cqe; 37962306a36Sopenharmony_ci list_for_each(tmp, &cc->cc_rwctxts) { 38062306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci ctxt = list_entry(tmp, struct svc_rdma_rw_ctxt, rw_list); 38362306a36Sopenharmony_ci first_wr = rdma_rw_ctx_wrs(&ctxt->rw_ctx, rdma->sc_qp, 38462306a36Sopenharmony_ci rdma->sc_port_num, cqe, first_wr); 38562306a36Sopenharmony_ci cqe = NULL; 38662306a36Sopenharmony_ci } 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci do { 38962306a36Sopenharmony_ci if (atomic_sub_return(cc->cc_sqecount, 39062306a36Sopenharmony_ci &rdma->sc_sq_avail) > 0) { 39162306a36Sopenharmony_ci cc->cc_posttime = ktime_get(); 39262306a36Sopenharmony_ci ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); 39362306a36Sopenharmony_ci if (ret) 39462306a36Sopenharmony_ci break; 39562306a36Sopenharmony_ci return 0; 39662306a36Sopenharmony_ci } 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci percpu_counter_inc(&svcrdma_stat_sq_starve); 39962306a36Sopenharmony_ci trace_svcrdma_sq_full(rdma); 40062306a36Sopenharmony_ci atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 40162306a36Sopenharmony_ci wait_event(rdma->sc_send_wait, 
40262306a36Sopenharmony_ci atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); 40362306a36Sopenharmony_ci trace_svcrdma_sq_retry(rdma); 40462306a36Sopenharmony_ci } while (1); 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci trace_svcrdma_sq_post_err(rdma, ret); 40762306a36Sopenharmony_ci svc_xprt_deferred_close(&rdma->sc_xprt); 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci /* If even one was posted, there will be a completion. */ 41062306a36Sopenharmony_ci if (bad_wr != first_wr) 41162306a36Sopenharmony_ci return 0; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); 41462306a36Sopenharmony_ci wake_up(&rdma->sc_send_wait); 41562306a36Sopenharmony_ci return -ENOTCONN; 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci/* Build and DMA-map an SGL that covers one kvec in an xdr_buf 41962306a36Sopenharmony_ci */ 42062306a36Sopenharmony_cistatic void svc_rdma_vec_to_sg(struct svc_rdma_write_info *info, 42162306a36Sopenharmony_ci unsigned int len, 42262306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt) 42362306a36Sopenharmony_ci{ 42462306a36Sopenharmony_ci struct scatterlist *sg = ctxt->rw_sg_table.sgl; 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci sg_set_buf(&sg[0], info->wi_base, len); 42762306a36Sopenharmony_ci info->wi_base += len; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci ctxt->rw_nents = 1; 43062306a36Sopenharmony_ci} 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci/* Build and DMA-map an SGL that covers part of an xdr_buf's pagelist. 
43362306a36Sopenharmony_ci */ 43462306a36Sopenharmony_cistatic void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info, 43562306a36Sopenharmony_ci unsigned int remaining, 43662306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt) 43762306a36Sopenharmony_ci{ 43862306a36Sopenharmony_ci unsigned int sge_no, sge_bytes, page_off, page_no; 43962306a36Sopenharmony_ci const struct xdr_buf *xdr = info->wi_xdr; 44062306a36Sopenharmony_ci struct scatterlist *sg; 44162306a36Sopenharmony_ci struct page **page; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci page_off = info->wi_next_off + xdr->page_base; 44462306a36Sopenharmony_ci page_no = page_off >> PAGE_SHIFT; 44562306a36Sopenharmony_ci page_off = offset_in_page(page_off); 44662306a36Sopenharmony_ci page = xdr->pages + page_no; 44762306a36Sopenharmony_ci info->wi_next_off += remaining; 44862306a36Sopenharmony_ci sg = ctxt->rw_sg_table.sgl; 44962306a36Sopenharmony_ci sge_no = 0; 45062306a36Sopenharmony_ci do { 45162306a36Sopenharmony_ci sge_bytes = min_t(unsigned int, remaining, 45262306a36Sopenharmony_ci PAGE_SIZE - page_off); 45362306a36Sopenharmony_ci sg_set_page(sg, *page, sge_bytes, page_off); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci remaining -= sge_bytes; 45662306a36Sopenharmony_ci sg = sg_next(sg); 45762306a36Sopenharmony_ci page_off = 0; 45862306a36Sopenharmony_ci sge_no++; 45962306a36Sopenharmony_ci page++; 46062306a36Sopenharmony_ci } while (remaining); 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci ctxt->rw_nents = sge_no; 46362306a36Sopenharmony_ci} 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci/* Construct RDMA Write WRs to send a portion of an xdr_buf containing 46662306a36Sopenharmony_ci * an RPC Reply. 
46762306a36Sopenharmony_ci */ 46862306a36Sopenharmony_cistatic int 46962306a36Sopenharmony_cisvc_rdma_build_writes(struct svc_rdma_write_info *info, 47062306a36Sopenharmony_ci void (*constructor)(struct svc_rdma_write_info *info, 47162306a36Sopenharmony_ci unsigned int len, 47262306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt), 47362306a36Sopenharmony_ci unsigned int remaining) 47462306a36Sopenharmony_ci{ 47562306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt *cc = &info->wi_cc; 47662306a36Sopenharmony_ci struct svcxprt_rdma *rdma = cc->cc_rdma; 47762306a36Sopenharmony_ci const struct svc_rdma_segment *seg; 47862306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt; 47962306a36Sopenharmony_ci int ret; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci do { 48262306a36Sopenharmony_ci unsigned int write_len; 48362306a36Sopenharmony_ci u64 offset; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci if (info->wi_seg_no >= info->wi_chunk->ch_segcount) 48662306a36Sopenharmony_ci goto out_overflow; 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci seg = &info->wi_chunk->ch_segments[info->wi_seg_no]; 48962306a36Sopenharmony_ci write_len = min(remaining, seg->rs_length - info->wi_seg_off); 49062306a36Sopenharmony_ci if (!write_len) 49162306a36Sopenharmony_ci goto out_overflow; 49262306a36Sopenharmony_ci ctxt = svc_rdma_get_rw_ctxt(rdma, 49362306a36Sopenharmony_ci (write_len >> PAGE_SHIFT) + 2); 49462306a36Sopenharmony_ci if (!ctxt) 49562306a36Sopenharmony_ci return -ENOMEM; 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_ci constructor(info, write_len, ctxt); 49862306a36Sopenharmony_ci offset = seg->rs_offset + info->wi_seg_off; 49962306a36Sopenharmony_ci ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, seg->rs_handle, 50062306a36Sopenharmony_ci DMA_TO_DEVICE); 50162306a36Sopenharmony_ci if (ret < 0) 50262306a36Sopenharmony_ci return -EIO; 50362306a36Sopenharmony_ci percpu_counter_inc(&svcrdma_stat_write); 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci 
list_add(&ctxt->rw_list, &cc->cc_rwctxts); 50662306a36Sopenharmony_ci cc->cc_sqecount += ret; 50762306a36Sopenharmony_ci if (write_len == seg->rs_length - info->wi_seg_off) { 50862306a36Sopenharmony_ci info->wi_seg_no++; 50962306a36Sopenharmony_ci info->wi_seg_off = 0; 51062306a36Sopenharmony_ci } else { 51162306a36Sopenharmony_ci info->wi_seg_off += write_len; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci remaining -= write_len; 51462306a36Sopenharmony_ci } while (remaining); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci return 0; 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ciout_overflow: 51962306a36Sopenharmony_ci trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no, 52062306a36Sopenharmony_ci info->wi_chunk->ch_segcount); 52162306a36Sopenharmony_ci return -E2BIG; 52262306a36Sopenharmony_ci} 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci/** 52562306a36Sopenharmony_ci * svc_rdma_iov_write - Construct RDMA Writes from an iov 52662306a36Sopenharmony_ci * @info: pointer to write arguments 52762306a36Sopenharmony_ci * @iov: kvec to write 52862306a36Sopenharmony_ci * 52962306a36Sopenharmony_ci * Returns: 53062306a36Sopenharmony_ci * On success, returns zero 53162306a36Sopenharmony_ci * %-E2BIG if the client-provided Write chunk is too small 53262306a36Sopenharmony_ci * %-ENOMEM if a resource has been exhausted 53362306a36Sopenharmony_ci * %-EIO if an rdma-rw error occurred 53462306a36Sopenharmony_ci */ 53562306a36Sopenharmony_cistatic int svc_rdma_iov_write(struct svc_rdma_write_info *info, 53662306a36Sopenharmony_ci const struct kvec *iov) 53762306a36Sopenharmony_ci{ 53862306a36Sopenharmony_ci info->wi_base = iov->iov_base; 53962306a36Sopenharmony_ci return svc_rdma_build_writes(info, svc_rdma_vec_to_sg, 54062306a36Sopenharmony_ci iov->iov_len); 54162306a36Sopenharmony_ci} 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci/** 54462306a36Sopenharmony_ci * svc_rdma_pages_write - Construct RDMA Writes from pages 
54562306a36Sopenharmony_ci * @info: pointer to write arguments 54662306a36Sopenharmony_ci * @xdr: xdr_buf with pages to write 54762306a36Sopenharmony_ci * @offset: offset into the content of @xdr 54862306a36Sopenharmony_ci * @length: number of bytes to write 54962306a36Sopenharmony_ci * 55062306a36Sopenharmony_ci * Returns: 55162306a36Sopenharmony_ci * On success, returns zero 55262306a36Sopenharmony_ci * %-E2BIG if the client-provided Write chunk is too small 55362306a36Sopenharmony_ci * %-ENOMEM if a resource has been exhausted 55462306a36Sopenharmony_ci * %-EIO if an rdma-rw error occurred 55562306a36Sopenharmony_ci */ 55662306a36Sopenharmony_cistatic int svc_rdma_pages_write(struct svc_rdma_write_info *info, 55762306a36Sopenharmony_ci const struct xdr_buf *xdr, 55862306a36Sopenharmony_ci unsigned int offset, 55962306a36Sopenharmony_ci unsigned long length) 56062306a36Sopenharmony_ci{ 56162306a36Sopenharmony_ci info->wi_xdr = xdr; 56262306a36Sopenharmony_ci info->wi_next_off = offset - xdr->head[0].iov_len; 56362306a36Sopenharmony_ci return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg, 56462306a36Sopenharmony_ci length); 56562306a36Sopenharmony_ci} 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci/** 56862306a36Sopenharmony_ci * svc_rdma_xb_write - Construct RDMA Writes to write an xdr_buf 56962306a36Sopenharmony_ci * @xdr: xdr_buf to write 57062306a36Sopenharmony_ci * @data: pointer to write arguments 57162306a36Sopenharmony_ci * 57262306a36Sopenharmony_ci * Returns: 57362306a36Sopenharmony_ci * On success, returns zero 57462306a36Sopenharmony_ci * %-E2BIG if the client-provided Write chunk is too small 57562306a36Sopenharmony_ci * %-ENOMEM if a resource has been exhausted 57662306a36Sopenharmony_ci * %-EIO if an rdma-rw error occurred 57762306a36Sopenharmony_ci */ 57862306a36Sopenharmony_cistatic int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data) 57962306a36Sopenharmony_ci{ 58062306a36Sopenharmony_ci struct svc_rdma_write_info *info = 
data; 58162306a36Sopenharmony_ci int ret; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci if (xdr->head[0].iov_len) { 58462306a36Sopenharmony_ci ret = svc_rdma_iov_write(info, &xdr->head[0]); 58562306a36Sopenharmony_ci if (ret < 0) 58662306a36Sopenharmony_ci return ret; 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci if (xdr->page_len) { 59062306a36Sopenharmony_ci ret = svc_rdma_pages_write(info, xdr, xdr->head[0].iov_len, 59162306a36Sopenharmony_ci xdr->page_len); 59262306a36Sopenharmony_ci if (ret < 0) 59362306a36Sopenharmony_ci return ret; 59462306a36Sopenharmony_ci } 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci if (xdr->tail[0].iov_len) { 59762306a36Sopenharmony_ci ret = svc_rdma_iov_write(info, &xdr->tail[0]); 59862306a36Sopenharmony_ci if (ret < 0) 59962306a36Sopenharmony_ci return ret; 60062306a36Sopenharmony_ci } 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci return xdr->len; 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci/** 60662306a36Sopenharmony_ci * svc_rdma_send_write_chunk - Write all segments in a Write chunk 60762306a36Sopenharmony_ci * @rdma: controlling RDMA transport 60862306a36Sopenharmony_ci * @chunk: Write chunk provided by the client 60962306a36Sopenharmony_ci * @xdr: xdr_buf containing the data payload 61062306a36Sopenharmony_ci * 61162306a36Sopenharmony_ci * Returns a non-negative number of bytes the chunk consumed, or 61262306a36Sopenharmony_ci * %-E2BIG if the payload was larger than the Write chunk, 61362306a36Sopenharmony_ci * %-EINVAL if client provided too many segments, 61462306a36Sopenharmony_ci * %-ENOMEM if rdma_rw context pool was exhausted, 61562306a36Sopenharmony_ci * %-ENOTCONN if posting failed (connection is lost), 61662306a36Sopenharmony_ci * %-EIO if rdma_rw initialization failed (DMA mapping, etc). 
61762306a36Sopenharmony_ci */ 61862306a36Sopenharmony_ciint svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, 61962306a36Sopenharmony_ci const struct svc_rdma_chunk *chunk, 62062306a36Sopenharmony_ci const struct xdr_buf *xdr) 62162306a36Sopenharmony_ci{ 62262306a36Sopenharmony_ci struct svc_rdma_write_info *info; 62362306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt *cc; 62462306a36Sopenharmony_ci int ret; 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci info = svc_rdma_write_info_alloc(rdma, chunk); 62762306a36Sopenharmony_ci if (!info) 62862306a36Sopenharmony_ci return -ENOMEM; 62962306a36Sopenharmony_ci cc = &info->wi_cc; 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci ret = svc_rdma_xb_write(xdr, info); 63262306a36Sopenharmony_ci if (ret != xdr->len) 63362306a36Sopenharmony_ci goto out_err; 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); 63662306a36Sopenharmony_ci ret = svc_rdma_post_chunk_ctxt(cc); 63762306a36Sopenharmony_ci if (ret < 0) 63862306a36Sopenharmony_ci goto out_err; 63962306a36Sopenharmony_ci return xdr->len; 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ciout_err: 64262306a36Sopenharmony_ci svc_rdma_write_info_free(info); 64362306a36Sopenharmony_ci return ret; 64462306a36Sopenharmony_ci} 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci/** 64762306a36Sopenharmony_ci * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk 64862306a36Sopenharmony_ci * @rdma: controlling RDMA transport 64962306a36Sopenharmony_ci * @rctxt: Write and Reply chunks from client 65062306a36Sopenharmony_ci * @xdr: xdr_buf containing an RPC Reply 65162306a36Sopenharmony_ci * 65262306a36Sopenharmony_ci * Returns a non-negative number of bytes the chunk consumed, or 65362306a36Sopenharmony_ci * %-E2BIG if the payload was larger than the Reply chunk, 65462306a36Sopenharmony_ci * %-EINVAL if client provided too many segments, 65562306a36Sopenharmony_ci * %-ENOMEM if rdma_rw 
context pool was exhausted, 65662306a36Sopenharmony_ci * %-ENOTCONN if posting failed (connection is lost), 65762306a36Sopenharmony_ci * %-EIO if rdma_rw initialization failed (DMA mapping, etc). 65862306a36Sopenharmony_ci */ 65962306a36Sopenharmony_ciint svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, 66062306a36Sopenharmony_ci const struct svc_rdma_recv_ctxt *rctxt, 66162306a36Sopenharmony_ci const struct xdr_buf *xdr) 66262306a36Sopenharmony_ci{ 66362306a36Sopenharmony_ci struct svc_rdma_write_info *info; 66462306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt *cc; 66562306a36Sopenharmony_ci struct svc_rdma_chunk *chunk; 66662306a36Sopenharmony_ci int ret; 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci if (pcl_is_empty(&rctxt->rc_reply_pcl)) 66962306a36Sopenharmony_ci return 0; 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci chunk = pcl_first_chunk(&rctxt->rc_reply_pcl); 67262306a36Sopenharmony_ci info = svc_rdma_write_info_alloc(rdma, chunk); 67362306a36Sopenharmony_ci if (!info) 67462306a36Sopenharmony_ci return -ENOMEM; 67562306a36Sopenharmony_ci cc = &info->wi_cc; 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr, 67862306a36Sopenharmony_ci svc_rdma_xb_write, info); 67962306a36Sopenharmony_ci if (ret < 0) 68062306a36Sopenharmony_ci goto out_err; 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount); 68362306a36Sopenharmony_ci ret = svc_rdma_post_chunk_ctxt(cc); 68462306a36Sopenharmony_ci if (ret < 0) 68562306a36Sopenharmony_ci goto out_err; 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci return xdr->len; 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ciout_err: 69062306a36Sopenharmony_ci svc_rdma_write_info_free(info); 69162306a36Sopenharmony_ci return ret; 69262306a36Sopenharmony_ci} 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci/** 69562306a36Sopenharmony_ci * svc_rdma_build_read_segment - Build RDMA Read 
WQEs to pull one RDMA segment 69662306a36Sopenharmony_ci * @info: context for ongoing I/O 69762306a36Sopenharmony_ci * @segment: co-ordinates of remote memory to be read 69862306a36Sopenharmony_ci * 69962306a36Sopenharmony_ci * Returns: 70062306a36Sopenharmony_ci * %0: the Read WR chain was constructed successfully 70162306a36Sopenharmony_ci * %-EINVAL: there were not enough rq_pages to finish 70262306a36Sopenharmony_ci * %-ENOMEM: allocating a local resources failed 70362306a36Sopenharmony_ci * %-EIO: a DMA mapping error occurred 70462306a36Sopenharmony_ci */ 70562306a36Sopenharmony_cistatic int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, 70662306a36Sopenharmony_ci const struct svc_rdma_segment *segment) 70762306a36Sopenharmony_ci{ 70862306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 70962306a36Sopenharmony_ci struct svc_rdma_chunk_ctxt *cc = &info->ri_cc; 71062306a36Sopenharmony_ci struct svc_rqst *rqstp = info->ri_rqst; 71162306a36Sopenharmony_ci unsigned int sge_no, seg_len, len; 71262306a36Sopenharmony_ci struct svc_rdma_rw_ctxt *ctxt; 71362306a36Sopenharmony_ci struct scatterlist *sg; 71462306a36Sopenharmony_ci int ret; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci len = segment->rs_length; 71762306a36Sopenharmony_ci sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT; 71862306a36Sopenharmony_ci ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no); 71962306a36Sopenharmony_ci if (!ctxt) 72062306a36Sopenharmony_ci return -ENOMEM; 72162306a36Sopenharmony_ci ctxt->rw_nents = sge_no; 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci sg = ctxt->rw_sg_table.sgl; 72462306a36Sopenharmony_ci for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) { 72562306a36Sopenharmony_ci seg_len = min_t(unsigned int, len, 72662306a36Sopenharmony_ci PAGE_SIZE - info->ri_pageoff); 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci if (!info->ri_pageoff) 72962306a36Sopenharmony_ci head->rc_page_count++; 
73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci sg_set_page(sg, rqstp->rq_pages[info->ri_pageno], 73262306a36Sopenharmony_ci seg_len, info->ri_pageoff); 73362306a36Sopenharmony_ci sg = sg_next(sg); 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci info->ri_pageoff += seg_len; 73662306a36Sopenharmony_ci if (info->ri_pageoff == PAGE_SIZE) { 73762306a36Sopenharmony_ci info->ri_pageno++; 73862306a36Sopenharmony_ci info->ri_pageoff = 0; 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci len -= seg_len; 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci /* Safety check */ 74362306a36Sopenharmony_ci if (len && 74462306a36Sopenharmony_ci &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end) 74562306a36Sopenharmony_ci goto out_overrun; 74662306a36Sopenharmony_ci } 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_ci ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, segment->rs_offset, 74962306a36Sopenharmony_ci segment->rs_handle, DMA_FROM_DEVICE); 75062306a36Sopenharmony_ci if (ret < 0) 75162306a36Sopenharmony_ci return -EIO; 75262306a36Sopenharmony_ci percpu_counter_inc(&svcrdma_stat_read); 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci list_add(&ctxt->rw_list, &cc->cc_rwctxts); 75562306a36Sopenharmony_ci cc->cc_sqecount += ret; 75662306a36Sopenharmony_ci return 0; 75762306a36Sopenharmony_ci 75862306a36Sopenharmony_ciout_overrun: 75962306a36Sopenharmony_ci trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno); 76062306a36Sopenharmony_ci return -EINVAL; 76162306a36Sopenharmony_ci} 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci/** 76462306a36Sopenharmony_ci * svc_rdma_build_read_chunk - Build RDMA Read WQEs to pull one RDMA chunk 76562306a36Sopenharmony_ci * @info: context for ongoing I/O 76662306a36Sopenharmony_ci * @chunk: Read chunk to pull 76762306a36Sopenharmony_ci * 76862306a36Sopenharmony_ci * Return values: 76962306a36Sopenharmony_ci * %0: the Read WR chain was constructed successfully 77062306a36Sopenharmony_ci * 
%-EINVAL: there were not enough resources to finish 77162306a36Sopenharmony_ci * %-ENOMEM: allocating a local resources failed 77262306a36Sopenharmony_ci * %-EIO: a DMA mapping error occurred 77362306a36Sopenharmony_ci */ 77462306a36Sopenharmony_cistatic int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info, 77562306a36Sopenharmony_ci const struct svc_rdma_chunk *chunk) 77662306a36Sopenharmony_ci{ 77762306a36Sopenharmony_ci const struct svc_rdma_segment *segment; 77862306a36Sopenharmony_ci int ret; 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_ci ret = -EINVAL; 78162306a36Sopenharmony_ci pcl_for_each_segment(segment, chunk) { 78262306a36Sopenharmony_ci ret = svc_rdma_build_read_segment(info, segment); 78362306a36Sopenharmony_ci if (ret < 0) 78462306a36Sopenharmony_ci break; 78562306a36Sopenharmony_ci info->ri_totalbytes += segment->rs_length; 78662306a36Sopenharmony_ci } 78762306a36Sopenharmony_ci return ret; 78862306a36Sopenharmony_ci} 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci/** 79162306a36Sopenharmony_ci * svc_rdma_copy_inline_range - Copy part of the inline content into pages 79262306a36Sopenharmony_ci * @info: context for RDMA Reads 79362306a36Sopenharmony_ci * @offset: offset into the Receive buffer of region to copy 79462306a36Sopenharmony_ci * @remaining: length of region to copy 79562306a36Sopenharmony_ci * 79662306a36Sopenharmony_ci * Take a page at a time from rqstp->rq_pages and copy the inline 79762306a36Sopenharmony_ci * content from the Receive buffer into that page. Update 79862306a36Sopenharmony_ci * info->ri_pageno and info->ri_pageoff so that the next RDMA Read 79962306a36Sopenharmony_ci * result will land contiguously with the copied content. 
80062306a36Sopenharmony_ci * 80162306a36Sopenharmony_ci * Return values: 80262306a36Sopenharmony_ci * %0: Inline content was successfully copied 80362306a36Sopenharmony_ci * %-EINVAL: offset or length was incorrect 80462306a36Sopenharmony_ci */ 80562306a36Sopenharmony_cistatic int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info, 80662306a36Sopenharmony_ci unsigned int offset, 80762306a36Sopenharmony_ci unsigned int remaining) 80862306a36Sopenharmony_ci{ 80962306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 81062306a36Sopenharmony_ci unsigned char *dst, *src = head->rc_recv_buf; 81162306a36Sopenharmony_ci struct svc_rqst *rqstp = info->ri_rqst; 81262306a36Sopenharmony_ci unsigned int page_no, numpages; 81362306a36Sopenharmony_ci 81462306a36Sopenharmony_ci numpages = PAGE_ALIGN(info->ri_pageoff + remaining) >> PAGE_SHIFT; 81562306a36Sopenharmony_ci for (page_no = 0; page_no < numpages; page_no++) { 81662306a36Sopenharmony_ci unsigned int page_len; 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci page_len = min_t(unsigned int, remaining, 81962306a36Sopenharmony_ci PAGE_SIZE - info->ri_pageoff); 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci if (!info->ri_pageoff) 82262306a36Sopenharmony_ci head->rc_page_count++; 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci dst = page_address(rqstp->rq_pages[info->ri_pageno]); 82562306a36Sopenharmony_ci memcpy(dst + info->ri_pageno, src + offset, page_len); 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci info->ri_totalbytes += page_len; 82862306a36Sopenharmony_ci info->ri_pageoff += page_len; 82962306a36Sopenharmony_ci if (info->ri_pageoff == PAGE_SIZE) { 83062306a36Sopenharmony_ci info->ri_pageno++; 83162306a36Sopenharmony_ci info->ri_pageoff = 0; 83262306a36Sopenharmony_ci } 83362306a36Sopenharmony_ci remaining -= page_len; 83462306a36Sopenharmony_ci offset += page_len; 83562306a36Sopenharmony_ci } 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci return -EINVAL; 
83862306a36Sopenharmony_ci} 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci/** 84162306a36Sopenharmony_ci * svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks 84262306a36Sopenharmony_ci * @info: context for RDMA Reads 84362306a36Sopenharmony_ci * 84462306a36Sopenharmony_ci * The chunk data lands in rqstp->rq_arg as a series of contiguous pages, 84562306a36Sopenharmony_ci * like an incoming TCP call. 84662306a36Sopenharmony_ci * 84762306a36Sopenharmony_ci * Return values: 84862306a36Sopenharmony_ci * %0: RDMA Read WQEs were successfully built 84962306a36Sopenharmony_ci * %-EINVAL: client provided too many chunks or segments, 85062306a36Sopenharmony_ci * %-ENOMEM: rdma_rw context pool was exhausted, 85162306a36Sopenharmony_ci * %-ENOTCONN: posting failed (connection is lost), 85262306a36Sopenharmony_ci * %-EIO: rdma_rw initialization failed (DMA mapping, etc). 85362306a36Sopenharmony_ci */ 85462306a36Sopenharmony_cistatic noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *info) 85562306a36Sopenharmony_ci{ 85662306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 85762306a36Sopenharmony_ci const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; 85862306a36Sopenharmony_ci struct xdr_buf *buf = &info->ri_rqst->rq_arg; 85962306a36Sopenharmony_ci struct svc_rdma_chunk *chunk, *next; 86062306a36Sopenharmony_ci unsigned int start, length; 86162306a36Sopenharmony_ci int ret; 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci start = 0; 86462306a36Sopenharmony_ci chunk = pcl_first_chunk(pcl); 86562306a36Sopenharmony_ci length = chunk->ch_position; 86662306a36Sopenharmony_ci ret = svc_rdma_copy_inline_range(info, start, length); 86762306a36Sopenharmony_ci if (ret < 0) 86862306a36Sopenharmony_ci return ret; 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci pcl_for_each_chunk(chunk, pcl) { 87162306a36Sopenharmony_ci ret = svc_rdma_build_read_chunk(info, chunk); 87262306a36Sopenharmony_ci if (ret < 
0) 87362306a36Sopenharmony_ci return ret; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci next = pcl_next_chunk(pcl, chunk); 87662306a36Sopenharmony_ci if (!next) 87762306a36Sopenharmony_ci break; 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci start += length; 88062306a36Sopenharmony_ci length = next->ch_position - info->ri_totalbytes; 88162306a36Sopenharmony_ci ret = svc_rdma_copy_inline_range(info, start, length); 88262306a36Sopenharmony_ci if (ret < 0) 88362306a36Sopenharmony_ci return ret; 88462306a36Sopenharmony_ci } 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci start += length; 88762306a36Sopenharmony_ci length = head->rc_byte_len - start; 88862306a36Sopenharmony_ci ret = svc_rdma_copy_inline_range(info, start, length); 88962306a36Sopenharmony_ci if (ret < 0) 89062306a36Sopenharmony_ci return ret; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci buf->len += info->ri_totalbytes; 89362306a36Sopenharmony_ci buf->buflen += info->ri_totalbytes; 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); 89662306a36Sopenharmony_ci buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); 89762306a36Sopenharmony_ci buf->pages = &info->ri_rqst->rq_pages[1]; 89862306a36Sopenharmony_ci buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; 89962306a36Sopenharmony_ci return 0; 90062306a36Sopenharmony_ci} 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci/** 90362306a36Sopenharmony_ci * svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks 90462306a36Sopenharmony_ci * @info: context for RDMA Reads 90562306a36Sopenharmony_ci * 90662306a36Sopenharmony_ci * The chunk data lands in the page list of rqstp->rq_arg.pages. 90762306a36Sopenharmony_ci * 90862306a36Sopenharmony_ci * Currently NFSD does not look at the rqstp->rq_arg.tail[0] kvec. 
90962306a36Sopenharmony_ci * Therefore, XDR round-up of the Read chunk and trailing 91062306a36Sopenharmony_ci * inline content must both be added at the end of the pagelist. 91162306a36Sopenharmony_ci * 91262306a36Sopenharmony_ci * Return values: 91362306a36Sopenharmony_ci * %0: RDMA Read WQEs were successfully built 91462306a36Sopenharmony_ci * %-EINVAL: client provided too many chunks or segments, 91562306a36Sopenharmony_ci * %-ENOMEM: rdma_rw context pool was exhausted, 91662306a36Sopenharmony_ci * %-ENOTCONN: posting failed (connection is lost), 91762306a36Sopenharmony_ci * %-EIO: rdma_rw initialization failed (DMA mapping, etc). 91862306a36Sopenharmony_ci */ 91962306a36Sopenharmony_cistatic int svc_rdma_read_data_item(struct svc_rdma_read_info *info) 92062306a36Sopenharmony_ci{ 92162306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 92262306a36Sopenharmony_ci struct xdr_buf *buf = &info->ri_rqst->rq_arg; 92362306a36Sopenharmony_ci struct svc_rdma_chunk *chunk; 92462306a36Sopenharmony_ci unsigned int length; 92562306a36Sopenharmony_ci int ret; 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_ci chunk = pcl_first_chunk(&head->rc_read_pcl); 92862306a36Sopenharmony_ci ret = svc_rdma_build_read_chunk(info, chunk); 92962306a36Sopenharmony_ci if (ret < 0) 93062306a36Sopenharmony_ci goto out; 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci /* Split the Receive buffer between the head and tail 93362306a36Sopenharmony_ci * buffers at Read chunk's position. XDR roundup of the 93462306a36Sopenharmony_ci * chunk is not included in either the pagelist or in 93562306a36Sopenharmony_ci * the tail. 
93662306a36Sopenharmony_ci */ 93762306a36Sopenharmony_ci buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position; 93862306a36Sopenharmony_ci buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position; 93962306a36Sopenharmony_ci buf->head[0].iov_len = chunk->ch_position; 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2). 94262306a36Sopenharmony_ci * 94362306a36Sopenharmony_ci * If the client already rounded up the chunk length, the 94462306a36Sopenharmony_ci * length does not change. Otherwise, the length of the page 94562306a36Sopenharmony_ci * list is increased to include XDR round-up. 94662306a36Sopenharmony_ci * 94762306a36Sopenharmony_ci * Currently these chunks always start at page offset 0, 94862306a36Sopenharmony_ci * thus the rounded-up length never crosses a page boundary. 94962306a36Sopenharmony_ci */ 95062306a36Sopenharmony_ci buf->pages = &info->ri_rqst->rq_pages[0]; 95162306a36Sopenharmony_ci length = xdr_align_size(chunk->ch_length); 95262306a36Sopenharmony_ci buf->page_len = length; 95362306a36Sopenharmony_ci buf->len += length; 95462306a36Sopenharmony_ci buf->buflen += length; 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ciout: 95762306a36Sopenharmony_ci return ret; 95862306a36Sopenharmony_ci} 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/** 96162306a36Sopenharmony_ci * svc_rdma_read_chunk_range - Build RDMA Read WQEs for portion of a chunk 96262306a36Sopenharmony_ci * @info: context for RDMA Reads 96362306a36Sopenharmony_ci * @chunk: parsed Call chunk to pull 96462306a36Sopenharmony_ci * @offset: offset of region to pull 96562306a36Sopenharmony_ci * @length: length of region to pull 96662306a36Sopenharmony_ci * 96762306a36Sopenharmony_ci * Return values: 96862306a36Sopenharmony_ci * %0: RDMA Read WQEs were successfully built 96962306a36Sopenharmony_ci * %-EINVAL: there were not enough resources to finish 97062306a36Sopenharmony_ci * %-ENOMEM: rdma_rw 
context pool was exhausted, 97162306a36Sopenharmony_ci * %-ENOTCONN: posting failed (connection is lost), 97262306a36Sopenharmony_ci * %-EIO: rdma_rw initialization failed (DMA mapping, etc). 97362306a36Sopenharmony_ci */ 97462306a36Sopenharmony_cistatic int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info, 97562306a36Sopenharmony_ci const struct svc_rdma_chunk *chunk, 97662306a36Sopenharmony_ci unsigned int offset, unsigned int length) 97762306a36Sopenharmony_ci{ 97862306a36Sopenharmony_ci const struct svc_rdma_segment *segment; 97962306a36Sopenharmony_ci int ret; 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci ret = -EINVAL; 98262306a36Sopenharmony_ci pcl_for_each_segment(segment, chunk) { 98362306a36Sopenharmony_ci struct svc_rdma_segment dummy; 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci if (offset > segment->rs_length) { 98662306a36Sopenharmony_ci offset -= segment->rs_length; 98762306a36Sopenharmony_ci continue; 98862306a36Sopenharmony_ci } 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci dummy.rs_handle = segment->rs_handle; 99162306a36Sopenharmony_ci dummy.rs_length = min_t(u32, length, segment->rs_length) - offset; 99262306a36Sopenharmony_ci dummy.rs_offset = segment->rs_offset + offset; 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci ret = svc_rdma_build_read_segment(info, &dummy); 99562306a36Sopenharmony_ci if (ret < 0) 99662306a36Sopenharmony_ci break; 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci info->ri_totalbytes += dummy.rs_length; 99962306a36Sopenharmony_ci length -= dummy.rs_length; 100062306a36Sopenharmony_ci offset = 0; 100162306a36Sopenharmony_ci } 100262306a36Sopenharmony_ci return ret; 100362306a36Sopenharmony_ci} 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci/** 100662306a36Sopenharmony_ci * svc_rdma_read_call_chunk - Build RDMA Read WQEs to pull a Long Message 100762306a36Sopenharmony_ci * @info: context for RDMA Reads 100862306a36Sopenharmony_ci * 100962306a36Sopenharmony_ci * Return 
values: 101062306a36Sopenharmony_ci * %0: RDMA Read WQEs were successfully built 101162306a36Sopenharmony_ci * %-EINVAL: there were not enough resources to finish 101262306a36Sopenharmony_ci * %-ENOMEM: rdma_rw context pool was exhausted, 101362306a36Sopenharmony_ci * %-ENOTCONN: posting failed (connection is lost), 101462306a36Sopenharmony_ci * %-EIO: rdma_rw initialization failed (DMA mapping, etc). 101562306a36Sopenharmony_ci */ 101662306a36Sopenharmony_cistatic int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info) 101762306a36Sopenharmony_ci{ 101862306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *head = info->ri_readctxt; 101962306a36Sopenharmony_ci const struct svc_rdma_chunk *call_chunk = 102062306a36Sopenharmony_ci pcl_first_chunk(&head->rc_call_pcl); 102162306a36Sopenharmony_ci const struct svc_rdma_pcl *pcl = &head->rc_read_pcl; 102262306a36Sopenharmony_ci struct svc_rdma_chunk *chunk, *next; 102362306a36Sopenharmony_ci unsigned int start, length; 102462306a36Sopenharmony_ci int ret; 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ci if (pcl_is_empty(pcl)) 102762306a36Sopenharmony_ci return svc_rdma_build_read_chunk(info, call_chunk); 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci start = 0; 103062306a36Sopenharmony_ci chunk = pcl_first_chunk(pcl); 103162306a36Sopenharmony_ci length = chunk->ch_position; 103262306a36Sopenharmony_ci ret = svc_rdma_read_chunk_range(info, call_chunk, start, length); 103362306a36Sopenharmony_ci if (ret < 0) 103462306a36Sopenharmony_ci return ret; 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci pcl_for_each_chunk(chunk, pcl) { 103762306a36Sopenharmony_ci ret = svc_rdma_build_read_chunk(info, chunk); 103862306a36Sopenharmony_ci if (ret < 0) 103962306a36Sopenharmony_ci return ret; 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci next = pcl_next_chunk(pcl, chunk); 104262306a36Sopenharmony_ci if (!next) 104362306a36Sopenharmony_ci break; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci 
start += length; 104662306a36Sopenharmony_ci length = next->ch_position - info->ri_totalbytes; 104762306a36Sopenharmony_ci ret = svc_rdma_read_chunk_range(info, call_chunk, 104862306a36Sopenharmony_ci start, length); 104962306a36Sopenharmony_ci if (ret < 0) 105062306a36Sopenharmony_ci return ret; 105162306a36Sopenharmony_ci } 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci start += length; 105462306a36Sopenharmony_ci length = call_chunk->ch_length - start; 105562306a36Sopenharmony_ci return svc_rdma_read_chunk_range(info, call_chunk, start, length); 105662306a36Sopenharmony_ci} 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci/** 105962306a36Sopenharmony_ci * svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message 106062306a36Sopenharmony_ci * @info: context for RDMA Reads 106162306a36Sopenharmony_ci * 106262306a36Sopenharmony_ci * The start of the data lands in the first page just after the 106362306a36Sopenharmony_ci * Transport header, and the rest lands in rqstp->rq_arg.pages. 106462306a36Sopenharmony_ci * 106562306a36Sopenharmony_ci * Assumptions: 106662306a36Sopenharmony_ci * - A PZRC is never sent in an RDMA_MSG message, though it's 106762306a36Sopenharmony_ci * allowed by spec. 106862306a36Sopenharmony_ci * 106962306a36Sopenharmony_ci * Return values: 107062306a36Sopenharmony_ci * %0: RDMA Read WQEs were successfully built 107162306a36Sopenharmony_ci * %-EINVAL: client provided too many chunks or segments, 107262306a36Sopenharmony_ci * %-ENOMEM: rdma_rw context pool was exhausted, 107362306a36Sopenharmony_ci * %-ENOTCONN: posting failed (connection is lost), 107462306a36Sopenharmony_ci * %-EIO: rdma_rw initialization failed (DMA mapping, etc). 
107562306a36Sopenharmony_ci */ 107662306a36Sopenharmony_cistatic noinline int svc_rdma_read_special(struct svc_rdma_read_info *info) 107762306a36Sopenharmony_ci{ 107862306a36Sopenharmony_ci struct xdr_buf *buf = &info->ri_rqst->rq_arg; 107962306a36Sopenharmony_ci int ret; 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci ret = svc_rdma_read_call_chunk(info); 108262306a36Sopenharmony_ci if (ret < 0) 108362306a36Sopenharmony_ci goto out; 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci buf->len += info->ri_totalbytes; 108662306a36Sopenharmony_ci buf->buflen += info->ri_totalbytes; 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]); 108962306a36Sopenharmony_ci buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes); 109062306a36Sopenharmony_ci buf->pages = &info->ri_rqst->rq_pages[1]; 109162306a36Sopenharmony_ci buf->page_len = info->ri_totalbytes - buf->head[0].iov_len; 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_ciout: 109462306a36Sopenharmony_ci return ret; 109562306a36Sopenharmony_ci} 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci/** 109862306a36Sopenharmony_ci * svc_rdma_process_read_list - Pull list of Read chunks from the client 109962306a36Sopenharmony_ci * @rdma: controlling RDMA transport 110062306a36Sopenharmony_ci * @rqstp: set of pages to use as Read sink buffers 110162306a36Sopenharmony_ci * @head: pages under I/O collect here 110262306a36Sopenharmony_ci * 110362306a36Sopenharmony_ci * The RPC/RDMA protocol assumes that the upper layer's XDR decoders 110462306a36Sopenharmony_ci * pull each Read chunk as they decode an incoming RPC message. 110562306a36Sopenharmony_ci * 110662306a36Sopenharmony_ci * On Linux, however, the server needs to have a fully-constructed RPC 110762306a36Sopenharmony_ci * message in rqstp->rq_arg when there is a positive return code from 110862306a36Sopenharmony_ci * ->xpo_recvfrom. 
So the Read list is safety-checked immediately when
 * it is received, then here the whole Read list is pulled all at once.
 * The ingress RPC message is fully reconstructed once all associated
 * RDMA Reads have completed.
 *
 * Return values:
 *   %1: all needed RDMA Reads were posted successfully,
 *   %-EINVAL: client provided too many chunks or segments,
 *   %-ENOMEM: rdma_rw context pool was exhausted,
 *   %-ENOTCONN: posting failed (connection is lost),
 *   %-EIO: rdma_rw initialization failed (DMA mapping, etc).
 */
int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
			       struct svc_rqst *rqstp,
			       struct svc_rdma_recv_ctxt *head)
{
	struct svc_rdma_read_info *info;
	struct svc_rdma_chunk_ctxt *cc;
	int ret;

	info = svc_rdma_read_info_alloc(rdma);
	if (!info)
		return -ENOMEM;
	cc = &info->ri_cc;
	info->ri_rqst = rqstp;
	info->ri_readctxt = head;
	info->ri_pageno = 0;
	info->ri_pageoff = 0;
	info->ri_totalbytes = 0;

	/* Pick a strategy based on which chunk lists the client sent */
	if (pcl_is_empty(&head->rc_call_pcl)) {
		if (head->rc_read_pcl.cl_count == 1)
			ret = svc_rdma_read_data_item(info);
		else
			ret = svc_rdma_read_multiple_chunks(info);
	} else
		ret = svc_rdma_read_special(info);
	if (ret < 0)
		goto out_err;

	trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount);
	init_completion(&cc->cc_done);
	ret = svc_rdma_post_chunk_ctxt(cc);
	if (ret < 0)
		goto out_err;

	/* Sleep until cc_done is completed, then check the status
	 * that was recorded in the chunk context.
	 */
	ret = 1;
	wait_for_completion(&cc->cc_done);
	if (cc->cc_status != IB_WC_SUCCESS)
		ret = -EIO;

	/* rq_respages starts after the last arg page */
	rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count];
	rqstp->rq_next_page = rqstp->rq_respages + 1;

	/* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */
	head->rc_page_count = 0;

out_err:
	/* Reached on success as well: @info is always freed here */
	svc_rdma_read_info_free(info);
	return ret;
}