// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2016-2018 Oracle. All rights reserved.
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

/* Operation
 *
 * The main entry point is svc_rdma_recvfrom. This is called from
 * svc_recv when the transport indicates there is incoming data to
 * be read. "Data Ready" is signaled when an RDMA Receive completes,
 * or when a set of RDMA Reads complete.
 *
 * An svc_rqst is passed in. This structure contains an array of
 * free pages (rq_pages) that will contain the incoming RPC message.
 *
 * Short messages are moved directly into svc_rqst::rq_arg, and
 * the RPC Call is ready to be processed by the Upper Layer.
 * svc_rdma_recvfrom returns the length of the RPC Call message,
 * completing the reception of the RPC Call.
 *
 * However, when an incoming message has Read chunks,
 * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
 * data payload from the client. svc_rdma_recvfrom sets up the
 * RDMA Reads using pages in svc_rqst::rq_pages, which are
 * transferred to an svc_rdma_recv_ctxt for the duration of the
 * I/O. svc_rdma_recvfrom then returns zero, since the RPC message
 * is still not yet ready.
 *
 * When the Read chunk payloads have become available on the
 * server, "Data Ready" is raised again, and svc_recv calls
 * svc_rdma_recvfrom again. This second call may use a different
 * svc_rqst than the first one, thus any information that needs
 * to be preserved across these two calls is kept in an
 * svc_rdma_recv_ctxt.
 *
 * The second call to svc_rdma_recvfrom performs final assembly
 * of the RPC Call message, using the RDMA Read sink pages kept in
 * the svc_rdma_recv_ctxt. The xdr_buf is copied from the
 * svc_rdma_recv_ctxt to the second svc_rqst. The second call returns
 * the length of the completed RPC Call message.
8062306a36Sopenharmony_ci * 8162306a36Sopenharmony_ci * Page Management 8262306a36Sopenharmony_ci * 8362306a36Sopenharmony_ci * Pages under I/O must be transferred from the first svc_rqst to an 8462306a36Sopenharmony_ci * svc_rdma_recv_ctxt before the first svc_rdma_recvfrom call returns. 8562306a36Sopenharmony_ci * 8662306a36Sopenharmony_ci * The first svc_rqst supplies pages for RDMA Reads. These are moved 8762306a36Sopenharmony_ci * from rqstp::rq_pages into ctxt::pages. The consumed elements of 8862306a36Sopenharmony_ci * the rq_pages array are set to NULL and refilled with the first 8962306a36Sopenharmony_ci * svc_rdma_recvfrom call returns. 9062306a36Sopenharmony_ci * 9162306a36Sopenharmony_ci * During the second svc_rdma_recvfrom call, RDMA Read sink pages 9262306a36Sopenharmony_ci * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst. 9362306a36Sopenharmony_ci */ 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci#include <linux/slab.h> 9662306a36Sopenharmony_ci#include <linux/spinlock.h> 9762306a36Sopenharmony_ci#include <asm/unaligned.h> 9862306a36Sopenharmony_ci#include <rdma/ib_verbs.h> 9962306a36Sopenharmony_ci#include <rdma/rdma_cm.h> 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci#include <linux/sunrpc/xdr.h> 10262306a36Sopenharmony_ci#include <linux/sunrpc/debug.h> 10362306a36Sopenharmony_ci#include <linux/sunrpc/rpc_rdma.h> 10462306a36Sopenharmony_ci#include <linux/sunrpc/svc_rdma.h> 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci#include "xprt_rdma.h" 10762306a36Sopenharmony_ci#include <trace/events/rpcrdma.h> 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_cistatic void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc); 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_cistatic inline struct svc_rdma_recv_ctxt * 11262306a36Sopenharmony_cisvc_rdma_next_recv_ctxt(struct list_head *list) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt, 
11562306a36Sopenharmony_ci rc_list); 11662306a36Sopenharmony_ci} 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, 11962306a36Sopenharmony_ci struct rpc_rdma_cid *cid) 12062306a36Sopenharmony_ci{ 12162306a36Sopenharmony_ci cid->ci_queue_id = rdma->sc_rq_cq->res.id; 12262306a36Sopenharmony_ci cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 12362306a36Sopenharmony_ci} 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_cistatic struct svc_rdma_recv_ctxt * 12662306a36Sopenharmony_cisvc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci int node = ibdev_to_node(rdma->sc_cm_id->device); 12962306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 13062306a36Sopenharmony_ci dma_addr_t addr; 13162306a36Sopenharmony_ci void *buffer; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node); 13462306a36Sopenharmony_ci if (!ctxt) 13562306a36Sopenharmony_ci goto fail0; 13662306a36Sopenharmony_ci buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node); 13762306a36Sopenharmony_ci if (!buffer) 13862306a36Sopenharmony_ci goto fail1; 13962306a36Sopenharmony_ci addr = ib_dma_map_single(rdma->sc_pd->device, buffer, 14062306a36Sopenharmony_ci rdma->sc_max_req_size, DMA_FROM_DEVICE); 14162306a36Sopenharmony_ci if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) 14262306a36Sopenharmony_ci goto fail2; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid); 14562306a36Sopenharmony_ci pcl_init(&ctxt->rc_call_pcl); 14662306a36Sopenharmony_ci pcl_init(&ctxt->rc_read_pcl); 14762306a36Sopenharmony_ci pcl_init(&ctxt->rc_write_pcl); 14862306a36Sopenharmony_ci pcl_init(&ctxt->rc_reply_pcl); 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci ctxt->rc_recv_wr.next = NULL; 15162306a36Sopenharmony_ci ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe; 
15262306a36Sopenharmony_ci ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge; 15362306a36Sopenharmony_ci ctxt->rc_recv_wr.num_sge = 1; 15462306a36Sopenharmony_ci ctxt->rc_cqe.done = svc_rdma_wc_receive; 15562306a36Sopenharmony_ci ctxt->rc_recv_sge.addr = addr; 15662306a36Sopenharmony_ci ctxt->rc_recv_sge.length = rdma->sc_max_req_size; 15762306a36Sopenharmony_ci ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey; 15862306a36Sopenharmony_ci ctxt->rc_recv_buf = buffer; 15962306a36Sopenharmony_ci return ctxt; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_cifail2: 16262306a36Sopenharmony_ci kfree(buffer); 16362306a36Sopenharmony_cifail1: 16462306a36Sopenharmony_ci kfree(ctxt); 16562306a36Sopenharmony_cifail0: 16662306a36Sopenharmony_ci return NULL; 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_cistatic void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma, 17062306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 17162306a36Sopenharmony_ci{ 17262306a36Sopenharmony_ci ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr, 17362306a36Sopenharmony_ci ctxt->rc_recv_sge.length, DMA_FROM_DEVICE); 17462306a36Sopenharmony_ci kfree(ctxt->rc_recv_buf); 17562306a36Sopenharmony_ci kfree(ctxt); 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci/** 17962306a36Sopenharmony_ci * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt 18062306a36Sopenharmony_ci * @rdma: svcxprt_rdma being torn down 18162306a36Sopenharmony_ci * 18262306a36Sopenharmony_ci */ 18362306a36Sopenharmony_civoid svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma) 18462306a36Sopenharmony_ci{ 18562306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 18662306a36Sopenharmony_ci struct llist_node *node; 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci while ((node = llist_del_first(&rdma->sc_recv_ctxts))) { 18962306a36Sopenharmony_ci ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node); 
19062306a36Sopenharmony_ci svc_rdma_recv_ctxt_destroy(rdma, ctxt); 19162306a36Sopenharmony_ci } 19262306a36Sopenharmony_ci} 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci/** 19562306a36Sopenharmony_ci * svc_rdma_recv_ctxt_get - Allocate a recv_ctxt 19662306a36Sopenharmony_ci * @rdma: controlling svcxprt_rdma 19762306a36Sopenharmony_ci * 19862306a36Sopenharmony_ci * Returns a recv_ctxt or (rarely) NULL if none are available. 19962306a36Sopenharmony_ci */ 20062306a36Sopenharmony_cistruct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) 20162306a36Sopenharmony_ci{ 20262306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 20362306a36Sopenharmony_ci struct llist_node *node; 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci node = llist_del_first(&rdma->sc_recv_ctxts); 20662306a36Sopenharmony_ci if (!node) 20762306a36Sopenharmony_ci goto out_empty; 20862306a36Sopenharmony_ci ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node); 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ciout: 21162306a36Sopenharmony_ci ctxt->rc_page_count = 0; 21262306a36Sopenharmony_ci return ctxt; 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ciout_empty: 21562306a36Sopenharmony_ci ctxt = svc_rdma_recv_ctxt_alloc(rdma); 21662306a36Sopenharmony_ci if (!ctxt) 21762306a36Sopenharmony_ci return NULL; 21862306a36Sopenharmony_ci goto out; 21962306a36Sopenharmony_ci} 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci/** 22262306a36Sopenharmony_ci * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list 22362306a36Sopenharmony_ci * @rdma: controlling svcxprt_rdma 22462306a36Sopenharmony_ci * @ctxt: object to return to the free list 22562306a36Sopenharmony_ci * 22662306a36Sopenharmony_ci */ 22762306a36Sopenharmony_civoid svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, 22862306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 22962306a36Sopenharmony_ci{ 23062306a36Sopenharmony_ci pcl_free(&ctxt->rc_call_pcl); 23162306a36Sopenharmony_ci 
pcl_free(&ctxt->rc_read_pcl); 23262306a36Sopenharmony_ci pcl_free(&ctxt->rc_write_pcl); 23362306a36Sopenharmony_ci pcl_free(&ctxt->rc_reply_pcl); 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); 23662306a36Sopenharmony_ci} 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci/** 23962306a36Sopenharmony_ci * svc_rdma_release_ctxt - Release transport-specific per-rqst resources 24062306a36Sopenharmony_ci * @xprt: the transport which owned the context 24162306a36Sopenharmony_ci * @vctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt 24262306a36Sopenharmony_ci * 24362306a36Sopenharmony_ci * Ensure that the recv_ctxt is released whether or not a Reply 24462306a36Sopenharmony_ci * was sent. For example, the client could close the connection, 24562306a36Sopenharmony_ci * or svc_process could drop an RPC, before the Reply is sent. 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_civoid svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt = vctxt; 25062306a36Sopenharmony_ci struct svcxprt_rdma *rdma = 25162306a36Sopenharmony_ci container_of(xprt, struct svcxprt_rdma, sc_xprt); 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci if (ctxt) 25462306a36Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 25562306a36Sopenharmony_ci} 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_cistatic bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, 25862306a36Sopenharmony_ci unsigned int wanted) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci const struct ib_recv_wr *bad_wr = NULL; 26162306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 26262306a36Sopenharmony_ci struct ib_recv_wr *recv_chain; 26362306a36Sopenharmony_ci int ret; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) 26662306a36Sopenharmony_ci return false; 26762306a36Sopenharmony_ci 
26862306a36Sopenharmony_ci recv_chain = NULL; 26962306a36Sopenharmony_ci while (wanted--) { 27062306a36Sopenharmony_ci ctxt = svc_rdma_recv_ctxt_get(rdma); 27162306a36Sopenharmony_ci if (!ctxt) 27262306a36Sopenharmony_ci break; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci trace_svcrdma_post_recv(ctxt); 27562306a36Sopenharmony_ci ctxt->rc_recv_wr.next = recv_chain; 27662306a36Sopenharmony_ci recv_chain = &ctxt->rc_recv_wr; 27762306a36Sopenharmony_ci rdma->sc_pending_recvs++; 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci if (!recv_chain) 28062306a36Sopenharmony_ci return false; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr); 28362306a36Sopenharmony_ci if (ret) 28462306a36Sopenharmony_ci goto err_free; 28562306a36Sopenharmony_ci return true; 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_cierr_free: 28862306a36Sopenharmony_ci trace_svcrdma_rq_post_err(rdma, ret); 28962306a36Sopenharmony_ci while (bad_wr) { 29062306a36Sopenharmony_ci ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt, 29162306a36Sopenharmony_ci rc_recv_wr); 29262306a36Sopenharmony_ci bad_wr = bad_wr->next; 29362306a36Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 29462306a36Sopenharmony_ci } 29562306a36Sopenharmony_ci /* Since we're destroying the xprt, no need to reset 29662306a36Sopenharmony_ci * sc_pending_recvs. */ 29762306a36Sopenharmony_ci return false; 29862306a36Sopenharmony_ci} 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci/** 30162306a36Sopenharmony_ci * svc_rdma_post_recvs - Post initial set of Recv WRs 30262306a36Sopenharmony_ci * @rdma: fresh svcxprt_rdma 30362306a36Sopenharmony_ci * 30462306a36Sopenharmony_ci * Returns true if successful, otherwise false. 
30562306a36Sopenharmony_ci */ 30662306a36Sopenharmony_cibool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) 30762306a36Sopenharmony_ci{ 30862306a36Sopenharmony_ci return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests); 30962306a36Sopenharmony_ci} 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci/** 31262306a36Sopenharmony_ci * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 31362306a36Sopenharmony_ci * @cq: Completion Queue context 31462306a36Sopenharmony_ci * @wc: Work Completion object 31562306a36Sopenharmony_ci * 31662306a36Sopenharmony_ci */ 31762306a36Sopenharmony_cistatic void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci struct svcxprt_rdma *rdma = cq->cq_context; 32062306a36Sopenharmony_ci struct ib_cqe *cqe = wc->wr_cqe; 32162306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci rdma->sc_pending_recvs--; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci /* WARNING: Only wc->wr_cqe and wc->status are reliable */ 32662306a36Sopenharmony_ci ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci if (wc->status != IB_WC_SUCCESS) 32962306a36Sopenharmony_ci goto flushed; 33062306a36Sopenharmony_ci trace_svcrdma_wc_recv(wc, &ctxt->rc_cid); 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci /* If receive posting fails, the connection is about to be 33362306a36Sopenharmony_ci * lost anyway. The server will not be able to send a reply 33462306a36Sopenharmony_ci * for this RPC, and the client will retransmit this RPC 33562306a36Sopenharmony_ci * anyway when it reconnects. 33662306a36Sopenharmony_ci * 33762306a36Sopenharmony_ci * Therefore we drop the Receive, even if status was SUCCESS 33862306a36Sopenharmony_ci * to reduce the likelihood of replayed requests once the 33962306a36Sopenharmony_ci * client reconnects. 
34062306a36Sopenharmony_ci */ 34162306a36Sopenharmony_ci if (rdma->sc_pending_recvs < rdma->sc_max_requests) 34262306a36Sopenharmony_ci if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch)) 34362306a36Sopenharmony_ci goto dropped; 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ci /* All wc fields are now known to be valid */ 34662306a36Sopenharmony_ci ctxt->rc_byte_len = wc->byte_len; 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci spin_lock(&rdma->sc_rq_dto_lock); 34962306a36Sopenharmony_ci list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q); 35062306a36Sopenharmony_ci /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */ 35162306a36Sopenharmony_ci set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); 35262306a36Sopenharmony_ci spin_unlock(&rdma->sc_rq_dto_lock); 35362306a36Sopenharmony_ci if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) 35462306a36Sopenharmony_ci svc_xprt_enqueue(&rdma->sc_xprt); 35562306a36Sopenharmony_ci return; 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ciflushed: 35862306a36Sopenharmony_ci if (wc->status == IB_WC_WR_FLUSH_ERR) 35962306a36Sopenharmony_ci trace_svcrdma_wc_recv_flush(wc, &ctxt->rc_cid); 36062306a36Sopenharmony_ci else 36162306a36Sopenharmony_ci trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid); 36262306a36Sopenharmony_cidropped: 36362306a36Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 36462306a36Sopenharmony_ci svc_xprt_deferred_close(&rdma->sc_xprt); 36562306a36Sopenharmony_ci} 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci/** 36862306a36Sopenharmony_ci * svc_rdma_flush_recv_queues - Drain pending Receive work 36962306a36Sopenharmony_ci * @rdma: svcxprt_rdma being shut down 37062306a36Sopenharmony_ci * 37162306a36Sopenharmony_ci */ 37262306a36Sopenharmony_civoid svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma) 37362306a36Sopenharmony_ci{ 37462306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci while ((ctxt = 
svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) { 37762306a36Sopenharmony_ci list_del(&ctxt->rc_list); 37862306a36Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 37962306a36Sopenharmony_ci } 38062306a36Sopenharmony_ci} 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_cistatic void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, 38362306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 38462306a36Sopenharmony_ci{ 38562306a36Sopenharmony_ci struct xdr_buf *arg = &rqstp->rq_arg; 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci arg->head[0].iov_base = ctxt->rc_recv_buf; 38862306a36Sopenharmony_ci arg->head[0].iov_len = ctxt->rc_byte_len; 38962306a36Sopenharmony_ci arg->tail[0].iov_base = NULL; 39062306a36Sopenharmony_ci arg->tail[0].iov_len = 0; 39162306a36Sopenharmony_ci arg->page_len = 0; 39262306a36Sopenharmony_ci arg->page_base = 0; 39362306a36Sopenharmony_ci arg->buflen = ctxt->rc_byte_len; 39462306a36Sopenharmony_ci arg->len = ctxt->rc_byte_len; 39562306a36Sopenharmony_ci} 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci/** 39862306a36Sopenharmony_ci * xdr_count_read_segments - Count number of Read segments in Read list 39962306a36Sopenharmony_ci * @rctxt: Ingress receive context 40062306a36Sopenharmony_ci * @p: Start of an un-decoded Read list 40162306a36Sopenharmony_ci * 40262306a36Sopenharmony_ci * Before allocating anything, ensure the ingress Read list is safe 40362306a36Sopenharmony_ci * to use. 40462306a36Sopenharmony_ci * 40562306a36Sopenharmony_ci * The segment count is limited to how many segments can fit in the 40662306a36Sopenharmony_ci * transport header without overflowing the buffer. That's about 40 40762306a36Sopenharmony_ci * Read segments for a 1KB inline threshold. 40862306a36Sopenharmony_ci * 40962306a36Sopenharmony_ci * Return values: 41062306a36Sopenharmony_ci * %true: Read list is valid. @rctxt's xdr_stream is updated to point 41162306a36Sopenharmony_ci * to the first byte past the Read list. 
rc_read_pcl and 41262306a36Sopenharmony_ci * rc_call_pcl cl_count fields are set to the number of 41362306a36Sopenharmony_ci * Read segments in the list. 41462306a36Sopenharmony_ci * %false: Read list is corrupt. @rctxt's xdr_stream is left in an 41562306a36Sopenharmony_ci * unknown state. 41662306a36Sopenharmony_ci */ 41762306a36Sopenharmony_cistatic bool xdr_count_read_segments(struct svc_rdma_recv_ctxt *rctxt, __be32 *p) 41862306a36Sopenharmony_ci{ 41962306a36Sopenharmony_ci rctxt->rc_call_pcl.cl_count = 0; 42062306a36Sopenharmony_ci rctxt->rc_read_pcl.cl_count = 0; 42162306a36Sopenharmony_ci while (xdr_item_is_present(p)) { 42262306a36Sopenharmony_ci u32 position, handle, length; 42362306a36Sopenharmony_ci u64 offset; 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, 42662306a36Sopenharmony_ci rpcrdma_readseg_maxsz * sizeof(*p)); 42762306a36Sopenharmony_ci if (!p) 42862306a36Sopenharmony_ci return false; 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci xdr_decode_read_segment(p, &position, &handle, 43162306a36Sopenharmony_ci &length, &offset); 43262306a36Sopenharmony_ci if (position) { 43362306a36Sopenharmony_ci if (position & 3) 43462306a36Sopenharmony_ci return false; 43562306a36Sopenharmony_ci ++rctxt->rc_read_pcl.cl_count; 43662306a36Sopenharmony_ci } else { 43762306a36Sopenharmony_ci ++rctxt->rc_call_pcl.cl_count; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 44162306a36Sopenharmony_ci if (!p) 44262306a36Sopenharmony_ci return false; 44362306a36Sopenharmony_ci } 44462306a36Sopenharmony_ci return true; 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci/* Sanity check the Read list. 44862306a36Sopenharmony_ci * 44962306a36Sopenharmony_ci * Sanity checks: 45062306a36Sopenharmony_ci * - Read list does not overflow Receive buffer. 
45162306a36Sopenharmony_ci * - Chunk size limited by largest NFS data payload. 45262306a36Sopenharmony_ci * 45362306a36Sopenharmony_ci * Return values: 45462306a36Sopenharmony_ci * %true: Read list is valid. @rctxt's xdr_stream is updated 45562306a36Sopenharmony_ci * to point to the first byte past the Read list. 45662306a36Sopenharmony_ci * %false: Read list is corrupt. @rctxt's xdr_stream is left 45762306a36Sopenharmony_ci * in an unknown state. 45862306a36Sopenharmony_ci */ 45962306a36Sopenharmony_cistatic bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt) 46062306a36Sopenharmony_ci{ 46162306a36Sopenharmony_ci __be32 *p; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 46462306a36Sopenharmony_ci if (!p) 46562306a36Sopenharmony_ci return false; 46662306a36Sopenharmony_ci if (!xdr_count_read_segments(rctxt, p)) 46762306a36Sopenharmony_ci return false; 46862306a36Sopenharmony_ci if (!pcl_alloc_call(rctxt, p)) 46962306a36Sopenharmony_ci return false; 47062306a36Sopenharmony_ci return pcl_alloc_read(rctxt, p); 47162306a36Sopenharmony_ci} 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_cistatic bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt) 47462306a36Sopenharmony_ci{ 47562306a36Sopenharmony_ci u32 segcount; 47662306a36Sopenharmony_ci __be32 *p; 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount)) 47962306a36Sopenharmony_ci return false; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci /* A bogus segcount causes this buffer overflow check to fail. 
*/ 48262306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, 48362306a36Sopenharmony_ci segcount * rpcrdma_segment_maxsz * sizeof(*p)); 48462306a36Sopenharmony_ci return p != NULL; 48562306a36Sopenharmony_ci} 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci/** 48862306a36Sopenharmony_ci * xdr_count_write_chunks - Count number of Write chunks in Write list 48962306a36Sopenharmony_ci * @rctxt: Received header and decoding state 49062306a36Sopenharmony_ci * @p: start of an un-decoded Write list 49162306a36Sopenharmony_ci * 49262306a36Sopenharmony_ci * Before allocating anything, ensure the ingress Write list is 49362306a36Sopenharmony_ci * safe to use. 49462306a36Sopenharmony_ci * 49562306a36Sopenharmony_ci * Return values: 49662306a36Sopenharmony_ci * %true: Write list is valid. @rctxt's xdr_stream is updated 49762306a36Sopenharmony_ci * to point to the first byte past the Write list, and 49862306a36Sopenharmony_ci * the number of Write chunks is in rc_write_pcl.cl_count. 49962306a36Sopenharmony_ci * %false: Write list is corrupt. @rctxt's xdr_stream is left 50062306a36Sopenharmony_ci * in an indeterminate state. 50162306a36Sopenharmony_ci */ 50262306a36Sopenharmony_cistatic bool xdr_count_write_chunks(struct svc_rdma_recv_ctxt *rctxt, __be32 *p) 50362306a36Sopenharmony_ci{ 50462306a36Sopenharmony_ci rctxt->rc_write_pcl.cl_count = 0; 50562306a36Sopenharmony_ci while (xdr_item_is_present(p)) { 50662306a36Sopenharmony_ci if (!xdr_check_write_chunk(rctxt)) 50762306a36Sopenharmony_ci return false; 50862306a36Sopenharmony_ci ++rctxt->rc_write_pcl.cl_count; 50962306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 51062306a36Sopenharmony_ci if (!p) 51162306a36Sopenharmony_ci return false; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci return true; 51462306a36Sopenharmony_ci} 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci/* Sanity check the Write list. 
51762306a36Sopenharmony_ci * 51862306a36Sopenharmony_ci * Implementation limits: 51962306a36Sopenharmony_ci * - This implementation currently supports only one Write chunk. 52062306a36Sopenharmony_ci * 52162306a36Sopenharmony_ci * Sanity checks: 52262306a36Sopenharmony_ci * - Write list does not overflow Receive buffer. 52362306a36Sopenharmony_ci * - Chunk size limited by largest NFS data payload. 52462306a36Sopenharmony_ci * 52562306a36Sopenharmony_ci * Return values: 52662306a36Sopenharmony_ci * %true: Write list is valid. @rctxt's xdr_stream is updated 52762306a36Sopenharmony_ci * to point to the first byte past the Write list. 52862306a36Sopenharmony_ci * %false: Write list is corrupt. @rctxt's xdr_stream is left 52962306a36Sopenharmony_ci * in an unknown state. 53062306a36Sopenharmony_ci */ 53162306a36Sopenharmony_cistatic bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt) 53262306a36Sopenharmony_ci{ 53362306a36Sopenharmony_ci __be32 *p; 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 53662306a36Sopenharmony_ci if (!p) 53762306a36Sopenharmony_ci return false; 53862306a36Sopenharmony_ci if (!xdr_count_write_chunks(rctxt, p)) 53962306a36Sopenharmony_ci return false; 54062306a36Sopenharmony_ci if (!pcl_alloc_write(rctxt, &rctxt->rc_write_pcl, p)) 54162306a36Sopenharmony_ci return false; 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci rctxt->rc_cur_result_payload = pcl_first_chunk(&rctxt->rc_write_pcl); 54462306a36Sopenharmony_ci return true; 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci/* Sanity check the Reply chunk. 54862306a36Sopenharmony_ci * 54962306a36Sopenharmony_ci * Sanity checks: 55062306a36Sopenharmony_ci * - Reply chunk does not overflow Receive buffer. 55162306a36Sopenharmony_ci * - Chunk size limited by largest NFS data payload. 
55262306a36Sopenharmony_ci * 55362306a36Sopenharmony_ci * Return values: 55462306a36Sopenharmony_ci * %true: Reply chunk is valid. @rctxt's xdr_stream is updated 55562306a36Sopenharmony_ci * to point to the first byte past the Reply chunk. 55662306a36Sopenharmony_ci * %false: Reply chunk is corrupt. @rctxt's xdr_stream is left 55762306a36Sopenharmony_ci * in an unknown state. 55862306a36Sopenharmony_ci */ 55962306a36Sopenharmony_cistatic bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) 56062306a36Sopenharmony_ci{ 56162306a36Sopenharmony_ci __be32 *p; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 56462306a36Sopenharmony_ci if (!p) 56562306a36Sopenharmony_ci return false; 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci if (!xdr_item_is_present(p)) 56862306a36Sopenharmony_ci return true; 56962306a36Sopenharmony_ci if (!xdr_check_write_chunk(rctxt)) 57062306a36Sopenharmony_ci return false; 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci rctxt->rc_reply_pcl.cl_count = 1; 57362306a36Sopenharmony_ci return pcl_alloc_write(rctxt, &rctxt->rc_reply_pcl, p); 57462306a36Sopenharmony_ci} 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci/* RPC-over-RDMA Version One private extension: Remote Invalidation. 57762306a36Sopenharmony_ci * Responder's choice: requester signals it can handle Send With 57862306a36Sopenharmony_ci * Invalidate, and responder chooses one R_key to invalidate. 57962306a36Sopenharmony_ci * 58062306a36Sopenharmony_ci * If there is exactly one distinct R_key in the received transport 58162306a36Sopenharmony_ci * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero. 
58262306a36Sopenharmony_ci */ 58362306a36Sopenharmony_cistatic void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, 58462306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 58562306a36Sopenharmony_ci{ 58662306a36Sopenharmony_ci struct svc_rdma_segment *segment; 58762306a36Sopenharmony_ci struct svc_rdma_chunk *chunk; 58862306a36Sopenharmony_ci u32 inv_rkey; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci ctxt->rc_inv_rkey = 0; 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci if (!rdma->sc_snd_w_inv) 59362306a36Sopenharmony_ci return; 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ci inv_rkey = 0; 59662306a36Sopenharmony_ci pcl_for_each_chunk(chunk, &ctxt->rc_call_pcl) { 59762306a36Sopenharmony_ci pcl_for_each_segment(segment, chunk) { 59862306a36Sopenharmony_ci if (inv_rkey == 0) 59962306a36Sopenharmony_ci inv_rkey = segment->rs_handle; 60062306a36Sopenharmony_ci else if (inv_rkey != segment->rs_handle) 60162306a36Sopenharmony_ci return; 60262306a36Sopenharmony_ci } 60362306a36Sopenharmony_ci } 60462306a36Sopenharmony_ci pcl_for_each_chunk(chunk, &ctxt->rc_read_pcl) { 60562306a36Sopenharmony_ci pcl_for_each_segment(segment, chunk) { 60662306a36Sopenharmony_ci if (inv_rkey == 0) 60762306a36Sopenharmony_ci inv_rkey = segment->rs_handle; 60862306a36Sopenharmony_ci else if (inv_rkey != segment->rs_handle) 60962306a36Sopenharmony_ci return; 61062306a36Sopenharmony_ci } 61162306a36Sopenharmony_ci } 61262306a36Sopenharmony_ci pcl_for_each_chunk(chunk, &ctxt->rc_write_pcl) { 61362306a36Sopenharmony_ci pcl_for_each_segment(segment, chunk) { 61462306a36Sopenharmony_ci if (inv_rkey == 0) 61562306a36Sopenharmony_ci inv_rkey = segment->rs_handle; 61662306a36Sopenharmony_ci else if (inv_rkey != segment->rs_handle) 61762306a36Sopenharmony_ci return; 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci } 62062306a36Sopenharmony_ci pcl_for_each_chunk(chunk, &ctxt->rc_reply_pcl) { 62162306a36Sopenharmony_ci pcl_for_each_segment(segment, chunk) { 
62262306a36Sopenharmony_ci if (inv_rkey == 0) 62362306a36Sopenharmony_ci inv_rkey = segment->rs_handle; 62462306a36Sopenharmony_ci else if (inv_rkey != segment->rs_handle) 62562306a36Sopenharmony_ci return; 62662306a36Sopenharmony_ci } 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci ctxt->rc_inv_rkey = inv_rkey; 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci/** 63262306a36Sopenharmony_ci * svc_rdma_xdr_decode_req - Decode the transport header 63362306a36Sopenharmony_ci * @rq_arg: xdr_buf containing ingress RPC/RDMA message 63462306a36Sopenharmony_ci * @rctxt: state of decoding 63562306a36Sopenharmony_ci * 63662306a36Sopenharmony_ci * On entry, xdr->head[0].iov_base points to first byte of the 63762306a36Sopenharmony_ci * RPC-over-RDMA transport header. 63862306a36Sopenharmony_ci * 63962306a36Sopenharmony_ci * On successful exit, head[0] points to first byte past the 64062306a36Sopenharmony_ci * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message. 64162306a36Sopenharmony_ci * 64262306a36Sopenharmony_ci * The length of the RPC-over-RDMA header is returned. 64362306a36Sopenharmony_ci * 64462306a36Sopenharmony_ci * Assumptions: 64562306a36Sopenharmony_ci * - The transport header is entirely contained in the head iovec. 
64662306a36Sopenharmony_ci */ 64762306a36Sopenharmony_cistatic int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, 64862306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *rctxt) 64962306a36Sopenharmony_ci{ 65062306a36Sopenharmony_ci __be32 *p, *rdma_argp; 65162306a36Sopenharmony_ci unsigned int hdr_len; 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci rdma_argp = rq_arg->head[0].iov_base; 65462306a36Sopenharmony_ci xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL); 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, 65762306a36Sopenharmony_ci rpcrdma_fixed_maxsz * sizeof(*p)); 65862306a36Sopenharmony_ci if (unlikely(!p)) 65962306a36Sopenharmony_ci goto out_short; 66062306a36Sopenharmony_ci p++; 66162306a36Sopenharmony_ci if (*p != rpcrdma_version) 66262306a36Sopenharmony_ci goto out_version; 66362306a36Sopenharmony_ci p += 2; 66462306a36Sopenharmony_ci rctxt->rc_msgtype = *p; 66562306a36Sopenharmony_ci switch (rctxt->rc_msgtype) { 66662306a36Sopenharmony_ci case rdma_msg: 66762306a36Sopenharmony_ci break; 66862306a36Sopenharmony_ci case rdma_nomsg: 66962306a36Sopenharmony_ci break; 67062306a36Sopenharmony_ci case rdma_done: 67162306a36Sopenharmony_ci goto out_drop; 67262306a36Sopenharmony_ci case rdma_error: 67362306a36Sopenharmony_ci goto out_drop; 67462306a36Sopenharmony_ci default: 67562306a36Sopenharmony_ci goto out_proc; 67662306a36Sopenharmony_ci } 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci if (!xdr_check_read_list(rctxt)) 67962306a36Sopenharmony_ci goto out_inval; 68062306a36Sopenharmony_ci if (!xdr_check_write_list(rctxt)) 68162306a36Sopenharmony_ci goto out_inval; 68262306a36Sopenharmony_ci if (!xdr_check_reply_chunk(rctxt)) 68362306a36Sopenharmony_ci goto out_inval; 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci rq_arg->head[0].iov_base = rctxt->rc_stream.p; 68662306a36Sopenharmony_ci hdr_len = xdr_stream_pos(&rctxt->rc_stream); 68762306a36Sopenharmony_ci rq_arg->head[0].iov_len -= 
hdr_len; 68862306a36Sopenharmony_ci rq_arg->len -= hdr_len; 68962306a36Sopenharmony_ci trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len); 69062306a36Sopenharmony_ci return hdr_len; 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ciout_short: 69362306a36Sopenharmony_ci trace_svcrdma_decode_short_err(rctxt, rq_arg->len); 69462306a36Sopenharmony_ci return -EINVAL; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ciout_version: 69762306a36Sopenharmony_ci trace_svcrdma_decode_badvers_err(rctxt, rdma_argp); 69862306a36Sopenharmony_ci return -EPROTONOSUPPORT; 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ciout_drop: 70162306a36Sopenharmony_ci trace_svcrdma_decode_drop_err(rctxt, rdma_argp); 70262306a36Sopenharmony_ci return 0; 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ciout_proc: 70562306a36Sopenharmony_ci trace_svcrdma_decode_badproc_err(rctxt, rdma_argp); 70662306a36Sopenharmony_ci return -EINVAL; 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ciout_inval: 70962306a36Sopenharmony_ci trace_svcrdma_decode_parse_err(rctxt, rdma_argp); 71062306a36Sopenharmony_ci return -EINVAL; 71162306a36Sopenharmony_ci} 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_cistatic void svc_rdma_send_error(struct svcxprt_rdma *rdma, 71462306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *rctxt, 71562306a36Sopenharmony_ci int status) 71662306a36Sopenharmony_ci{ 71762306a36Sopenharmony_ci struct svc_rdma_send_ctxt *sctxt; 71862306a36Sopenharmony_ci 71962306a36Sopenharmony_ci sctxt = svc_rdma_send_ctxt_get(rdma); 72062306a36Sopenharmony_ci if (!sctxt) 72162306a36Sopenharmony_ci return; 72262306a36Sopenharmony_ci svc_rdma_send_error_msg(rdma, sctxt, rctxt, status); 72362306a36Sopenharmony_ci} 72462306a36Sopenharmony_ci 72562306a36Sopenharmony_ci/* By convention, backchannel calls arrive via rdma_msg type 72662306a36Sopenharmony_ci * messages, and never populate the chunk lists. 
This makes 72762306a36Sopenharmony_ci * the RPC/RDMA header small and fixed in size, so it is 72862306a36Sopenharmony_ci * straightforward to check the RPC header's direction field. 72962306a36Sopenharmony_ci */ 73062306a36Sopenharmony_cistatic bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt, 73162306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *rctxt) 73262306a36Sopenharmony_ci{ 73362306a36Sopenharmony_ci __be32 *p = rctxt->rc_recv_buf; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci if (!xprt->xpt_bc_xprt) 73662306a36Sopenharmony_ci return false; 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci if (rctxt->rc_msgtype != rdma_msg) 73962306a36Sopenharmony_ci return false; 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci if (!pcl_is_empty(&rctxt->rc_call_pcl)) 74262306a36Sopenharmony_ci return false; 74362306a36Sopenharmony_ci if (!pcl_is_empty(&rctxt->rc_read_pcl)) 74462306a36Sopenharmony_ci return false; 74562306a36Sopenharmony_ci if (!pcl_is_empty(&rctxt->rc_write_pcl)) 74662306a36Sopenharmony_ci return false; 74762306a36Sopenharmony_ci if (!pcl_is_empty(&rctxt->rc_reply_pcl)) 74862306a36Sopenharmony_ci return false; 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci /* RPC call direction */ 75162306a36Sopenharmony_ci if (*(p + 8) == cpu_to_be32(RPC_CALL)) 75262306a36Sopenharmony_ci return false; 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci return true; 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci/** 75862306a36Sopenharmony_ci * svc_rdma_recvfrom - Receive an RPC call 75962306a36Sopenharmony_ci * @rqstp: request structure into which to receive an RPC Call 76062306a36Sopenharmony_ci * 76162306a36Sopenharmony_ci * Returns: 76262306a36Sopenharmony_ci * The positive number of bytes in the RPC Call message, 76362306a36Sopenharmony_ci * %0 if there were no Calls ready to return, 76462306a36Sopenharmony_ci * %-EINVAL if the Read chunk data is too large, 76562306a36Sopenharmony_ci * %-ENOMEM 
if rdma_rw context pool was exhausted, 76662306a36Sopenharmony_ci * %-ENOTCONN if posting failed (connection is lost), 76762306a36Sopenharmony_ci * %-EIO if rdma_rw initialization failed (DMA mapping, etc). 76862306a36Sopenharmony_ci * 76962306a36Sopenharmony_ci * Called in a loop when XPT_DATA is set. XPT_DATA is cleared only 77062306a36Sopenharmony_ci * when there are no remaining ctxt's to process. 77162306a36Sopenharmony_ci * 77262306a36Sopenharmony_ci * The next ctxt is removed from the "receive" lists. 77362306a36Sopenharmony_ci * 77462306a36Sopenharmony_ci * - If the ctxt completes a Receive, then construct the Call 77562306a36Sopenharmony_ci * message from the contents of the Receive buffer. 77662306a36Sopenharmony_ci * 77762306a36Sopenharmony_ci * - If there are no Read chunks in this message, then finish 77862306a36Sopenharmony_ci * assembling the Call message and return the number of bytes 77962306a36Sopenharmony_ci * in the message. 78062306a36Sopenharmony_ci * 78162306a36Sopenharmony_ci * - If there are Read chunks in this message, post Read WRs to 78262306a36Sopenharmony_ci * pull that payload. When the Read WRs complete, build the 78362306a36Sopenharmony_ci * full message and return the number of bytes in it. 78462306a36Sopenharmony_ci */ 78562306a36Sopenharmony_ciint svc_rdma_recvfrom(struct svc_rqst *rqstp) 78662306a36Sopenharmony_ci{ 78762306a36Sopenharmony_ci struct svc_xprt *xprt = rqstp->rq_xprt; 78862306a36Sopenharmony_ci struct svcxprt_rdma *rdma_xprt = 78962306a36Sopenharmony_ci container_of(xprt, struct svcxprt_rdma, sc_xprt); 79062306a36Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 79162306a36Sopenharmony_ci int ret; 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_ci /* Prevent svc_xprt_release() from releasing pages in rq_pages 79462306a36Sopenharmony_ci * when returning 0 or an error. 
79562306a36Sopenharmony_ci */ 79662306a36Sopenharmony_ci rqstp->rq_respages = rqstp->rq_pages; 79762306a36Sopenharmony_ci rqstp->rq_next_page = rqstp->rq_respages; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci rqstp->rq_xprt_ctxt = NULL; 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci ctxt = NULL; 80262306a36Sopenharmony_ci spin_lock(&rdma_xprt->sc_rq_dto_lock); 80362306a36Sopenharmony_ci ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q); 80462306a36Sopenharmony_ci if (ctxt) 80562306a36Sopenharmony_ci list_del(&ctxt->rc_list); 80662306a36Sopenharmony_ci else 80762306a36Sopenharmony_ci /* No new incoming requests, terminate the loop */ 80862306a36Sopenharmony_ci clear_bit(XPT_DATA, &xprt->xpt_flags); 80962306a36Sopenharmony_ci spin_unlock(&rdma_xprt->sc_rq_dto_lock); 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci /* Unblock the transport for the next receive */ 81262306a36Sopenharmony_ci svc_xprt_received(xprt); 81362306a36Sopenharmony_ci if (!ctxt) 81462306a36Sopenharmony_ci return 0; 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci percpu_counter_inc(&svcrdma_stat_recv); 81762306a36Sopenharmony_ci ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device, 81862306a36Sopenharmony_ci ctxt->rc_recv_sge.addr, ctxt->rc_byte_len, 81962306a36Sopenharmony_ci DMA_FROM_DEVICE); 82062306a36Sopenharmony_ci svc_rdma_build_arg_xdr(rqstp, ctxt); 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); 82362306a36Sopenharmony_ci if (ret < 0) 82462306a36Sopenharmony_ci goto out_err; 82562306a36Sopenharmony_ci if (ret == 0) 82662306a36Sopenharmony_ci goto out_drop; 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci if (svc_rdma_is_reverse_direction_reply(xprt, ctxt)) 82962306a36Sopenharmony_ci goto out_backchannel; 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci svc_rdma_get_inv_rkey(rdma_xprt, ctxt); 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci if (!pcl_is_empty(&ctxt->rc_read_pcl) || 
83462306a36Sopenharmony_ci !pcl_is_empty(&ctxt->rc_call_pcl)) { 83562306a36Sopenharmony_ci ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt); 83662306a36Sopenharmony_ci if (ret < 0) 83762306a36Sopenharmony_ci goto out_readfail; 83862306a36Sopenharmony_ci } 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci rqstp->rq_xprt_ctxt = ctxt; 84162306a36Sopenharmony_ci rqstp->rq_prot = IPPROTO_MAX; 84262306a36Sopenharmony_ci svc_xprt_copy_addrs(rqstp, xprt); 84362306a36Sopenharmony_ci set_bit(RQ_SECURE, &rqstp->rq_flags); 84462306a36Sopenharmony_ci return rqstp->rq_arg.len; 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ciout_err: 84762306a36Sopenharmony_ci svc_rdma_send_error(rdma_xprt, ctxt, ret); 84862306a36Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 84962306a36Sopenharmony_ci return 0; 85062306a36Sopenharmony_ci 85162306a36Sopenharmony_ciout_readfail: 85262306a36Sopenharmony_ci if (ret == -EINVAL) 85362306a36Sopenharmony_ci svc_rdma_send_error(rdma_xprt, ctxt, ret); 85462306a36Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 85562306a36Sopenharmony_ci svc_xprt_deferred_close(xprt); 85662306a36Sopenharmony_ci return -ENOTCONN; 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ciout_backchannel: 85962306a36Sopenharmony_ci svc_rdma_handle_bc_reply(rqstp, ctxt); 86062306a36Sopenharmony_ciout_drop: 86162306a36Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 86262306a36Sopenharmony_ci return 0; 86362306a36Sopenharmony_ci} 864