18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (c) 2016-2018 Oracle. All rights reserved. 48c2ecf20Sopenharmony_ci * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 58c2ecf20Sopenharmony_ci * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two 88c2ecf20Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 98c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 108c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the BSD-type 118c2ecf20Sopenharmony_ci * license below: 128c2ecf20Sopenharmony_ci * 138c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or without 148c2ecf20Sopenharmony_ci * modification, are permitted provided that the following conditions 158c2ecf20Sopenharmony_ci * are met: 168c2ecf20Sopenharmony_ci * 178c2ecf20Sopenharmony_ci * Redistributions of source code must retain the above copyright 188c2ecf20Sopenharmony_ci * notice, this list of conditions and the following disclaimer. 198c2ecf20Sopenharmony_ci * 208c2ecf20Sopenharmony_ci * Redistributions in binary form must reproduce the above 218c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 228c2ecf20Sopenharmony_ci * disclaimer in the documentation and/or other materials provided 238c2ecf20Sopenharmony_ci * with the distribution. 248c2ecf20Sopenharmony_ci * 258c2ecf20Sopenharmony_ci * Neither the name of the Network Appliance, Inc. nor the names of 268c2ecf20Sopenharmony_ci * its contributors may be used to endorse or promote products 278c2ecf20Sopenharmony_ci * derived from this software without specific prior written 288c2ecf20Sopenharmony_ci * permission. 
298c2ecf20Sopenharmony_ci * 308c2ecf20Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 318c2ecf20Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 328c2ecf20Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 338c2ecf20Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 348c2ecf20Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 358c2ecf20Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 368c2ecf20Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 378c2ecf20Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 388c2ecf20Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 398c2ecf20Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 408c2ecf20Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 418c2ecf20Sopenharmony_ci * 428c2ecf20Sopenharmony_ci * Author: Tom Tucker <tom@opengridcomputing.com> 438c2ecf20Sopenharmony_ci */ 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci/* Operation 468c2ecf20Sopenharmony_ci * 478c2ecf20Sopenharmony_ci * The main entry point is svc_rdma_recvfrom. This is called from 488c2ecf20Sopenharmony_ci * svc_recv when the transport indicates there is incoming data to 498c2ecf20Sopenharmony_ci * be read. "Data Ready" is signaled when an RDMA Receive completes, 508c2ecf20Sopenharmony_ci * or when a set of RDMA Reads complete. 518c2ecf20Sopenharmony_ci * 528c2ecf20Sopenharmony_ci * An svc_rqst is passed in. This structure contains an array of 538c2ecf20Sopenharmony_ci * free pages (rq_pages) that will contain the incoming RPC message. 
548c2ecf20Sopenharmony_ci * 558c2ecf20Sopenharmony_ci * Short messages are moved directly into svc_rqst::rq_arg, and 568c2ecf20Sopenharmony_ci * the RPC Call is ready to be processed by the Upper Layer. 578c2ecf20Sopenharmony_ci * svc_rdma_recvfrom returns the length of the RPC Call message, 588c2ecf20Sopenharmony_ci * completing the reception of the RPC Call. 598c2ecf20Sopenharmony_ci * 608c2ecf20Sopenharmony_ci * However, when an incoming message has Read chunks, 618c2ecf20Sopenharmony_ci * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's 628c2ecf20Sopenharmony_ci * data payload from the client. svc_rdma_recvfrom sets up the 638c2ecf20Sopenharmony_ci * RDMA Reads using pages in svc_rqst::rq_pages, which are 648c2ecf20Sopenharmony_ci * transferred to an svc_rdma_recv_ctxt for the duration of the 658c2ecf20Sopenharmony_ci * I/O. svc_rdma_recvfrom then returns zero, since the RPC message 668c2ecf20Sopenharmony_ci * is still not yet ready. 678c2ecf20Sopenharmony_ci * 688c2ecf20Sopenharmony_ci * When the Read chunk payloads have become available on the 698c2ecf20Sopenharmony_ci * server, "Data Ready" is raised again, and svc_recv calls 708c2ecf20Sopenharmony_ci * svc_rdma_recvfrom again. This second call may use a different 718c2ecf20Sopenharmony_ci * svc_rqst than the first one, thus any information that needs 728c2ecf20Sopenharmony_ci * to be preserved across these two calls is kept in an 738c2ecf20Sopenharmony_ci * svc_rdma_recv_ctxt. 748c2ecf20Sopenharmony_ci * 758c2ecf20Sopenharmony_ci * The second call to svc_rdma_recvfrom performs final assembly 768c2ecf20Sopenharmony_ci * of the RPC Call message, using the RDMA Read sink pages kept in 778c2ecf20Sopenharmony_ci * the svc_rdma_recv_ctxt. The xdr_buf is copied from the 788c2ecf20Sopenharmony_ci * svc_rdma_recv_ctxt to the second svc_rqst. The second call returns 798c2ecf20Sopenharmony_ci * the length of the completed RPC Call message. 
808c2ecf20Sopenharmony_ci * 818c2ecf20Sopenharmony_ci * Page Management 828c2ecf20Sopenharmony_ci * 838c2ecf20Sopenharmony_ci * Pages under I/O must be transferred from the first svc_rqst to an 848c2ecf20Sopenharmony_ci * svc_rdma_recv_ctxt before the first svc_rdma_recvfrom call returns. 858c2ecf20Sopenharmony_ci * 868c2ecf20Sopenharmony_ci * The first svc_rqst supplies pages for RDMA Reads. These are moved 878c2ecf20Sopenharmony_ci * from rqstp::rq_pages into ctxt::pages. The consumed elements of 888c2ecf20Sopenharmony_ci * the rq_pages array are set to NULL and refilled with the first 898c2ecf20Sopenharmony_ci * svc_rdma_recvfrom call returns. 908c2ecf20Sopenharmony_ci * 918c2ecf20Sopenharmony_ci * During the second svc_rdma_recvfrom call, RDMA Read sink pages 928c2ecf20Sopenharmony_ci * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst 938c2ecf20Sopenharmony_ci * (see rdma_read_complete() below). 948c2ecf20Sopenharmony_ci */ 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci#include <linux/spinlock.h> 978c2ecf20Sopenharmony_ci#include <asm/unaligned.h> 988c2ecf20Sopenharmony_ci#include <rdma/ib_verbs.h> 998c2ecf20Sopenharmony_ci#include <rdma/rdma_cm.h> 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci#include <linux/sunrpc/xdr.h> 1028c2ecf20Sopenharmony_ci#include <linux/sunrpc/debug.h> 1038c2ecf20Sopenharmony_ci#include <linux/sunrpc/rpc_rdma.h> 1048c2ecf20Sopenharmony_ci#include <linux/sunrpc/svc_rdma.h> 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci#include "xprt_rdma.h" 1078c2ecf20Sopenharmony_ci#include <trace/events/rpcrdma.h> 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci#define RPCDBG_FACILITY RPCDBG_SVCXPRT 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_cistatic void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc); 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_cistatic inline struct svc_rdma_recv_ctxt * 1148c2ecf20Sopenharmony_cisvc_rdma_next_recv_ctxt(struct list_head *list) 
1158c2ecf20Sopenharmony_ci{ 1168c2ecf20Sopenharmony_ci return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt, 1178c2ecf20Sopenharmony_ci rc_list); 1188c2ecf20Sopenharmony_ci} 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_cistatic void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, 1218c2ecf20Sopenharmony_ci struct rpc_rdma_cid *cid) 1228c2ecf20Sopenharmony_ci{ 1238c2ecf20Sopenharmony_ci cid->ci_queue_id = rdma->sc_rq_cq->res.id; 1248c2ecf20Sopenharmony_ci cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); 1258c2ecf20Sopenharmony_ci} 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_cistatic struct svc_rdma_recv_ctxt * 1288c2ecf20Sopenharmony_cisvc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma) 1298c2ecf20Sopenharmony_ci{ 1308c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 1318c2ecf20Sopenharmony_ci dma_addr_t addr; 1328c2ecf20Sopenharmony_ci void *buffer; 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); 1358c2ecf20Sopenharmony_ci if (!ctxt) 1368c2ecf20Sopenharmony_ci goto fail0; 1378c2ecf20Sopenharmony_ci buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL); 1388c2ecf20Sopenharmony_ci if (!buffer) 1398c2ecf20Sopenharmony_ci goto fail1; 1408c2ecf20Sopenharmony_ci addr = ib_dma_map_single(rdma->sc_pd->device, buffer, 1418c2ecf20Sopenharmony_ci rdma->sc_max_req_size, DMA_FROM_DEVICE); 1428c2ecf20Sopenharmony_ci if (ib_dma_mapping_error(rdma->sc_pd->device, addr)) 1438c2ecf20Sopenharmony_ci goto fail2; 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid); 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci ctxt->rc_recv_wr.next = NULL; 1488c2ecf20Sopenharmony_ci ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe; 1498c2ecf20Sopenharmony_ci ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge; 1508c2ecf20Sopenharmony_ci ctxt->rc_recv_wr.num_sge = 1; 1518c2ecf20Sopenharmony_ci ctxt->rc_cqe.done = svc_rdma_wc_receive; 1528c2ecf20Sopenharmony_ci 
ctxt->rc_recv_sge.addr = addr; 1538c2ecf20Sopenharmony_ci ctxt->rc_recv_sge.length = rdma->sc_max_req_size; 1548c2ecf20Sopenharmony_ci ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey; 1558c2ecf20Sopenharmony_ci ctxt->rc_recv_buf = buffer; 1568c2ecf20Sopenharmony_ci ctxt->rc_temp = false; 1578c2ecf20Sopenharmony_ci return ctxt; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_cifail2: 1608c2ecf20Sopenharmony_ci kfree(buffer); 1618c2ecf20Sopenharmony_cifail1: 1628c2ecf20Sopenharmony_ci kfree(ctxt); 1638c2ecf20Sopenharmony_cifail0: 1648c2ecf20Sopenharmony_ci return NULL; 1658c2ecf20Sopenharmony_ci} 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_cistatic void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma, 1688c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 1698c2ecf20Sopenharmony_ci{ 1708c2ecf20Sopenharmony_ci ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr, 1718c2ecf20Sopenharmony_ci ctxt->rc_recv_sge.length, DMA_FROM_DEVICE); 1728c2ecf20Sopenharmony_ci kfree(ctxt->rc_recv_buf); 1738c2ecf20Sopenharmony_ci kfree(ctxt); 1748c2ecf20Sopenharmony_ci} 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci/** 1778c2ecf20Sopenharmony_ci * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt 1788c2ecf20Sopenharmony_ci * @rdma: svcxprt_rdma being torn down 1798c2ecf20Sopenharmony_ci * 1808c2ecf20Sopenharmony_ci */ 1818c2ecf20Sopenharmony_civoid svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma) 1828c2ecf20Sopenharmony_ci{ 1838c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 1848c2ecf20Sopenharmony_ci struct llist_node *node; 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci while ((node = llist_del_first(&rdma->sc_recv_ctxts))) { 1878c2ecf20Sopenharmony_ci ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node); 1888c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_destroy(rdma, ctxt); 1898c2ecf20Sopenharmony_ci } 1908c2ecf20Sopenharmony_ci} 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_cistatic struct 
svc_rdma_recv_ctxt * 1938c2ecf20Sopenharmony_cisvc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma) 1948c2ecf20Sopenharmony_ci{ 1958c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 1968c2ecf20Sopenharmony_ci struct llist_node *node; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci node = llist_del_first(&rdma->sc_recv_ctxts); 1998c2ecf20Sopenharmony_ci if (!node) 2008c2ecf20Sopenharmony_ci goto out_empty; 2018c2ecf20Sopenharmony_ci ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node); 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ciout: 2048c2ecf20Sopenharmony_ci ctxt->rc_page_count = 0; 2058c2ecf20Sopenharmony_ci ctxt->rc_read_payload_length = 0; 2068c2ecf20Sopenharmony_ci return ctxt; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ciout_empty: 2098c2ecf20Sopenharmony_ci ctxt = svc_rdma_recv_ctxt_alloc(rdma); 2108c2ecf20Sopenharmony_ci if (!ctxt) 2118c2ecf20Sopenharmony_ci return NULL; 2128c2ecf20Sopenharmony_ci goto out; 2138c2ecf20Sopenharmony_ci} 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci/** 2168c2ecf20Sopenharmony_ci * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list 2178c2ecf20Sopenharmony_ci * @rdma: controlling svcxprt_rdma 2188c2ecf20Sopenharmony_ci * @ctxt: object to return to the free list 2198c2ecf20Sopenharmony_ci * 2208c2ecf20Sopenharmony_ci */ 2218c2ecf20Sopenharmony_civoid svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, 2228c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 2238c2ecf20Sopenharmony_ci{ 2248c2ecf20Sopenharmony_ci unsigned int i; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci for (i = 0; i < ctxt->rc_page_count; i++) 2278c2ecf20Sopenharmony_ci put_page(ctxt->rc_pages[i]); 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci if (!ctxt->rc_temp) 2308c2ecf20Sopenharmony_ci llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts); 2318c2ecf20Sopenharmony_ci else 2328c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_destroy(rdma, ctxt); 2338c2ecf20Sopenharmony_ci} 2348c2ecf20Sopenharmony_ci 
2358c2ecf20Sopenharmony_ci/** 2368c2ecf20Sopenharmony_ci * svc_rdma_release_rqst - Release transport-specific per-rqst resources 2378c2ecf20Sopenharmony_ci * @rqstp: svc_rqst being released 2388c2ecf20Sopenharmony_ci * 2398c2ecf20Sopenharmony_ci * Ensure that the recv_ctxt is released whether or not a Reply 2408c2ecf20Sopenharmony_ci * was sent. For example, the client could close the connection, 2418c2ecf20Sopenharmony_ci * or svc_process could drop an RPC, before the Reply is sent. 2428c2ecf20Sopenharmony_ci */ 2438c2ecf20Sopenharmony_civoid svc_rdma_release_rqst(struct svc_rqst *rqstp) 2448c2ecf20Sopenharmony_ci{ 2458c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt; 2468c2ecf20Sopenharmony_ci struct svc_xprt *xprt = rqstp->rq_xprt; 2478c2ecf20Sopenharmony_ci struct svcxprt_rdma *rdma = 2488c2ecf20Sopenharmony_ci container_of(xprt, struct svcxprt_rdma, sc_xprt); 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci rqstp->rq_xprt_ctxt = NULL; 2518c2ecf20Sopenharmony_ci if (ctxt) 2528c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 2538c2ecf20Sopenharmony_ci} 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_cistatic int __svc_rdma_post_recv(struct svcxprt_rdma *rdma, 2568c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 2578c2ecf20Sopenharmony_ci{ 2588c2ecf20Sopenharmony_ci int ret; 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci trace_svcrdma_post_recv(ctxt); 2618c2ecf20Sopenharmony_ci ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL); 2628c2ecf20Sopenharmony_ci if (ret) 2638c2ecf20Sopenharmony_ci goto err_post; 2648c2ecf20Sopenharmony_ci return 0; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_cierr_post: 2678c2ecf20Sopenharmony_ci trace_svcrdma_rq_post_err(rdma, ret); 2688c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 2698c2ecf20Sopenharmony_ci return ret; 2708c2ecf20Sopenharmony_ci} 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_cistatic int svc_rdma_post_recv(struct svcxprt_rdma 
*rdma) 2738c2ecf20Sopenharmony_ci{ 2748c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) 2778c2ecf20Sopenharmony_ci return 0; 2788c2ecf20Sopenharmony_ci ctxt = svc_rdma_recv_ctxt_get(rdma); 2798c2ecf20Sopenharmony_ci if (!ctxt) 2808c2ecf20Sopenharmony_ci return -ENOMEM; 2818c2ecf20Sopenharmony_ci return __svc_rdma_post_recv(rdma, ctxt); 2828c2ecf20Sopenharmony_ci} 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci/** 2858c2ecf20Sopenharmony_ci * svc_rdma_post_recvs - Post initial set of Recv WRs 2868c2ecf20Sopenharmony_ci * @rdma: fresh svcxprt_rdma 2878c2ecf20Sopenharmony_ci * 2888c2ecf20Sopenharmony_ci * Returns true if successful, otherwise false. 2898c2ecf20Sopenharmony_ci */ 2908c2ecf20Sopenharmony_cibool svc_rdma_post_recvs(struct svcxprt_rdma *rdma) 2918c2ecf20Sopenharmony_ci{ 2928c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 2938c2ecf20Sopenharmony_ci unsigned int i; 2948c2ecf20Sopenharmony_ci int ret; 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci for (i = 0; i < rdma->sc_max_requests; i++) { 2978c2ecf20Sopenharmony_ci ctxt = svc_rdma_recv_ctxt_get(rdma); 2988c2ecf20Sopenharmony_ci if (!ctxt) 2998c2ecf20Sopenharmony_ci return false; 3008c2ecf20Sopenharmony_ci ctxt->rc_temp = true; 3018c2ecf20Sopenharmony_ci ret = __svc_rdma_post_recv(rdma, ctxt); 3028c2ecf20Sopenharmony_ci if (ret) 3038c2ecf20Sopenharmony_ci return false; 3048c2ecf20Sopenharmony_ci } 3058c2ecf20Sopenharmony_ci return true; 3068c2ecf20Sopenharmony_ci} 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci/** 3098c2ecf20Sopenharmony_ci * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 3108c2ecf20Sopenharmony_ci * @cq: Completion Queue context 3118c2ecf20Sopenharmony_ci * @wc: Work Completion object 3128c2ecf20Sopenharmony_ci * 3138c2ecf20Sopenharmony_ci * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that 
3148c2ecf20Sopenharmony_ci * the Receive completion handler could be running. 3158c2ecf20Sopenharmony_ci */ 3168c2ecf20Sopenharmony_cistatic void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 3178c2ecf20Sopenharmony_ci{ 3188c2ecf20Sopenharmony_ci struct svcxprt_rdma *rdma = cq->cq_context; 3198c2ecf20Sopenharmony_ci struct ib_cqe *cqe = wc->wr_cqe; 3208c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci /* WARNING: Only wc->wr_cqe and wc->status are reliable */ 3238c2ecf20Sopenharmony_ci ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe); 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci trace_svcrdma_wc_receive(wc, &ctxt->rc_cid); 3268c2ecf20Sopenharmony_ci if (wc->status != IB_WC_SUCCESS) 3278c2ecf20Sopenharmony_ci goto flushed; 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci if (svc_rdma_post_recv(rdma)) 3308c2ecf20Sopenharmony_ci goto post_err; 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci /* All wc fields are now known to be valid */ 3338c2ecf20Sopenharmony_ci ctxt->rc_byte_len = wc->byte_len; 3348c2ecf20Sopenharmony_ci ib_dma_sync_single_for_cpu(rdma->sc_pd->device, 3358c2ecf20Sopenharmony_ci ctxt->rc_recv_sge.addr, 3368c2ecf20Sopenharmony_ci wc->byte_len, DMA_FROM_DEVICE); 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci spin_lock(&rdma->sc_rq_dto_lock); 3398c2ecf20Sopenharmony_ci list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q); 3408c2ecf20Sopenharmony_ci /* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */ 3418c2ecf20Sopenharmony_ci set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags); 3428c2ecf20Sopenharmony_ci spin_unlock(&rdma->sc_rq_dto_lock); 3438c2ecf20Sopenharmony_ci if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags)) 3448c2ecf20Sopenharmony_ci svc_xprt_enqueue(&rdma->sc_xprt); 3458c2ecf20Sopenharmony_ci return; 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_ciflushed: 3488c2ecf20Sopenharmony_cipost_err: 3498c2ecf20Sopenharmony_ci 
svc_rdma_recv_ctxt_put(rdma, ctxt); 3508c2ecf20Sopenharmony_ci set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); 3518c2ecf20Sopenharmony_ci svc_xprt_enqueue(&rdma->sc_xprt); 3528c2ecf20Sopenharmony_ci} 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci/** 3558c2ecf20Sopenharmony_ci * svc_rdma_flush_recv_queues - Drain pending Receive work 3568c2ecf20Sopenharmony_ci * @rdma: svcxprt_rdma being shut down 3578c2ecf20Sopenharmony_ci * 3588c2ecf20Sopenharmony_ci */ 3598c2ecf20Sopenharmony_civoid svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma) 3608c2ecf20Sopenharmony_ci{ 3618c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 3628c2ecf20Sopenharmony_ci 3638c2ecf20Sopenharmony_ci while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) { 3648c2ecf20Sopenharmony_ci list_del(&ctxt->rc_list); 3658c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 3668c2ecf20Sopenharmony_ci } 3678c2ecf20Sopenharmony_ci while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) { 3688c2ecf20Sopenharmony_ci list_del(&ctxt->rc_list); 3698c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma, ctxt); 3708c2ecf20Sopenharmony_ci } 3718c2ecf20Sopenharmony_ci} 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_cistatic void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp, 3748c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 3758c2ecf20Sopenharmony_ci{ 3768c2ecf20Sopenharmony_ci struct xdr_buf *arg = &rqstp->rq_arg; 3778c2ecf20Sopenharmony_ci 3788c2ecf20Sopenharmony_ci arg->head[0].iov_base = ctxt->rc_recv_buf; 3798c2ecf20Sopenharmony_ci arg->head[0].iov_len = ctxt->rc_byte_len; 3808c2ecf20Sopenharmony_ci arg->tail[0].iov_base = NULL; 3818c2ecf20Sopenharmony_ci arg->tail[0].iov_len = 0; 3828c2ecf20Sopenharmony_ci arg->page_len = 0; 3838c2ecf20Sopenharmony_ci arg->page_base = 0; 3848c2ecf20Sopenharmony_ci arg->buflen = ctxt->rc_byte_len; 3858c2ecf20Sopenharmony_ci arg->len = ctxt->rc_byte_len; 3868c2ecf20Sopenharmony_ci} 3878c2ecf20Sopenharmony_ci 
3888c2ecf20Sopenharmony_ci/* This accommodates the largest possible Write chunk. 3898c2ecf20Sopenharmony_ci */ 3908c2ecf20Sopenharmony_ci#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT)) 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci/* This accommodates the largest possible Position-Zero 3938c2ecf20Sopenharmony_ci * Read chunk or Reply chunk. 3948c2ecf20Sopenharmony_ci */ 3958c2ecf20Sopenharmony_ci#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT)) 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci/* Sanity check the Read list. 3988c2ecf20Sopenharmony_ci * 3998c2ecf20Sopenharmony_ci * Implementation limits: 4008c2ecf20Sopenharmony_ci * - This implementation supports only one Read chunk. 4018c2ecf20Sopenharmony_ci * 4028c2ecf20Sopenharmony_ci * Sanity checks: 4038c2ecf20Sopenharmony_ci * - Read list does not overflow Receive buffer. 4048c2ecf20Sopenharmony_ci * - Segment size limited by largest NFS data payload. 4058c2ecf20Sopenharmony_ci * 4068c2ecf20Sopenharmony_ci * The segment count is limited to how many segments can 4078c2ecf20Sopenharmony_ci * fit in the transport header without overflowing the 4088c2ecf20Sopenharmony_ci * buffer. That's about 40 Read segments for a 1KB inline 4098c2ecf20Sopenharmony_ci * threshold. 4108c2ecf20Sopenharmony_ci * 4118c2ecf20Sopenharmony_ci * Return values: 4128c2ecf20Sopenharmony_ci * %true: Read list is valid. @rctxt's xdr_stream is updated 4138c2ecf20Sopenharmony_ci * to point to the first byte past the Read list. 4148c2ecf20Sopenharmony_ci * %false: Read list is corrupt. @rctxt's xdr_stream is left 4158c2ecf20Sopenharmony_ci * in an unknown state. 
4168c2ecf20Sopenharmony_ci */ 4178c2ecf20Sopenharmony_cistatic bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt) 4188c2ecf20Sopenharmony_ci{ 4198c2ecf20Sopenharmony_ci u32 position, len; 4208c2ecf20Sopenharmony_ci bool first; 4218c2ecf20Sopenharmony_ci __be32 *p; 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 4248c2ecf20Sopenharmony_ci if (!p) 4258c2ecf20Sopenharmony_ci return false; 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci len = 0; 4288c2ecf20Sopenharmony_ci first = true; 4298c2ecf20Sopenharmony_ci while (xdr_item_is_present(p)) { 4308c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, 4318c2ecf20Sopenharmony_ci rpcrdma_readseg_maxsz * sizeof(*p)); 4328c2ecf20Sopenharmony_ci if (!p) 4338c2ecf20Sopenharmony_ci return false; 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci if (first) { 4368c2ecf20Sopenharmony_ci position = be32_to_cpup(p); 4378c2ecf20Sopenharmony_ci first = false; 4388c2ecf20Sopenharmony_ci } else if (be32_to_cpup(p) != position) { 4398c2ecf20Sopenharmony_ci return false; 4408c2ecf20Sopenharmony_ci } 4418c2ecf20Sopenharmony_ci p += 2; 4428c2ecf20Sopenharmony_ci len += be32_to_cpup(p); 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 4458c2ecf20Sopenharmony_ci if (!p) 4468c2ecf20Sopenharmony_ci return false; 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci return len <= MAX_BYTES_SPECIAL_CHUNK; 4498c2ecf20Sopenharmony_ci} 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci/* The segment count is limited to how many segments can 4528c2ecf20Sopenharmony_ci * fit in the transport header without overflowing the 4538c2ecf20Sopenharmony_ci * buffer. That's about 60 Write segments for a 1KB inline 4548c2ecf20Sopenharmony_ci * threshold. 
4558c2ecf20Sopenharmony_ci */ 4568c2ecf20Sopenharmony_cistatic bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen) 4578c2ecf20Sopenharmony_ci{ 4588c2ecf20Sopenharmony_ci u32 i, segcount, total; 4598c2ecf20Sopenharmony_ci __be32 *p; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 4628c2ecf20Sopenharmony_ci if (!p) 4638c2ecf20Sopenharmony_ci return false; 4648c2ecf20Sopenharmony_ci segcount = be32_to_cpup(p); 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_ci total = 0; 4678c2ecf20Sopenharmony_ci for (i = 0; i < segcount; i++) { 4688c2ecf20Sopenharmony_ci u32 handle, length; 4698c2ecf20Sopenharmony_ci u64 offset; 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, 4728c2ecf20Sopenharmony_ci rpcrdma_segment_maxsz * sizeof(*p)); 4738c2ecf20Sopenharmony_ci if (!p) 4748c2ecf20Sopenharmony_ci return false; 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci xdr_decode_rdma_segment(p, &handle, &length, &offset); 4778c2ecf20Sopenharmony_ci trace_svcrdma_decode_wseg(handle, length, offset); 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci total += length; 4808c2ecf20Sopenharmony_ci } 4818c2ecf20Sopenharmony_ci return total <= maxlen; 4828c2ecf20Sopenharmony_ci} 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci/* Sanity check the Write list. 4858c2ecf20Sopenharmony_ci * 4868c2ecf20Sopenharmony_ci * Implementation limits: 4878c2ecf20Sopenharmony_ci * - This implementation currently supports only one Write chunk. 4888c2ecf20Sopenharmony_ci * 4898c2ecf20Sopenharmony_ci * Sanity checks: 4908c2ecf20Sopenharmony_ci * - Write list does not overflow Receive buffer. 4918c2ecf20Sopenharmony_ci * - Chunk size limited by largest NFS data payload. 4928c2ecf20Sopenharmony_ci * 4938c2ecf20Sopenharmony_ci * Return values: 4948c2ecf20Sopenharmony_ci * %true: Write list is valid. 
@rctxt's xdr_stream is updated 4958c2ecf20Sopenharmony_ci * to point to the first byte past the Write list. 4968c2ecf20Sopenharmony_ci * %false: Write list is corrupt. @rctxt's xdr_stream is left 4978c2ecf20Sopenharmony_ci * in an unknown state. 4988c2ecf20Sopenharmony_ci */ 4998c2ecf20Sopenharmony_cistatic bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt) 5008c2ecf20Sopenharmony_ci{ 5018c2ecf20Sopenharmony_ci u32 chcount = 0; 5028c2ecf20Sopenharmony_ci __be32 *p; 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 5058c2ecf20Sopenharmony_ci if (!p) 5068c2ecf20Sopenharmony_ci return false; 5078c2ecf20Sopenharmony_ci rctxt->rc_write_list = p; 5088c2ecf20Sopenharmony_ci while (xdr_item_is_present(p)) { 5098c2ecf20Sopenharmony_ci if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK)) 5108c2ecf20Sopenharmony_ci return false; 5118c2ecf20Sopenharmony_ci ++chcount; 5128c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 5138c2ecf20Sopenharmony_ci if (!p) 5148c2ecf20Sopenharmony_ci return false; 5158c2ecf20Sopenharmony_ci } 5168c2ecf20Sopenharmony_ci if (!chcount) 5178c2ecf20Sopenharmony_ci rctxt->rc_write_list = NULL; 5188c2ecf20Sopenharmony_ci return chcount < 2; 5198c2ecf20Sopenharmony_ci} 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci/* Sanity check the Reply chunk. 5228c2ecf20Sopenharmony_ci * 5238c2ecf20Sopenharmony_ci * Sanity checks: 5248c2ecf20Sopenharmony_ci * - Reply chunk does not overflow Receive buffer. 5258c2ecf20Sopenharmony_ci * - Chunk size limited by largest NFS data payload. 5268c2ecf20Sopenharmony_ci * 5278c2ecf20Sopenharmony_ci * Return values: 5288c2ecf20Sopenharmony_ci * %true: Reply chunk is valid. @rctxt's xdr_stream is updated 5298c2ecf20Sopenharmony_ci * to point to the first byte past the Reply chunk. 5308c2ecf20Sopenharmony_ci * %false: Reply chunk is corrupt. @rctxt's xdr_stream is left 5318c2ecf20Sopenharmony_ci * in an unknown state. 
5328c2ecf20Sopenharmony_ci */ 5338c2ecf20Sopenharmony_cistatic bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt) 5348c2ecf20Sopenharmony_ci{ 5358c2ecf20Sopenharmony_ci __be32 *p; 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p)); 5388c2ecf20Sopenharmony_ci if (!p) 5398c2ecf20Sopenharmony_ci return false; 5408c2ecf20Sopenharmony_ci rctxt->rc_reply_chunk = NULL; 5418c2ecf20Sopenharmony_ci if (xdr_item_is_present(p)) { 5428c2ecf20Sopenharmony_ci if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK)) 5438c2ecf20Sopenharmony_ci return false; 5448c2ecf20Sopenharmony_ci rctxt->rc_reply_chunk = p; 5458c2ecf20Sopenharmony_ci } 5468c2ecf20Sopenharmony_ci return true; 5478c2ecf20Sopenharmony_ci} 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci/* RPC-over-RDMA Version One private extension: Remote Invalidation. 5508c2ecf20Sopenharmony_ci * Responder's choice: requester signals it can handle Send With 5518c2ecf20Sopenharmony_ci * Invalidate, and responder chooses one R_key to invalidate. 5528c2ecf20Sopenharmony_ci * 5538c2ecf20Sopenharmony_ci * If there is exactly one distinct R_key in the received transport 5548c2ecf20Sopenharmony_ci * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero. 5558c2ecf20Sopenharmony_ci * 5568c2ecf20Sopenharmony_ci * Perform this operation while the received transport header is 5578c2ecf20Sopenharmony_ci * still in the CPU cache. 
5588c2ecf20Sopenharmony_ci */ 5598c2ecf20Sopenharmony_cistatic void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma, 5608c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt) 5618c2ecf20Sopenharmony_ci{ 5628c2ecf20Sopenharmony_ci __be32 inv_rkey, *p; 5638c2ecf20Sopenharmony_ci u32 i, segcount; 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_ci ctxt->rc_inv_rkey = 0; 5668c2ecf20Sopenharmony_ci 5678c2ecf20Sopenharmony_ci if (!rdma->sc_snd_w_inv) 5688c2ecf20Sopenharmony_ci return; 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci inv_rkey = xdr_zero; 5718c2ecf20Sopenharmony_ci p = ctxt->rc_recv_buf; 5728c2ecf20Sopenharmony_ci p += rpcrdma_fixed_maxsz; 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci /* Read list */ 5758c2ecf20Sopenharmony_ci while (xdr_item_is_present(p++)) { 5768c2ecf20Sopenharmony_ci p++; /* position */ 5778c2ecf20Sopenharmony_ci if (inv_rkey == xdr_zero) 5788c2ecf20Sopenharmony_ci inv_rkey = *p; 5798c2ecf20Sopenharmony_ci else if (inv_rkey != *p) 5808c2ecf20Sopenharmony_ci return; 5818c2ecf20Sopenharmony_ci p += 4; 5828c2ecf20Sopenharmony_ci } 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci /* Write list */ 5858c2ecf20Sopenharmony_ci while (xdr_item_is_present(p++)) { 5868c2ecf20Sopenharmony_ci segcount = be32_to_cpup(p++); 5878c2ecf20Sopenharmony_ci for (i = 0; i < segcount; i++) { 5888c2ecf20Sopenharmony_ci if (inv_rkey == xdr_zero) 5898c2ecf20Sopenharmony_ci inv_rkey = *p; 5908c2ecf20Sopenharmony_ci else if (inv_rkey != *p) 5918c2ecf20Sopenharmony_ci return; 5928c2ecf20Sopenharmony_ci p += 4; 5938c2ecf20Sopenharmony_ci } 5948c2ecf20Sopenharmony_ci } 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci /* Reply chunk */ 5978c2ecf20Sopenharmony_ci if (xdr_item_is_present(p++)) { 5988c2ecf20Sopenharmony_ci segcount = be32_to_cpup(p++); 5998c2ecf20Sopenharmony_ci for (i = 0; i < segcount; i++) { 6008c2ecf20Sopenharmony_ci if (inv_rkey == xdr_zero) 6018c2ecf20Sopenharmony_ci inv_rkey = *p; 6028c2ecf20Sopenharmony_ci else if 
(inv_rkey != *p) 6038c2ecf20Sopenharmony_ci return; 6048c2ecf20Sopenharmony_ci p += 4; 6058c2ecf20Sopenharmony_ci } 6068c2ecf20Sopenharmony_ci } 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey); 6098c2ecf20Sopenharmony_ci} 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci/** 6128c2ecf20Sopenharmony_ci * svc_rdma_xdr_decode_req - Decode the transport header 6138c2ecf20Sopenharmony_ci * @rq_arg: xdr_buf containing ingress RPC/RDMA message 6148c2ecf20Sopenharmony_ci * @rctxt: state of decoding 6158c2ecf20Sopenharmony_ci * 6168c2ecf20Sopenharmony_ci * On entry, xdr->head[0].iov_base points to first byte of the 6178c2ecf20Sopenharmony_ci * RPC-over-RDMA transport header. 6188c2ecf20Sopenharmony_ci * 6198c2ecf20Sopenharmony_ci * On successful exit, head[0] points to first byte past the 6208c2ecf20Sopenharmony_ci * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message. 6218c2ecf20Sopenharmony_ci * 6228c2ecf20Sopenharmony_ci * The length of the RPC-over-RDMA header is returned. 6238c2ecf20Sopenharmony_ci * 6248c2ecf20Sopenharmony_ci * Assumptions: 6258c2ecf20Sopenharmony_ci * - The transport header is entirely contained in the head iovec. 
6268c2ecf20Sopenharmony_ci */ 6278c2ecf20Sopenharmony_cistatic int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, 6288c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *rctxt) 6298c2ecf20Sopenharmony_ci{ 6308c2ecf20Sopenharmony_ci __be32 *p, *rdma_argp; 6318c2ecf20Sopenharmony_ci unsigned int hdr_len; 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci rdma_argp = rq_arg->head[0].iov_base; 6348c2ecf20Sopenharmony_ci xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL); 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci p = xdr_inline_decode(&rctxt->rc_stream, 6378c2ecf20Sopenharmony_ci rpcrdma_fixed_maxsz * sizeof(*p)); 6388c2ecf20Sopenharmony_ci if (unlikely(!p)) 6398c2ecf20Sopenharmony_ci goto out_short; 6408c2ecf20Sopenharmony_ci p++; 6418c2ecf20Sopenharmony_ci if (*p != rpcrdma_version) 6428c2ecf20Sopenharmony_ci goto out_version; 6438c2ecf20Sopenharmony_ci p += 2; 6448c2ecf20Sopenharmony_ci switch (*p) { 6458c2ecf20Sopenharmony_ci case rdma_msg: 6468c2ecf20Sopenharmony_ci break; 6478c2ecf20Sopenharmony_ci case rdma_nomsg: 6488c2ecf20Sopenharmony_ci break; 6498c2ecf20Sopenharmony_ci case rdma_done: 6508c2ecf20Sopenharmony_ci goto out_drop; 6518c2ecf20Sopenharmony_ci case rdma_error: 6528c2ecf20Sopenharmony_ci goto out_drop; 6538c2ecf20Sopenharmony_ci default: 6548c2ecf20Sopenharmony_ci goto out_proc; 6558c2ecf20Sopenharmony_ci } 6568c2ecf20Sopenharmony_ci 6578c2ecf20Sopenharmony_ci if (!xdr_check_read_list(rctxt)) 6588c2ecf20Sopenharmony_ci goto out_inval; 6598c2ecf20Sopenharmony_ci if (!xdr_check_write_list(rctxt)) 6608c2ecf20Sopenharmony_ci goto out_inval; 6618c2ecf20Sopenharmony_ci if (!xdr_check_reply_chunk(rctxt)) 6628c2ecf20Sopenharmony_ci goto out_inval; 6638c2ecf20Sopenharmony_ci 6648c2ecf20Sopenharmony_ci rq_arg->head[0].iov_base = rctxt->rc_stream.p; 6658c2ecf20Sopenharmony_ci hdr_len = xdr_stream_pos(&rctxt->rc_stream); 6668c2ecf20Sopenharmony_ci rq_arg->head[0].iov_len -= hdr_len; 6678c2ecf20Sopenharmony_ci rq_arg->len -= hdr_len; 
6688c2ecf20Sopenharmony_ci trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len); 6698c2ecf20Sopenharmony_ci return hdr_len; 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ciout_short: 6728c2ecf20Sopenharmony_ci trace_svcrdma_decode_short_err(rctxt, rq_arg->len); 6738c2ecf20Sopenharmony_ci return -EINVAL; 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ciout_version: 6768c2ecf20Sopenharmony_ci trace_svcrdma_decode_badvers_err(rctxt, rdma_argp); 6778c2ecf20Sopenharmony_ci return -EPROTONOSUPPORT; 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ciout_drop: 6808c2ecf20Sopenharmony_ci trace_svcrdma_decode_drop_err(rctxt, rdma_argp); 6818c2ecf20Sopenharmony_ci return 0; 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_ciout_proc: 6848c2ecf20Sopenharmony_ci trace_svcrdma_decode_badproc_err(rctxt, rdma_argp); 6858c2ecf20Sopenharmony_ci return -EINVAL; 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ciout_inval: 6888c2ecf20Sopenharmony_ci trace_svcrdma_decode_parse_err(rctxt, rdma_argp); 6898c2ecf20Sopenharmony_ci return -EINVAL; 6908c2ecf20Sopenharmony_ci} 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_cistatic void rdma_read_complete(struct svc_rqst *rqstp, 6938c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *head) 6948c2ecf20Sopenharmony_ci{ 6958c2ecf20Sopenharmony_ci int page_no; 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci /* Move Read chunk pages to rqstp so that they will be released 6988c2ecf20Sopenharmony_ci * when svc_process is done with them. 
6998c2ecf20Sopenharmony_ci */ 7008c2ecf20Sopenharmony_ci for (page_no = 0; page_no < head->rc_page_count; page_no++) { 7018c2ecf20Sopenharmony_ci put_page(rqstp->rq_pages[page_no]); 7028c2ecf20Sopenharmony_ci rqstp->rq_pages[page_no] = head->rc_pages[page_no]; 7038c2ecf20Sopenharmony_ci } 7048c2ecf20Sopenharmony_ci head->rc_page_count = 0; 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci /* Point rq_arg.pages past header */ 7078c2ecf20Sopenharmony_ci rqstp->rq_arg.pages = &rqstp->rq_pages[head->rc_hdr_count]; 7088c2ecf20Sopenharmony_ci rqstp->rq_arg.page_len = head->rc_arg.page_len; 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_ci /* rq_respages starts after the last arg page */ 7118c2ecf20Sopenharmony_ci rqstp->rq_respages = &rqstp->rq_pages[page_no]; 7128c2ecf20Sopenharmony_ci rqstp->rq_next_page = rqstp->rq_respages + 1; 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_ci /* Rebuild rq_arg head and tail. */ 7158c2ecf20Sopenharmony_ci rqstp->rq_arg.head[0] = head->rc_arg.head[0]; 7168c2ecf20Sopenharmony_ci rqstp->rq_arg.tail[0] = head->rc_arg.tail[0]; 7178c2ecf20Sopenharmony_ci rqstp->rq_arg.len = head->rc_arg.len; 7188c2ecf20Sopenharmony_ci rqstp->rq_arg.buflen = head->rc_arg.buflen; 7198c2ecf20Sopenharmony_ci} 7208c2ecf20Sopenharmony_ci 7218c2ecf20Sopenharmony_cistatic void svc_rdma_send_error(struct svcxprt_rdma *rdma, 7228c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *rctxt, 7238c2ecf20Sopenharmony_ci int status) 7248c2ecf20Sopenharmony_ci{ 7258c2ecf20Sopenharmony_ci struct svc_rdma_send_ctxt *sctxt; 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci sctxt = svc_rdma_send_ctxt_get(rdma); 7288c2ecf20Sopenharmony_ci if (!sctxt) 7298c2ecf20Sopenharmony_ci return; 7308c2ecf20Sopenharmony_ci svc_rdma_send_error_msg(rdma, sctxt, rctxt, status); 7318c2ecf20Sopenharmony_ci} 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_ci/* By convention, backchannel calls arrive via rdma_msg type 7348c2ecf20Sopenharmony_ci * messages, and never populate the 
chunk lists. This makes 7358c2ecf20Sopenharmony_ci * the RPC/RDMA header small and fixed in size, so it is 7368c2ecf20Sopenharmony_ci * straightforward to check the RPC header's direction field. 7378c2ecf20Sopenharmony_ci */ 7388c2ecf20Sopenharmony_cistatic bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, 7398c2ecf20Sopenharmony_ci __be32 *rdma_resp) 7408c2ecf20Sopenharmony_ci{ 7418c2ecf20Sopenharmony_ci __be32 *p; 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_ci if (!xprt->xpt_bc_xprt) 7448c2ecf20Sopenharmony_ci return false; 7458c2ecf20Sopenharmony_ci 7468c2ecf20Sopenharmony_ci p = rdma_resp + 3; 7478c2ecf20Sopenharmony_ci if (*p++ != rdma_msg) 7488c2ecf20Sopenharmony_ci return false; 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci if (*p++ != xdr_zero) 7518c2ecf20Sopenharmony_ci return false; 7528c2ecf20Sopenharmony_ci if (*p++ != xdr_zero) 7538c2ecf20Sopenharmony_ci return false; 7548c2ecf20Sopenharmony_ci if (*p++ != xdr_zero) 7558c2ecf20Sopenharmony_ci return false; 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci /* XID sanity */ 7588c2ecf20Sopenharmony_ci if (*p++ != *rdma_resp) 7598c2ecf20Sopenharmony_ci return false; 7608c2ecf20Sopenharmony_ci /* call direction */ 7618c2ecf20Sopenharmony_ci if (*p == cpu_to_be32(RPC_CALL)) 7628c2ecf20Sopenharmony_ci return false; 7638c2ecf20Sopenharmony_ci 7648c2ecf20Sopenharmony_ci return true; 7658c2ecf20Sopenharmony_ci} 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci/** 7688c2ecf20Sopenharmony_ci * svc_rdma_recvfrom - Receive an RPC call 7698c2ecf20Sopenharmony_ci * @rqstp: request structure into which to receive an RPC Call 7708c2ecf20Sopenharmony_ci * 7718c2ecf20Sopenharmony_ci * Returns: 7728c2ecf20Sopenharmony_ci * The positive number of bytes in the RPC Call message, 7738c2ecf20Sopenharmony_ci * %0 if there were no Calls ready to return, 7748c2ecf20Sopenharmony_ci * %-EINVAL if the Read chunk data is too large, 7758c2ecf20Sopenharmony_ci * %-ENOMEM if rdma_rw context pool was 
exhausted,
 *	%-ENOTCONN if posting failed (connection is lost),
 *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
 *
 * Called in a loop when XPT_DATA is set. XPT_DATA is cleared only
 * when there are no remaining ctxt's to process.
 *
 * The next ctxt is removed from the "receive" lists.
 *
 * - If the ctxt completes a Read, then finish assembling the Call
 *   message and return the number of bytes in the message.
 *
 * - If the ctxt completes a Receive, then construct the Call
 *   message from the contents of the Receive buffer.
 *
 *   - If there are no Read chunks in this message, then finish
 *     assembling the Call message and return the number of bytes
 *     in the message.
 *
 *   - If there are Read chunks in this message, post Read WRs to
 *     pull that payload and return 0.
7968c2ecf20Sopenharmony_ci */ 7978c2ecf20Sopenharmony_ciint svc_rdma_recvfrom(struct svc_rqst *rqstp) 7988c2ecf20Sopenharmony_ci{ 7998c2ecf20Sopenharmony_ci struct svc_xprt *xprt = rqstp->rq_xprt; 8008c2ecf20Sopenharmony_ci struct svcxprt_rdma *rdma_xprt = 8018c2ecf20Sopenharmony_ci container_of(xprt, struct svcxprt_rdma, sc_xprt); 8028c2ecf20Sopenharmony_ci struct svc_rdma_recv_ctxt *ctxt; 8038c2ecf20Sopenharmony_ci __be32 *p; 8048c2ecf20Sopenharmony_ci int ret; 8058c2ecf20Sopenharmony_ci 8068c2ecf20Sopenharmony_ci rqstp->rq_xprt_ctxt = NULL; 8078c2ecf20Sopenharmony_ci 8088c2ecf20Sopenharmony_ci spin_lock(&rdma_xprt->sc_rq_dto_lock); 8098c2ecf20Sopenharmony_ci ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q); 8108c2ecf20Sopenharmony_ci if (ctxt) { 8118c2ecf20Sopenharmony_ci list_del(&ctxt->rc_list); 8128c2ecf20Sopenharmony_ci spin_unlock(&rdma_xprt->sc_rq_dto_lock); 8138c2ecf20Sopenharmony_ci rdma_read_complete(rqstp, ctxt); 8148c2ecf20Sopenharmony_ci goto complete; 8158c2ecf20Sopenharmony_ci } 8168c2ecf20Sopenharmony_ci ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q); 8178c2ecf20Sopenharmony_ci if (!ctxt) { 8188c2ecf20Sopenharmony_ci /* No new incoming requests, terminate the loop */ 8198c2ecf20Sopenharmony_ci clear_bit(XPT_DATA, &xprt->xpt_flags); 8208c2ecf20Sopenharmony_ci spin_unlock(&rdma_xprt->sc_rq_dto_lock); 8218c2ecf20Sopenharmony_ci return 0; 8228c2ecf20Sopenharmony_ci } 8238c2ecf20Sopenharmony_ci list_del(&ctxt->rc_list); 8248c2ecf20Sopenharmony_ci spin_unlock(&rdma_xprt->sc_rq_dto_lock); 8258c2ecf20Sopenharmony_ci 8268c2ecf20Sopenharmony_ci atomic_inc(&rdma_stat_recv); 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci svc_rdma_build_arg_xdr(rqstp, ctxt); 8298c2ecf20Sopenharmony_ci 8308c2ecf20Sopenharmony_ci /* Prevent svc_xprt_release from releasing pages in rq_pages 8318c2ecf20Sopenharmony_ci * if we return 0 or an error. 
8328c2ecf20Sopenharmony_ci */ 8338c2ecf20Sopenharmony_ci rqstp->rq_respages = rqstp->rq_pages; 8348c2ecf20Sopenharmony_ci rqstp->rq_next_page = rqstp->rq_respages; 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci p = (__be32 *)rqstp->rq_arg.head[0].iov_base; 8378c2ecf20Sopenharmony_ci ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt); 8388c2ecf20Sopenharmony_ci if (ret < 0) 8398c2ecf20Sopenharmony_ci goto out_err; 8408c2ecf20Sopenharmony_ci if (ret == 0) 8418c2ecf20Sopenharmony_ci goto out_drop; 8428c2ecf20Sopenharmony_ci rqstp->rq_xprt_hlen = ret; 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci if (svc_rdma_is_backchannel_reply(xprt, p)) 8458c2ecf20Sopenharmony_ci goto out_backchannel; 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_ci svc_rdma_get_inv_rkey(rdma_xprt, ctxt); 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ci p += rpcrdma_fixed_maxsz; 8508c2ecf20Sopenharmony_ci if (*p != xdr_zero) 8518c2ecf20Sopenharmony_ci goto out_readchunk; 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_cicomplete: 8548c2ecf20Sopenharmony_ci rqstp->rq_xprt_ctxt = ctxt; 8558c2ecf20Sopenharmony_ci rqstp->rq_prot = IPPROTO_MAX; 8568c2ecf20Sopenharmony_ci svc_xprt_copy_addrs(rqstp, xprt); 8578c2ecf20Sopenharmony_ci return rqstp->rq_arg.len; 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_ciout_readchunk: 8608c2ecf20Sopenharmony_ci ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p); 8618c2ecf20Sopenharmony_ci if (ret < 0) 8628c2ecf20Sopenharmony_ci goto out_postfail; 8638c2ecf20Sopenharmony_ci return 0; 8648c2ecf20Sopenharmony_ci 8658c2ecf20Sopenharmony_ciout_err: 8668c2ecf20Sopenharmony_ci svc_rdma_send_error(rdma_xprt, ctxt, ret); 8678c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 8688c2ecf20Sopenharmony_ci return 0; 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_ciout_postfail: 8718c2ecf20Sopenharmony_ci if (ret == -EINVAL) 8728c2ecf20Sopenharmony_ci svc_rdma_send_error(rdma_xprt, ctxt, ret); 8738c2ecf20Sopenharmony_ci 
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 8748c2ecf20Sopenharmony_ci return ret; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ciout_backchannel: 8778c2ecf20Sopenharmony_ci svc_rdma_handle_bc_reply(rqstp, ctxt); 8788c2ecf20Sopenharmony_ciout_drop: 8798c2ecf20Sopenharmony_ci svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); 8808c2ecf20Sopenharmony_ci return 0; 8818c2ecf20Sopenharmony_ci} 882