162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2016-2018 Oracle. All rights reserved.
462306a36Sopenharmony_ci * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
562306a36Sopenharmony_ci * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * This software is available to you under a choice of one of two
862306a36Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
962306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
1062306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the BSD-type
1162306a36Sopenharmony_ci * license below:
1262306a36Sopenharmony_ci *
1362306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
1462306a36Sopenharmony_ci * modification, are permitted provided that the following conditions
1562306a36Sopenharmony_ci * are met:
1662306a36Sopenharmony_ci *
1762306a36Sopenharmony_ci *      Redistributions of source code must retain the above copyright
1862306a36Sopenharmony_ci *      notice, this list of conditions and the following disclaimer.
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci *      Redistributions in binary form must reproduce the above
2162306a36Sopenharmony_ci *      copyright notice, this list of conditions and the following
2262306a36Sopenharmony_ci *      disclaimer in the documentation and/or other materials provided
2362306a36Sopenharmony_ci *      with the distribution.
2462306a36Sopenharmony_ci *
2562306a36Sopenharmony_ci *      Neither the name of the Network Appliance, Inc. nor the names of
2662306a36Sopenharmony_ci *      its contributors may be used to endorse or promote products
2762306a36Sopenharmony_ci *      derived from this software without specific prior written
2862306a36Sopenharmony_ci *      permission.
2962306a36Sopenharmony_ci *
3062306a36Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
3162306a36Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
3262306a36Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
3362306a36Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
3462306a36Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3562306a36Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3662306a36Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3762306a36Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3862306a36Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3962306a36Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
4062306a36Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4162306a36Sopenharmony_ci *
4262306a36Sopenharmony_ci * Author: Tom Tucker <tom@opengridcomputing.com>
4362306a36Sopenharmony_ci */
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci/* Operation
4662306a36Sopenharmony_ci *
4762306a36Sopenharmony_ci * The main entry point is svc_rdma_recvfrom. This is called from
4862306a36Sopenharmony_ci * svc_recv when the transport indicates there is incoming data to
4962306a36Sopenharmony_ci * be read. "Data Ready" is signaled when an RDMA Receive completes,
5062306a36Sopenharmony_ci * or when a set of RDMA Reads complete.
5162306a36Sopenharmony_ci *
5262306a36Sopenharmony_ci * An svc_rqst is passed in. This structure contains an array of
5362306a36Sopenharmony_ci * free pages (rq_pages) that will contain the incoming RPC message.
5462306a36Sopenharmony_ci *
5562306a36Sopenharmony_ci * Short messages are moved directly into svc_rqst::rq_arg, and
5662306a36Sopenharmony_ci * the RPC Call is ready to be processed by the Upper Layer.
5762306a36Sopenharmony_ci * svc_rdma_recvfrom returns the length of the RPC Call message,
5862306a36Sopenharmony_ci * completing the reception of the RPC Call.
5962306a36Sopenharmony_ci *
6062306a36Sopenharmony_ci * However, when an incoming message has Read chunks,
6162306a36Sopenharmony_ci * svc_rdma_recvfrom must post RDMA Reads to pull the RPC Call's
6262306a36Sopenharmony_ci * data payload from the client. svc_rdma_recvfrom sets up the
6362306a36Sopenharmony_ci * RDMA Reads using pages in svc_rqst::rq_pages, which are
6462306a36Sopenharmony_ci * transferred to an svc_rdma_recv_ctxt for the duration of the
6562306a36Sopenharmony_ci * I/O. svc_rdma_recvfrom then returns zero, since the RPC message
6662306a36Sopenharmony_ci * is still not yet ready.
6762306a36Sopenharmony_ci *
6862306a36Sopenharmony_ci * When the Read chunk payloads have become available on the
6962306a36Sopenharmony_ci * server, "Data Ready" is raised again, and svc_recv calls
7062306a36Sopenharmony_ci * svc_rdma_recvfrom again. This second call may use a different
7162306a36Sopenharmony_ci * svc_rqst than the first one, thus any information that needs
7262306a36Sopenharmony_ci * to be preserved across these two calls is kept in an
7362306a36Sopenharmony_ci * svc_rdma_recv_ctxt.
7462306a36Sopenharmony_ci *
7562306a36Sopenharmony_ci * The second call to svc_rdma_recvfrom performs final assembly
7662306a36Sopenharmony_ci * of the RPC Call message, using the RDMA Read sink pages kept in
7762306a36Sopenharmony_ci * the svc_rdma_recv_ctxt. The xdr_buf is copied from the
7862306a36Sopenharmony_ci * svc_rdma_recv_ctxt to the second svc_rqst. The second call returns
7962306a36Sopenharmony_ci * the length of the completed RPC Call message.
8062306a36Sopenharmony_ci *
8162306a36Sopenharmony_ci * Page Management
8262306a36Sopenharmony_ci *
8362306a36Sopenharmony_ci * Pages under I/O must be transferred from the first svc_rqst to an
8462306a36Sopenharmony_ci * svc_rdma_recv_ctxt before the first svc_rdma_recvfrom call returns.
8562306a36Sopenharmony_ci *
8662306a36Sopenharmony_ci * The first svc_rqst supplies pages for RDMA Reads. These are moved
8762306a36Sopenharmony_ci * from rqstp::rq_pages into ctxt::pages. The consumed elements of
 * the rq_pages array are set to NULL and refilled when the first
 * svc_rdma_recvfrom call returns.
9062306a36Sopenharmony_ci *
9162306a36Sopenharmony_ci * During the second svc_rdma_recvfrom call, RDMA Read sink pages
9262306a36Sopenharmony_ci * are transferred from the svc_rdma_recv_ctxt to the second svc_rqst.
9362306a36Sopenharmony_ci */
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci#include <linux/slab.h>
9662306a36Sopenharmony_ci#include <linux/spinlock.h>
9762306a36Sopenharmony_ci#include <asm/unaligned.h>
9862306a36Sopenharmony_ci#include <rdma/ib_verbs.h>
9962306a36Sopenharmony_ci#include <rdma/rdma_cm.h>
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci#include <linux/sunrpc/xdr.h>
10262306a36Sopenharmony_ci#include <linux/sunrpc/debug.h>
10362306a36Sopenharmony_ci#include <linux/sunrpc/rpc_rdma.h>
10462306a36Sopenharmony_ci#include <linux/sunrpc/svc_rdma.h>
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci#include "xprt_rdma.h"
10762306a36Sopenharmony_ci#include <trace/events/rpcrdma.h>
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_cistatic void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc);
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_cistatic inline struct svc_rdma_recv_ctxt *
11262306a36Sopenharmony_cisvc_rdma_next_recv_ctxt(struct list_head *list)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	return list_first_entry_or_null(list, struct svc_rdma_recv_ctxt,
11562306a36Sopenharmony_ci					rc_list);
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_cistatic void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
11962306a36Sopenharmony_ci				   struct rpc_rdma_cid *cid)
12062306a36Sopenharmony_ci{
12162306a36Sopenharmony_ci	cid->ci_queue_id = rdma->sc_rq_cq->res.id;
12262306a36Sopenharmony_ci	cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
12362306a36Sopenharmony_ci}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_cistatic struct svc_rdma_recv_ctxt *
12662306a36Sopenharmony_cisvc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
12762306a36Sopenharmony_ci{
12862306a36Sopenharmony_ci	int node = ibdev_to_node(rdma->sc_cm_id->device);
12962306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt;
13062306a36Sopenharmony_ci	dma_addr_t addr;
13162306a36Sopenharmony_ci	void *buffer;
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
13462306a36Sopenharmony_ci	if (!ctxt)
13562306a36Sopenharmony_ci		goto fail0;
13662306a36Sopenharmony_ci	buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
13762306a36Sopenharmony_ci	if (!buffer)
13862306a36Sopenharmony_ci		goto fail1;
13962306a36Sopenharmony_ci	addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
14062306a36Sopenharmony_ci				 rdma->sc_max_req_size, DMA_FROM_DEVICE);
14162306a36Sopenharmony_ci	if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
14262306a36Sopenharmony_ci		goto fail2;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid);
14562306a36Sopenharmony_ci	pcl_init(&ctxt->rc_call_pcl);
14662306a36Sopenharmony_ci	pcl_init(&ctxt->rc_read_pcl);
14762306a36Sopenharmony_ci	pcl_init(&ctxt->rc_write_pcl);
14862306a36Sopenharmony_ci	pcl_init(&ctxt->rc_reply_pcl);
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	ctxt->rc_recv_wr.next = NULL;
15162306a36Sopenharmony_ci	ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
15262306a36Sopenharmony_ci	ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
15362306a36Sopenharmony_ci	ctxt->rc_recv_wr.num_sge = 1;
15462306a36Sopenharmony_ci	ctxt->rc_cqe.done = svc_rdma_wc_receive;
15562306a36Sopenharmony_ci	ctxt->rc_recv_sge.addr = addr;
15662306a36Sopenharmony_ci	ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
15762306a36Sopenharmony_ci	ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
15862306a36Sopenharmony_ci	ctxt->rc_recv_buf = buffer;
15962306a36Sopenharmony_ci	return ctxt;
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cifail2:
16262306a36Sopenharmony_ci	kfree(buffer);
16362306a36Sopenharmony_cifail1:
16462306a36Sopenharmony_ci	kfree(ctxt);
16562306a36Sopenharmony_cifail0:
16662306a36Sopenharmony_ci	return NULL;
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_cistatic void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
17062306a36Sopenharmony_ci				       struct svc_rdma_recv_ctxt *ctxt)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
17362306a36Sopenharmony_ci			    ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
17462306a36Sopenharmony_ci	kfree(ctxt->rc_recv_buf);
17562306a36Sopenharmony_ci	kfree(ctxt);
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci/**
17962306a36Sopenharmony_ci * svc_rdma_recv_ctxts_destroy - Release all recv_ctxt's for an xprt
18062306a36Sopenharmony_ci * @rdma: svcxprt_rdma being torn down
18162306a36Sopenharmony_ci *
18262306a36Sopenharmony_ci */
18362306a36Sopenharmony_civoid svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
18462306a36Sopenharmony_ci{
18562306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt;
18662306a36Sopenharmony_ci	struct llist_node *node;
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
18962306a36Sopenharmony_ci		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
19062306a36Sopenharmony_ci		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
19162306a36Sopenharmony_ci	}
19262306a36Sopenharmony_ci}
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci/**
19562306a36Sopenharmony_ci * svc_rdma_recv_ctxt_get - Allocate a recv_ctxt
19662306a36Sopenharmony_ci * @rdma: controlling svcxprt_rdma
19762306a36Sopenharmony_ci *
19862306a36Sopenharmony_ci * Returns a recv_ctxt or (rarely) NULL if none are available.
19962306a36Sopenharmony_ci */
20062306a36Sopenharmony_cistruct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
20162306a36Sopenharmony_ci{
20262306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt;
20362306a36Sopenharmony_ci	struct llist_node *node;
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	node = llist_del_first(&rdma->sc_recv_ctxts);
20662306a36Sopenharmony_ci	if (!node)
20762306a36Sopenharmony_ci		goto out_empty;
20862306a36Sopenharmony_ci	ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ciout:
21162306a36Sopenharmony_ci	ctxt->rc_page_count = 0;
21262306a36Sopenharmony_ci	return ctxt;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ciout_empty:
21562306a36Sopenharmony_ci	ctxt = svc_rdma_recv_ctxt_alloc(rdma);
21662306a36Sopenharmony_ci	if (!ctxt)
21762306a36Sopenharmony_ci		return NULL;
21862306a36Sopenharmony_ci	goto out;
21962306a36Sopenharmony_ci}
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci/**
22262306a36Sopenharmony_ci * svc_rdma_recv_ctxt_put - Return recv_ctxt to free list
22362306a36Sopenharmony_ci * @rdma: controlling svcxprt_rdma
22462306a36Sopenharmony_ci * @ctxt: object to return to the free list
22562306a36Sopenharmony_ci *
22662306a36Sopenharmony_ci */
22762306a36Sopenharmony_civoid svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
22862306a36Sopenharmony_ci			    struct svc_rdma_recv_ctxt *ctxt)
22962306a36Sopenharmony_ci{
23062306a36Sopenharmony_ci	pcl_free(&ctxt->rc_call_pcl);
23162306a36Sopenharmony_ci	pcl_free(&ctxt->rc_read_pcl);
23262306a36Sopenharmony_ci	pcl_free(&ctxt->rc_write_pcl);
23362306a36Sopenharmony_ci	pcl_free(&ctxt->rc_reply_pcl);
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
23662306a36Sopenharmony_ci}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci/**
23962306a36Sopenharmony_ci * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
24062306a36Sopenharmony_ci * @xprt: the transport which owned the context
24162306a36Sopenharmony_ci * @vctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
24262306a36Sopenharmony_ci *
24362306a36Sopenharmony_ci * Ensure that the recv_ctxt is released whether or not a Reply
24462306a36Sopenharmony_ci * was sent. For example, the client could close the connection,
24562306a36Sopenharmony_ci * or svc_process could drop an RPC, before the Reply is sent.
24662306a36Sopenharmony_ci */
24762306a36Sopenharmony_civoid svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
24862306a36Sopenharmony_ci{
24962306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt = vctxt;
25062306a36Sopenharmony_ci	struct svcxprt_rdma *rdma =
25162306a36Sopenharmony_ci		container_of(xprt, struct svcxprt_rdma, sc_xprt);
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	if (ctxt)
25462306a36Sopenharmony_ci		svc_rdma_recv_ctxt_put(rdma, ctxt);
25562306a36Sopenharmony_ci}
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_cistatic bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
25862306a36Sopenharmony_ci				   unsigned int wanted)
25962306a36Sopenharmony_ci{
26062306a36Sopenharmony_ci	const struct ib_recv_wr *bad_wr = NULL;
26162306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt;
26262306a36Sopenharmony_ci	struct ib_recv_wr *recv_chain;
26362306a36Sopenharmony_ci	int ret;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
26662306a36Sopenharmony_ci		return false;
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	recv_chain = NULL;
26962306a36Sopenharmony_ci	while (wanted--) {
27062306a36Sopenharmony_ci		ctxt = svc_rdma_recv_ctxt_get(rdma);
27162306a36Sopenharmony_ci		if (!ctxt)
27262306a36Sopenharmony_ci			break;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci		trace_svcrdma_post_recv(ctxt);
27562306a36Sopenharmony_ci		ctxt->rc_recv_wr.next = recv_chain;
27662306a36Sopenharmony_ci		recv_chain = &ctxt->rc_recv_wr;
27762306a36Sopenharmony_ci		rdma->sc_pending_recvs++;
27862306a36Sopenharmony_ci	}
27962306a36Sopenharmony_ci	if (!recv_chain)
28062306a36Sopenharmony_ci		return false;
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci	ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
28362306a36Sopenharmony_ci	if (ret)
28462306a36Sopenharmony_ci		goto err_free;
28562306a36Sopenharmony_ci	return true;
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_cierr_free:
28862306a36Sopenharmony_ci	trace_svcrdma_rq_post_err(rdma, ret);
28962306a36Sopenharmony_ci	while (bad_wr) {
29062306a36Sopenharmony_ci		ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
29162306a36Sopenharmony_ci				    rc_recv_wr);
29262306a36Sopenharmony_ci		bad_wr = bad_wr->next;
29362306a36Sopenharmony_ci		svc_rdma_recv_ctxt_put(rdma, ctxt);
29462306a36Sopenharmony_ci	}
29562306a36Sopenharmony_ci	/* Since we're destroying the xprt, no need to reset
29662306a36Sopenharmony_ci	 * sc_pending_recvs. */
29762306a36Sopenharmony_ci	return false;
29862306a36Sopenharmony_ci}
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci/**
30162306a36Sopenharmony_ci * svc_rdma_post_recvs - Post initial set of Recv WRs
30262306a36Sopenharmony_ci * @rdma: fresh svcxprt_rdma
30362306a36Sopenharmony_ci *
30462306a36Sopenharmony_ci * Returns true if successful, otherwise false.
30562306a36Sopenharmony_ci */
30662306a36Sopenharmony_cibool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
30762306a36Sopenharmony_ci{
30862306a36Sopenharmony_ci	return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
30962306a36Sopenharmony_ci}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci/**
31262306a36Sopenharmony_ci * svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
31362306a36Sopenharmony_ci * @cq: Completion Queue context
31462306a36Sopenharmony_ci * @wc: Work Completion object
31562306a36Sopenharmony_ci *
31662306a36Sopenharmony_ci */
31762306a36Sopenharmony_cistatic void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
31862306a36Sopenharmony_ci{
31962306a36Sopenharmony_ci	struct svcxprt_rdma *rdma = cq->cq_context;
32062306a36Sopenharmony_ci	struct ib_cqe *cqe = wc->wr_cqe;
32162306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt;
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci	rdma->sc_pending_recvs--;
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	/* WARNING: Only wc->wr_cqe and wc->status are reliable */
32662306a36Sopenharmony_ci	ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS)
32962306a36Sopenharmony_ci		goto flushed;
33062306a36Sopenharmony_ci	trace_svcrdma_wc_recv(wc, &ctxt->rc_cid);
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	/* If receive posting fails, the connection is about to be
33362306a36Sopenharmony_ci	 * lost anyway. The server will not be able to send a reply
33462306a36Sopenharmony_ci	 * for this RPC, and the client will retransmit this RPC
33562306a36Sopenharmony_ci	 * anyway when it reconnects.
33662306a36Sopenharmony_ci	 *
33762306a36Sopenharmony_ci	 * Therefore we drop the Receive, even if status was SUCCESS
33862306a36Sopenharmony_ci	 * to reduce the likelihood of replayed requests once the
33962306a36Sopenharmony_ci	 * client reconnects.
34062306a36Sopenharmony_ci	 */
34162306a36Sopenharmony_ci	if (rdma->sc_pending_recvs < rdma->sc_max_requests)
34262306a36Sopenharmony_ci		if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch))
34362306a36Sopenharmony_ci			goto dropped;
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci	/* All wc fields are now known to be valid */
34662306a36Sopenharmony_ci	ctxt->rc_byte_len = wc->byte_len;
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	spin_lock(&rdma->sc_rq_dto_lock);
34962306a36Sopenharmony_ci	list_add_tail(&ctxt->rc_list, &rdma->sc_rq_dto_q);
35062306a36Sopenharmony_ci	/* Note the unlock pairs with the smp_rmb in svc_xprt_ready: */
35162306a36Sopenharmony_ci	set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
35262306a36Sopenharmony_ci	spin_unlock(&rdma->sc_rq_dto_lock);
35362306a36Sopenharmony_ci	if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
35462306a36Sopenharmony_ci		svc_xprt_enqueue(&rdma->sc_xprt);
35562306a36Sopenharmony_ci	return;
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ciflushed:
35862306a36Sopenharmony_ci	if (wc->status == IB_WC_WR_FLUSH_ERR)
35962306a36Sopenharmony_ci		trace_svcrdma_wc_recv_flush(wc, &ctxt->rc_cid);
36062306a36Sopenharmony_ci	else
36162306a36Sopenharmony_ci		trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid);
36262306a36Sopenharmony_cidropped:
36362306a36Sopenharmony_ci	svc_rdma_recv_ctxt_put(rdma, ctxt);
36462306a36Sopenharmony_ci	svc_xprt_deferred_close(&rdma->sc_xprt);
36562306a36Sopenharmony_ci}
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci/**
36862306a36Sopenharmony_ci * svc_rdma_flush_recv_queues - Drain pending Receive work
36962306a36Sopenharmony_ci * @rdma: svcxprt_rdma being shut down
37062306a36Sopenharmony_ci *
37162306a36Sopenharmony_ci */
37262306a36Sopenharmony_civoid svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
37362306a36Sopenharmony_ci{
37462306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt;
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_ci	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
37762306a36Sopenharmony_ci		list_del(&ctxt->rc_list);
37862306a36Sopenharmony_ci		svc_rdma_recv_ctxt_put(rdma, ctxt);
37962306a36Sopenharmony_ci	}
38062306a36Sopenharmony_ci}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_cistatic void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
38362306a36Sopenharmony_ci				   struct svc_rdma_recv_ctxt *ctxt)
38462306a36Sopenharmony_ci{
38562306a36Sopenharmony_ci	struct xdr_buf *arg = &rqstp->rq_arg;
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	arg->head[0].iov_base = ctxt->rc_recv_buf;
38862306a36Sopenharmony_ci	arg->head[0].iov_len = ctxt->rc_byte_len;
38962306a36Sopenharmony_ci	arg->tail[0].iov_base = NULL;
39062306a36Sopenharmony_ci	arg->tail[0].iov_len = 0;
39162306a36Sopenharmony_ci	arg->page_len = 0;
39262306a36Sopenharmony_ci	arg->page_base = 0;
39362306a36Sopenharmony_ci	arg->buflen = ctxt->rc_byte_len;
39462306a36Sopenharmony_ci	arg->len = ctxt->rc_byte_len;
39562306a36Sopenharmony_ci}
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci/**
39862306a36Sopenharmony_ci * xdr_count_read_segments - Count number of Read segments in Read list
39962306a36Sopenharmony_ci * @rctxt: Ingress receive context
40062306a36Sopenharmony_ci * @p: Start of an un-decoded Read list
40162306a36Sopenharmony_ci *
40262306a36Sopenharmony_ci * Before allocating anything, ensure the ingress Read list is safe
40362306a36Sopenharmony_ci * to use.
40462306a36Sopenharmony_ci *
40562306a36Sopenharmony_ci * The segment count is limited to how many segments can fit in the
40662306a36Sopenharmony_ci * transport header without overflowing the buffer. That's about 40
40762306a36Sopenharmony_ci * Read segments for a 1KB inline threshold.
40862306a36Sopenharmony_ci *
40962306a36Sopenharmony_ci * Return values:
41062306a36Sopenharmony_ci *   %true: Read list is valid. @rctxt's xdr_stream is updated to point
41162306a36Sopenharmony_ci *	    to the first byte past the Read list. rc_read_pcl and
41262306a36Sopenharmony_ci *	    rc_call_pcl cl_count fields are set to the number of
41362306a36Sopenharmony_ci *	    Read segments in the list.
41462306a36Sopenharmony_ci *  %false: Read list is corrupt. @rctxt's xdr_stream is left in an
41562306a36Sopenharmony_ci *	    unknown state.
41662306a36Sopenharmony_ci */
41762306a36Sopenharmony_cistatic bool xdr_count_read_segments(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
41862306a36Sopenharmony_ci{
41962306a36Sopenharmony_ci	rctxt->rc_call_pcl.cl_count = 0;
42062306a36Sopenharmony_ci	rctxt->rc_read_pcl.cl_count = 0;
42162306a36Sopenharmony_ci	while (xdr_item_is_present(p)) {
42262306a36Sopenharmony_ci		u32 position, handle, length;
42362306a36Sopenharmony_ci		u64 offset;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci		p = xdr_inline_decode(&rctxt->rc_stream,
42662306a36Sopenharmony_ci				      rpcrdma_readseg_maxsz * sizeof(*p));
42762306a36Sopenharmony_ci		if (!p)
42862306a36Sopenharmony_ci			return false;
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci		xdr_decode_read_segment(p, &position, &handle,
43162306a36Sopenharmony_ci					    &length, &offset);
43262306a36Sopenharmony_ci		if (position) {
43362306a36Sopenharmony_ci			if (position & 3)
43462306a36Sopenharmony_ci				return false;
43562306a36Sopenharmony_ci			++rctxt->rc_read_pcl.cl_count;
43662306a36Sopenharmony_ci		} else {
43762306a36Sopenharmony_ci			++rctxt->rc_call_pcl.cl_count;
43862306a36Sopenharmony_ci		}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
44162306a36Sopenharmony_ci		if (!p)
44262306a36Sopenharmony_ci			return false;
44362306a36Sopenharmony_ci	}
44462306a36Sopenharmony_ci	return true;
44562306a36Sopenharmony_ci}
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci/* Sanity check the Read list.
44862306a36Sopenharmony_ci *
44962306a36Sopenharmony_ci * Sanity checks:
45062306a36Sopenharmony_ci * - Read list does not overflow Receive buffer.
45162306a36Sopenharmony_ci * - Chunk size limited by largest NFS data payload.
45262306a36Sopenharmony_ci *
45362306a36Sopenharmony_ci * Return values:
45462306a36Sopenharmony_ci *   %true: Read list is valid. @rctxt's xdr_stream is updated
45562306a36Sopenharmony_ci *	    to point to the first byte past the Read list.
45662306a36Sopenharmony_ci *  %false: Read list is corrupt. @rctxt's xdr_stream is left
45762306a36Sopenharmony_ci *	    in an unknown state.
45862306a36Sopenharmony_ci */
45962306a36Sopenharmony_cistatic bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
46062306a36Sopenharmony_ci{
46162306a36Sopenharmony_ci	__be32 *p;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
46462306a36Sopenharmony_ci	if (!p)
46562306a36Sopenharmony_ci		return false;
46662306a36Sopenharmony_ci	if (!xdr_count_read_segments(rctxt, p))
46762306a36Sopenharmony_ci		return false;
46862306a36Sopenharmony_ci	if (!pcl_alloc_call(rctxt, p))
46962306a36Sopenharmony_ci		return false;
47062306a36Sopenharmony_ci	return pcl_alloc_read(rctxt, p);
47162306a36Sopenharmony_ci}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_cistatic bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt)
47462306a36Sopenharmony_ci{
47562306a36Sopenharmony_ci	u32 segcount;
47662306a36Sopenharmony_ci	__be32 *p;
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount))
47962306a36Sopenharmony_ci		return false;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	/* A bogus segcount causes this buffer overflow check to fail. */
48262306a36Sopenharmony_ci	p = xdr_inline_decode(&rctxt->rc_stream,
48362306a36Sopenharmony_ci			      segcount * rpcrdma_segment_maxsz * sizeof(*p));
48462306a36Sopenharmony_ci	return p != NULL;
48562306a36Sopenharmony_ci}
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci/**
48862306a36Sopenharmony_ci * xdr_count_write_chunks - Count number of Write chunks in Write list
48962306a36Sopenharmony_ci * @rctxt: Received header and decoding state
49062306a36Sopenharmony_ci * @p: start of an un-decoded Write list
49162306a36Sopenharmony_ci *
49262306a36Sopenharmony_ci * Before allocating anything, ensure the ingress Write list is
49362306a36Sopenharmony_ci * safe to use.
49462306a36Sopenharmony_ci *
49562306a36Sopenharmony_ci * Return values:
49662306a36Sopenharmony_ci *       %true: Write list is valid. @rctxt's xdr_stream is updated
49762306a36Sopenharmony_ci *		to point to the first byte past the Write list, and
49862306a36Sopenharmony_ci *		the number of Write chunks is in rc_write_pcl.cl_count.
49962306a36Sopenharmony_ci *      %false: Write list is corrupt. @rctxt's xdr_stream is left
50062306a36Sopenharmony_ci *		in an indeterminate state.
50162306a36Sopenharmony_ci */
50262306a36Sopenharmony_cistatic bool xdr_count_write_chunks(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
50362306a36Sopenharmony_ci{
50462306a36Sopenharmony_ci	rctxt->rc_write_pcl.cl_count = 0;
50562306a36Sopenharmony_ci	while (xdr_item_is_present(p)) {
50662306a36Sopenharmony_ci		if (!xdr_check_write_chunk(rctxt))
50762306a36Sopenharmony_ci			return false;
50862306a36Sopenharmony_ci		++rctxt->rc_write_pcl.cl_count;
50962306a36Sopenharmony_ci		p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
51062306a36Sopenharmony_ci		if (!p)
51162306a36Sopenharmony_ci			return false;
51262306a36Sopenharmony_ci	}
51362306a36Sopenharmony_ci	return true;
51462306a36Sopenharmony_ci}
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci/* Sanity check the Write list.
51762306a36Sopenharmony_ci *
51862306a36Sopenharmony_ci * Implementation limits:
51962306a36Sopenharmony_ci * - This implementation currently supports only one Write chunk.
52062306a36Sopenharmony_ci *
52162306a36Sopenharmony_ci * Sanity checks:
52262306a36Sopenharmony_ci * - Write list does not overflow Receive buffer.
52362306a36Sopenharmony_ci * - Chunk size limited by largest NFS data payload.
52462306a36Sopenharmony_ci *
52562306a36Sopenharmony_ci * Return values:
52662306a36Sopenharmony_ci *       %true: Write list is valid. @rctxt's xdr_stream is updated
52762306a36Sopenharmony_ci *		to point to the first byte past the Write list.
52862306a36Sopenharmony_ci *      %false: Write list is corrupt. @rctxt's xdr_stream is left
52962306a36Sopenharmony_ci *		in an unknown state.
53062306a36Sopenharmony_ci */
53162306a36Sopenharmony_cistatic bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
53262306a36Sopenharmony_ci{
53362306a36Sopenharmony_ci	__be32 *p;
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
53662306a36Sopenharmony_ci	if (!p)
53762306a36Sopenharmony_ci		return false;
53862306a36Sopenharmony_ci	if (!xdr_count_write_chunks(rctxt, p))
53962306a36Sopenharmony_ci		return false;
54062306a36Sopenharmony_ci	if (!pcl_alloc_write(rctxt, &rctxt->rc_write_pcl, p))
54162306a36Sopenharmony_ci		return false;
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	rctxt->rc_cur_result_payload = pcl_first_chunk(&rctxt->rc_write_pcl);
54462306a36Sopenharmony_ci	return true;
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci/* Sanity check the Reply chunk.
54862306a36Sopenharmony_ci *
54962306a36Sopenharmony_ci * Sanity checks:
55062306a36Sopenharmony_ci * - Reply chunk does not overflow Receive buffer.
55162306a36Sopenharmony_ci * - Chunk size limited by largest NFS data payload.
55262306a36Sopenharmony_ci *
55362306a36Sopenharmony_ci * Return values:
55462306a36Sopenharmony_ci *       %true: Reply chunk is valid. @rctxt's xdr_stream is updated
55562306a36Sopenharmony_ci *		to point to the first byte past the Reply chunk.
55662306a36Sopenharmony_ci *      %false: Reply chunk is corrupt. @rctxt's xdr_stream is left
55762306a36Sopenharmony_ci *		in an unknown state.
55862306a36Sopenharmony_ci */
55962306a36Sopenharmony_cistatic bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
56062306a36Sopenharmony_ci{
56162306a36Sopenharmony_ci	__be32 *p;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
56462306a36Sopenharmony_ci	if (!p)
56562306a36Sopenharmony_ci		return false;
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	if (!xdr_item_is_present(p))
56862306a36Sopenharmony_ci		return true;
56962306a36Sopenharmony_ci	if (!xdr_check_write_chunk(rctxt))
57062306a36Sopenharmony_ci		return false;
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci	rctxt->rc_reply_pcl.cl_count = 1;
57362306a36Sopenharmony_ci	return pcl_alloc_write(rctxt, &rctxt->rc_reply_pcl, p);
57462306a36Sopenharmony_ci}
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci/* RPC-over-RDMA Version One private extension: Remote Invalidation.
57762306a36Sopenharmony_ci * Responder's choice: requester signals it can handle Send With
57862306a36Sopenharmony_ci * Invalidate, and responder chooses one R_key to invalidate.
57962306a36Sopenharmony_ci *
58062306a36Sopenharmony_ci * If there is exactly one distinct R_key in the received transport
58162306a36Sopenharmony_ci * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
58262306a36Sopenharmony_ci */
58362306a36Sopenharmony_cistatic void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
58462306a36Sopenharmony_ci				  struct svc_rdma_recv_ctxt *ctxt)
58562306a36Sopenharmony_ci{
58662306a36Sopenharmony_ci	struct svc_rdma_segment *segment;
58762306a36Sopenharmony_ci	struct svc_rdma_chunk *chunk;
58862306a36Sopenharmony_ci	u32 inv_rkey;
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	ctxt->rc_inv_rkey = 0;
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci	if (!rdma->sc_snd_w_inv)
59362306a36Sopenharmony_ci		return;
59462306a36Sopenharmony_ci
59562306a36Sopenharmony_ci	inv_rkey = 0;
59662306a36Sopenharmony_ci	pcl_for_each_chunk(chunk, &ctxt->rc_call_pcl) {
59762306a36Sopenharmony_ci		pcl_for_each_segment(segment, chunk) {
59862306a36Sopenharmony_ci			if (inv_rkey == 0)
59962306a36Sopenharmony_ci				inv_rkey = segment->rs_handle;
60062306a36Sopenharmony_ci			else if (inv_rkey != segment->rs_handle)
60162306a36Sopenharmony_ci				return;
60262306a36Sopenharmony_ci		}
60362306a36Sopenharmony_ci	}
60462306a36Sopenharmony_ci	pcl_for_each_chunk(chunk, &ctxt->rc_read_pcl) {
60562306a36Sopenharmony_ci		pcl_for_each_segment(segment, chunk) {
60662306a36Sopenharmony_ci			if (inv_rkey == 0)
60762306a36Sopenharmony_ci				inv_rkey = segment->rs_handle;
60862306a36Sopenharmony_ci			else if (inv_rkey != segment->rs_handle)
60962306a36Sopenharmony_ci				return;
61062306a36Sopenharmony_ci		}
61162306a36Sopenharmony_ci	}
61262306a36Sopenharmony_ci	pcl_for_each_chunk(chunk, &ctxt->rc_write_pcl) {
61362306a36Sopenharmony_ci		pcl_for_each_segment(segment, chunk) {
61462306a36Sopenharmony_ci			if (inv_rkey == 0)
61562306a36Sopenharmony_ci				inv_rkey = segment->rs_handle;
61662306a36Sopenharmony_ci			else if (inv_rkey != segment->rs_handle)
61762306a36Sopenharmony_ci				return;
61862306a36Sopenharmony_ci		}
61962306a36Sopenharmony_ci	}
62062306a36Sopenharmony_ci	pcl_for_each_chunk(chunk, &ctxt->rc_reply_pcl) {
62162306a36Sopenharmony_ci		pcl_for_each_segment(segment, chunk) {
62262306a36Sopenharmony_ci			if (inv_rkey == 0)
62362306a36Sopenharmony_ci				inv_rkey = segment->rs_handle;
62462306a36Sopenharmony_ci			else if (inv_rkey != segment->rs_handle)
62562306a36Sopenharmony_ci				return;
62662306a36Sopenharmony_ci		}
62762306a36Sopenharmony_ci	}
62862306a36Sopenharmony_ci	ctxt->rc_inv_rkey = inv_rkey;
62962306a36Sopenharmony_ci}
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci/**
63262306a36Sopenharmony_ci * svc_rdma_xdr_decode_req - Decode the transport header
63362306a36Sopenharmony_ci * @rq_arg: xdr_buf containing ingress RPC/RDMA message
63462306a36Sopenharmony_ci * @rctxt: state of decoding
63562306a36Sopenharmony_ci *
63662306a36Sopenharmony_ci * On entry, xdr->head[0].iov_base points to first byte of the
63762306a36Sopenharmony_ci * RPC-over-RDMA transport header.
63862306a36Sopenharmony_ci *
63962306a36Sopenharmony_ci * On successful exit, head[0] points to first byte past the
64062306a36Sopenharmony_ci * RPC-over-RDMA header. For RDMA_MSG, this is the RPC message.
64162306a36Sopenharmony_ci *
64262306a36Sopenharmony_ci * The length of the RPC-over-RDMA header is returned.
64362306a36Sopenharmony_ci *
64462306a36Sopenharmony_ci * Assumptions:
64562306a36Sopenharmony_ci * - The transport header is entirely contained in the head iovec.
64662306a36Sopenharmony_ci */
64762306a36Sopenharmony_cistatic int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
64862306a36Sopenharmony_ci				   struct svc_rdma_recv_ctxt *rctxt)
64962306a36Sopenharmony_ci{
65062306a36Sopenharmony_ci	__be32 *p, *rdma_argp;
65162306a36Sopenharmony_ci	unsigned int hdr_len;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	rdma_argp = rq_arg->head[0].iov_base;
65462306a36Sopenharmony_ci	xdr_init_decode(&rctxt->rc_stream, rq_arg, rdma_argp, NULL);
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci	p = xdr_inline_decode(&rctxt->rc_stream,
65762306a36Sopenharmony_ci			      rpcrdma_fixed_maxsz * sizeof(*p));
65862306a36Sopenharmony_ci	if (unlikely(!p))
65962306a36Sopenharmony_ci		goto out_short;
66062306a36Sopenharmony_ci	p++;
66162306a36Sopenharmony_ci	if (*p != rpcrdma_version)
66262306a36Sopenharmony_ci		goto out_version;
66362306a36Sopenharmony_ci	p += 2;
66462306a36Sopenharmony_ci	rctxt->rc_msgtype = *p;
66562306a36Sopenharmony_ci	switch (rctxt->rc_msgtype) {
66662306a36Sopenharmony_ci	case rdma_msg:
66762306a36Sopenharmony_ci		break;
66862306a36Sopenharmony_ci	case rdma_nomsg:
66962306a36Sopenharmony_ci		break;
67062306a36Sopenharmony_ci	case rdma_done:
67162306a36Sopenharmony_ci		goto out_drop;
67262306a36Sopenharmony_ci	case rdma_error:
67362306a36Sopenharmony_ci		goto out_drop;
67462306a36Sopenharmony_ci	default:
67562306a36Sopenharmony_ci		goto out_proc;
67662306a36Sopenharmony_ci	}
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	if (!xdr_check_read_list(rctxt))
67962306a36Sopenharmony_ci		goto out_inval;
68062306a36Sopenharmony_ci	if (!xdr_check_write_list(rctxt))
68162306a36Sopenharmony_ci		goto out_inval;
68262306a36Sopenharmony_ci	if (!xdr_check_reply_chunk(rctxt))
68362306a36Sopenharmony_ci		goto out_inval;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	rq_arg->head[0].iov_base = rctxt->rc_stream.p;
68662306a36Sopenharmony_ci	hdr_len = xdr_stream_pos(&rctxt->rc_stream);
68762306a36Sopenharmony_ci	rq_arg->head[0].iov_len -= hdr_len;
68862306a36Sopenharmony_ci	rq_arg->len -= hdr_len;
68962306a36Sopenharmony_ci	trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len);
69062306a36Sopenharmony_ci	return hdr_len;
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ciout_short:
69362306a36Sopenharmony_ci	trace_svcrdma_decode_short_err(rctxt, rq_arg->len);
69462306a36Sopenharmony_ci	return -EINVAL;
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ciout_version:
69762306a36Sopenharmony_ci	trace_svcrdma_decode_badvers_err(rctxt, rdma_argp);
69862306a36Sopenharmony_ci	return -EPROTONOSUPPORT;
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ciout_drop:
70162306a36Sopenharmony_ci	trace_svcrdma_decode_drop_err(rctxt, rdma_argp);
70262306a36Sopenharmony_ci	return 0;
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ciout_proc:
70562306a36Sopenharmony_ci	trace_svcrdma_decode_badproc_err(rctxt, rdma_argp);
70662306a36Sopenharmony_ci	return -EINVAL;
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ciout_inval:
70962306a36Sopenharmony_ci	trace_svcrdma_decode_parse_err(rctxt, rdma_argp);
71062306a36Sopenharmony_ci	return -EINVAL;
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
/* Acquire a send ctxt from @rdma and pass it, together with @rctxt
 * and @status, to svc_rdma_send_error_msg(). If no send ctxt can be
 * obtained, nothing is sent and the failure is not reported.
 */
static void svc_rdma_send_error(struct svcxprt_rdma *rdma,
				struct svc_rdma_recv_ctxt *rctxt,
				int status)
{
	struct svc_rdma_send_ctxt *sctxt = svc_rdma_send_ctxt_get(rdma);

	if (sctxt)
		svc_rdma_send_error_msg(rdma, sctxt, rctxt, status);
}
72462306a36Sopenharmony_ci
72562306a36Sopenharmony_ci/* By convention, backchannel calls arrive via rdma_msg type
72662306a36Sopenharmony_ci * messages, and never populate the chunk lists. This makes
72762306a36Sopenharmony_ci * the RPC/RDMA header small and fixed in size, so it is
72862306a36Sopenharmony_ci * straightforward to check the RPC header's direction field.
72962306a36Sopenharmony_ci */
73062306a36Sopenharmony_cistatic bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
73162306a36Sopenharmony_ci						struct svc_rdma_recv_ctxt *rctxt)
73262306a36Sopenharmony_ci{
73362306a36Sopenharmony_ci	__be32 *p = rctxt->rc_recv_buf;
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	if (!xprt->xpt_bc_xprt)
73662306a36Sopenharmony_ci		return false;
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	if (rctxt->rc_msgtype != rdma_msg)
73962306a36Sopenharmony_ci		return false;
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	if (!pcl_is_empty(&rctxt->rc_call_pcl))
74262306a36Sopenharmony_ci		return false;
74362306a36Sopenharmony_ci	if (!pcl_is_empty(&rctxt->rc_read_pcl))
74462306a36Sopenharmony_ci		return false;
74562306a36Sopenharmony_ci	if (!pcl_is_empty(&rctxt->rc_write_pcl))
74662306a36Sopenharmony_ci		return false;
74762306a36Sopenharmony_ci	if (!pcl_is_empty(&rctxt->rc_reply_pcl))
74862306a36Sopenharmony_ci		return false;
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	/* RPC call direction */
75162306a36Sopenharmony_ci	if (*(p + 8) == cpu_to_be32(RPC_CALL))
75262306a36Sopenharmony_ci		return false;
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	return true;
75562306a36Sopenharmony_ci}
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci/**
75862306a36Sopenharmony_ci * svc_rdma_recvfrom - Receive an RPC call
75962306a36Sopenharmony_ci * @rqstp: request structure into which to receive an RPC Call
76062306a36Sopenharmony_ci *
76162306a36Sopenharmony_ci * Returns:
76262306a36Sopenharmony_ci *	The positive number of bytes in the RPC Call message,
76362306a36Sopenharmony_ci *	%0 if there were no Calls ready to return,
76462306a36Sopenharmony_ci *	%-EINVAL if the Read chunk data is too large,
76562306a36Sopenharmony_ci *	%-ENOMEM if rdma_rw context pool was exhausted,
76662306a36Sopenharmony_ci *	%-ENOTCONN if posting failed (connection is lost),
76762306a36Sopenharmony_ci *	%-EIO if rdma_rw initialization failed (DMA mapping, etc).
76862306a36Sopenharmony_ci *
76962306a36Sopenharmony_ci * Called in a loop when XPT_DATA is set. XPT_DATA is cleared only
77062306a36Sopenharmony_ci * when there are no remaining ctxt's to process.
77162306a36Sopenharmony_ci *
77262306a36Sopenharmony_ci * The next ctxt is removed from the "receive" lists.
77362306a36Sopenharmony_ci *
77462306a36Sopenharmony_ci * - If the ctxt completes a Receive, then construct the Call
77562306a36Sopenharmony_ci *   message from the contents of the Receive buffer.
77662306a36Sopenharmony_ci *
77762306a36Sopenharmony_ci *   - If there are no Read chunks in this message, then finish
77862306a36Sopenharmony_ci *     assembling the Call message and return the number of bytes
77962306a36Sopenharmony_ci *     in the message.
78062306a36Sopenharmony_ci *
78162306a36Sopenharmony_ci *   - If there are Read chunks in this message, post Read WRs to
78262306a36Sopenharmony_ci *     pull that payload. When the Read WRs complete, build the
78362306a36Sopenharmony_ci *     full message and return the number of bytes in it.
78462306a36Sopenharmony_ci */
78562306a36Sopenharmony_ciint svc_rdma_recvfrom(struct svc_rqst *rqstp)
78662306a36Sopenharmony_ci{
78762306a36Sopenharmony_ci	struct svc_xprt *xprt = rqstp->rq_xprt;
78862306a36Sopenharmony_ci	struct svcxprt_rdma *rdma_xprt =
78962306a36Sopenharmony_ci		container_of(xprt, struct svcxprt_rdma, sc_xprt);
79062306a36Sopenharmony_ci	struct svc_rdma_recv_ctxt *ctxt;
79162306a36Sopenharmony_ci	int ret;
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci	/* Prevent svc_xprt_release() from releasing pages in rq_pages
79462306a36Sopenharmony_ci	 * when returning 0 or an error.
79562306a36Sopenharmony_ci	 */
79662306a36Sopenharmony_ci	rqstp->rq_respages = rqstp->rq_pages;
79762306a36Sopenharmony_ci	rqstp->rq_next_page = rqstp->rq_respages;
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	rqstp->rq_xprt_ctxt = NULL;
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	ctxt = NULL;
80262306a36Sopenharmony_ci	spin_lock(&rdma_xprt->sc_rq_dto_lock);
80362306a36Sopenharmony_ci	ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
80462306a36Sopenharmony_ci	if (ctxt)
80562306a36Sopenharmony_ci		list_del(&ctxt->rc_list);
80662306a36Sopenharmony_ci	else
80762306a36Sopenharmony_ci		/* No new incoming requests, terminate the loop */
80862306a36Sopenharmony_ci		clear_bit(XPT_DATA, &xprt->xpt_flags);
80962306a36Sopenharmony_ci	spin_unlock(&rdma_xprt->sc_rq_dto_lock);
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	/* Unblock the transport for the next receive */
81262306a36Sopenharmony_ci	svc_xprt_received(xprt);
81362306a36Sopenharmony_ci	if (!ctxt)
81462306a36Sopenharmony_ci		return 0;
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	percpu_counter_inc(&svcrdma_stat_recv);
81762306a36Sopenharmony_ci	ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
81862306a36Sopenharmony_ci				   ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
81962306a36Sopenharmony_ci				   DMA_FROM_DEVICE);
82062306a36Sopenharmony_ci	svc_rdma_build_arg_xdr(rqstp, ctxt);
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
82362306a36Sopenharmony_ci	if (ret < 0)
82462306a36Sopenharmony_ci		goto out_err;
82562306a36Sopenharmony_ci	if (ret == 0)
82662306a36Sopenharmony_ci		goto out_drop;
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	if (svc_rdma_is_reverse_direction_reply(xprt, ctxt))
82962306a36Sopenharmony_ci		goto out_backchannel;
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci	if (!pcl_is_empty(&ctxt->rc_read_pcl) ||
83462306a36Sopenharmony_ci	    !pcl_is_empty(&ctxt->rc_call_pcl)) {
83562306a36Sopenharmony_ci		ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
83662306a36Sopenharmony_ci		if (ret < 0)
83762306a36Sopenharmony_ci			goto out_readfail;
83862306a36Sopenharmony_ci	}
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci	rqstp->rq_xprt_ctxt = ctxt;
84162306a36Sopenharmony_ci	rqstp->rq_prot = IPPROTO_MAX;
84262306a36Sopenharmony_ci	svc_xprt_copy_addrs(rqstp, xprt);
84362306a36Sopenharmony_ci	set_bit(RQ_SECURE, &rqstp->rq_flags);
84462306a36Sopenharmony_ci	return rqstp->rq_arg.len;
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ciout_err:
84762306a36Sopenharmony_ci	svc_rdma_send_error(rdma_xprt, ctxt, ret);
84862306a36Sopenharmony_ci	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
84962306a36Sopenharmony_ci	return 0;
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ciout_readfail:
85262306a36Sopenharmony_ci	if (ret == -EINVAL)
85362306a36Sopenharmony_ci		svc_rdma_send_error(rdma_xprt, ctxt, ret);
85462306a36Sopenharmony_ci	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
85562306a36Sopenharmony_ci	svc_xprt_deferred_close(xprt);
85662306a36Sopenharmony_ci	return -ENOTCONN;
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ciout_backchannel:
85962306a36Sopenharmony_ci	svc_rdma_handle_bc_reply(rqstp, ctxt);
86062306a36Sopenharmony_ciout_drop:
86162306a36Sopenharmony_ci	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
86262306a36Sopenharmony_ci	return 0;
86362306a36Sopenharmony_ci}
864