162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2015-2018 Oracle. All rights reserved.
462306a36Sopenharmony_ci * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
562306a36Sopenharmony_ci * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * This software is available to you under a choice of one of two
862306a36Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
962306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
1062306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the BSD-type
1162306a36Sopenharmony_ci * license below:
1262306a36Sopenharmony_ci *
1362306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
1462306a36Sopenharmony_ci * modification, are permitted provided that the following conditions
1562306a36Sopenharmony_ci * are met:
1662306a36Sopenharmony_ci *
1762306a36Sopenharmony_ci *      Redistributions of source code must retain the above copyright
1862306a36Sopenharmony_ci *      notice, this list of conditions and the following disclaimer.
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci *      Redistributions in binary form must reproduce the above
2162306a36Sopenharmony_ci *      copyright notice, this list of conditions and the following
2262306a36Sopenharmony_ci *      disclaimer in the documentation and/or other materials provided
2362306a36Sopenharmony_ci *      with the distribution.
2462306a36Sopenharmony_ci *
2562306a36Sopenharmony_ci *      Neither the name of the Network Appliance, Inc. nor the names of
2662306a36Sopenharmony_ci *      its contributors may be used to endorse or promote products
2762306a36Sopenharmony_ci *      derived from this software without specific prior written
2862306a36Sopenharmony_ci *      permission.
2962306a36Sopenharmony_ci *
3062306a36Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
3162306a36Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
3262306a36Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
3362306a36Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
3462306a36Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
3562306a36Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
3662306a36Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
3762306a36Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
3862306a36Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
3962306a36Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
4062306a36Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4162306a36Sopenharmony_ci *
4262306a36Sopenharmony_ci * Author: Tom Tucker <tom@opengridcomputing.com>
4362306a36Sopenharmony_ci */
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci#include <linux/interrupt.h>
4662306a36Sopenharmony_ci#include <linux/sched.h>
4762306a36Sopenharmony_ci#include <linux/slab.h>
4862306a36Sopenharmony_ci#include <linux/spinlock.h>
4962306a36Sopenharmony_ci#include <linux/workqueue.h>
5062306a36Sopenharmony_ci#include <linux/export.h>
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci#include <rdma/ib_verbs.h>
5362306a36Sopenharmony_ci#include <rdma/rdma_cm.h>
5462306a36Sopenharmony_ci#include <rdma/rw.h>
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci#include <linux/sunrpc/addr.h>
5762306a36Sopenharmony_ci#include <linux/sunrpc/debug.h>
5862306a36Sopenharmony_ci#include <linux/sunrpc/svc_xprt.h>
5962306a36Sopenharmony_ci#include <linux/sunrpc/svc_rdma.h>
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci#include "xprt_rdma.h"
6262306a36Sopenharmony_ci#include <trace/events/rpcrdma.h>
6362306a36Sopenharmony_ci
/*
 * Debug facility selected for this file.
 * NOTE(review): presumably this is what the dprintk() calls below are
 * filtered on -- confirm against <linux/sunrpc/debug.h>.
 */
#define RPCDBG_FACILITY	RPCDBG_SVCXPRT
6562306a36Sopenharmony_ci
/*
 * Forward declarations.
 *
 * svc_rdma_create_xprt() is the allocator shared by the listener setup
 * path (svc_rdma_create) and the connect-request path
 * (handle_connect_req). The remaining functions are the transport
 * methods that are installed in svc_rdma_ops further down in this file.
 */
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
						 struct net *net, int node);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
					struct net *net,
					struct sockaddr *sa, int salen,
					int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
7762306a36Sopenharmony_ci
/*
 * Method table for the "rdma" server transport.
 *
 * xpo_create, xpo_accept, xpo_detach, xpo_free, xpo_has_wspace and
 * xpo_kill_temp_xprt are implemented in this file. The recvfrom,
 * sendto, result_payload and release_ctxt methods are not defined in
 * this part of the file (declared elsewhere).
 */
static const struct svc_xprt_ops svc_rdma_ops = {
	.xpo_create = svc_rdma_create,
	.xpo_recvfrom = svc_rdma_recvfrom,
	.xpo_sendto = svc_rdma_sendto,
	.xpo_result_payload = svc_rdma_result_payload,
	.xpo_release_ctxt = svc_rdma_release_ctxt,
	.xpo_detach = svc_rdma_detach,
	.xpo_free = svc_rdma_free,
	.xpo_has_wspace = svc_rdma_has_wspace,
	.xpo_accept = svc_rdma_accept,
	.xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt,
};
9062306a36Sopenharmony_ci
/*
 * Transport class descriptor for RPC-over-RDMA. It is not static, so it
 * is visible outside this file; within this file it is passed to
 * svc_xprt_init() for every transport allocated by
 * svc_rdma_create_xprt().
 */
struct svc_xprt_class svc_rdma_class = {
	.xcl_name = "rdma",
	.xcl_owner = THIS_MODULE,
	.xcl_ops = &svc_rdma_ops,
	.xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
	.xcl_ident = XPRT_TRANSPORT_RDMA,
};
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci/* QP event handler */
10062306a36Sopenharmony_cistatic void qp_event_handler(struct ib_event *event, void *context)
10162306a36Sopenharmony_ci{
10262306a36Sopenharmony_ci	struct svc_xprt *xprt = context;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote);
10562306a36Sopenharmony_ci	switch (event->event) {
10662306a36Sopenharmony_ci	/* These are considered benign events */
10762306a36Sopenharmony_ci	case IB_EVENT_PATH_MIG:
10862306a36Sopenharmony_ci	case IB_EVENT_COMM_EST:
10962306a36Sopenharmony_ci	case IB_EVENT_SQ_DRAINED:
11062306a36Sopenharmony_ci	case IB_EVENT_QP_LAST_WQE_REACHED:
11162306a36Sopenharmony_ci		break;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	/* These are considered fatal events */
11462306a36Sopenharmony_ci	case IB_EVENT_PATH_MIG_ERR:
11562306a36Sopenharmony_ci	case IB_EVENT_QP_FATAL:
11662306a36Sopenharmony_ci	case IB_EVENT_QP_REQ_ERR:
11762306a36Sopenharmony_ci	case IB_EVENT_QP_ACCESS_ERR:
11862306a36Sopenharmony_ci	case IB_EVENT_DEVICE_FATAL:
11962306a36Sopenharmony_ci	default:
12062306a36Sopenharmony_ci		svc_xprt_deferred_close(xprt);
12162306a36Sopenharmony_ci		break;
12262306a36Sopenharmony_ci	}
12362306a36Sopenharmony_ci}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_cistatic struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
12662306a36Sopenharmony_ci						 struct net *net, int node)
12762306a36Sopenharmony_ci{
12862306a36Sopenharmony_ci	struct svcxprt_rdma *cma_xprt;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
13162306a36Sopenharmony_ci	if (!cma_xprt)
13262306a36Sopenharmony_ci		return NULL;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
13562306a36Sopenharmony_ci	INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
13662306a36Sopenharmony_ci	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
13762306a36Sopenharmony_ci	init_llist_head(&cma_xprt->sc_send_ctxts);
13862306a36Sopenharmony_ci	init_llist_head(&cma_xprt->sc_recv_ctxts);
13962306a36Sopenharmony_ci	init_llist_head(&cma_xprt->sc_rw_ctxts);
14062306a36Sopenharmony_ci	init_waitqueue_head(&cma_xprt->sc_send_wait);
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	spin_lock_init(&cma_xprt->sc_lock);
14362306a36Sopenharmony_ci	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
14462306a36Sopenharmony_ci	spin_lock_init(&cma_xprt->sc_send_lock);
14562306a36Sopenharmony_ci	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	/*
14862306a36Sopenharmony_ci	 * Note that this implies that the underlying transport support
14962306a36Sopenharmony_ci	 * has some form of congestion control (see RFC 7530 section 3.1
15062306a36Sopenharmony_ci	 * paragraph 2). For now, we assume that all supported RDMA
15162306a36Sopenharmony_ci	 * transports are suitable here.
15262306a36Sopenharmony_ci	 */
15362306a36Sopenharmony_ci	set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	return cma_xprt;
15662306a36Sopenharmony_ci}
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_cistatic void
15962306a36Sopenharmony_cisvc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
16062306a36Sopenharmony_ci			       struct rdma_conn_param *param)
16162306a36Sopenharmony_ci{
16262306a36Sopenharmony_ci	const struct rpcrdma_connect_private *pmsg = param->private_data;
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	if (pmsg &&
16562306a36Sopenharmony_ci	    pmsg->cp_magic == rpcrdma_cmp_magic &&
16662306a36Sopenharmony_ci	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
16762306a36Sopenharmony_ci		newxprt->sc_snd_w_inv = pmsg->cp_flags &
16862306a36Sopenharmony_ci					RPCRDMA_CMP_F_SND_W_INV_OK;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci		dprintk("svcrdma: client send_size %u, recv_size %u "
17162306a36Sopenharmony_ci			"remote inv %ssupported\n",
17262306a36Sopenharmony_ci			rpcrdma_decode_buffer_size(pmsg->cp_send_size),
17362306a36Sopenharmony_ci			rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
17462306a36Sopenharmony_ci			newxprt->sc_snd_w_inv ? "" : "un");
17562306a36Sopenharmony_ci	}
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci/*
17962306a36Sopenharmony_ci * This function handles the CONNECT_REQUEST event on a listening
18062306a36Sopenharmony_ci * endpoint. It is passed the cma_id for the _new_ connection. The context in
18162306a36Sopenharmony_ci * this cma_id is inherited from the listening cma_id and is the svc_xprt
18262306a36Sopenharmony_ci * structure for the listening endpoint.
18362306a36Sopenharmony_ci *
18462306a36Sopenharmony_ci * This function creates a new xprt for the new connection and enqueues it on
18562306a36Sopenharmony_ci * the accept queue for the listent xprt. When the listen thread is kicked, it
18662306a36Sopenharmony_ci * will call the recvfrom method on the listen xprt which will accept the new
18762306a36Sopenharmony_ci * connection.
18862306a36Sopenharmony_ci */
18962306a36Sopenharmony_cistatic void handle_connect_req(struct rdma_cm_id *new_cma_id,
19062306a36Sopenharmony_ci			       struct rdma_conn_param *param)
19162306a36Sopenharmony_ci{
19262306a36Sopenharmony_ci	struct svcxprt_rdma *listen_xprt = new_cma_id->context;
19362306a36Sopenharmony_ci	struct svcxprt_rdma *newxprt;
19462306a36Sopenharmony_ci	struct sockaddr *sa;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
19762306a36Sopenharmony_ci				       listen_xprt->sc_xprt.xpt_net,
19862306a36Sopenharmony_ci				       ibdev_to_node(new_cma_id->device));
19962306a36Sopenharmony_ci	if (!newxprt)
20062306a36Sopenharmony_ci		return;
20162306a36Sopenharmony_ci	newxprt->sc_cm_id = new_cma_id;
20262306a36Sopenharmony_ci	new_cma_id->context = newxprt;
20362306a36Sopenharmony_ci	svc_rdma_parse_connect_private(newxprt, param);
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	/* Save client advertised inbound read limit for use later in accept. */
20662306a36Sopenharmony_ci	newxprt->sc_ord = param->initiator_depth;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
20962306a36Sopenharmony_ci	newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa);
21062306a36Sopenharmony_ci	memcpy(&newxprt->sc_xprt.xpt_remote, sa,
21162306a36Sopenharmony_ci	       newxprt->sc_xprt.xpt_remotelen);
21262306a36Sopenharmony_ci	snprintf(newxprt->sc_xprt.xpt_remotebuf,
21362306a36Sopenharmony_ci		 sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa);
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	/* The remote port is arbitrary and not under the control of the
21662306a36Sopenharmony_ci	 * client ULP. Set it to a fixed value so that the DRC continues
21762306a36Sopenharmony_ci	 * to be effective after a reconnect.
21862306a36Sopenharmony_ci	 */
21962306a36Sopenharmony_ci	rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0);
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
22262306a36Sopenharmony_ci	svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	/*
22562306a36Sopenharmony_ci	 * Enqueue the new transport on the accept queue of the listening
22662306a36Sopenharmony_ci	 * transport
22762306a36Sopenharmony_ci	 */
22862306a36Sopenharmony_ci	spin_lock(&listen_xprt->sc_lock);
22962306a36Sopenharmony_ci	list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
23062306a36Sopenharmony_ci	spin_unlock(&listen_xprt->sc_lock);
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
23362306a36Sopenharmony_ci	svc_xprt_enqueue(&listen_xprt->sc_xprt);
23462306a36Sopenharmony_ci}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci/**
23762306a36Sopenharmony_ci * svc_rdma_listen_handler - Handle CM events generated on a listening endpoint
23862306a36Sopenharmony_ci * @cma_id: the server's listener rdma_cm_id
23962306a36Sopenharmony_ci * @event: details of the event
24062306a36Sopenharmony_ci *
24162306a36Sopenharmony_ci * Return values:
24262306a36Sopenharmony_ci *     %0: Do not destroy @cma_id
24362306a36Sopenharmony_ci *     %1: Destroy @cma_id (never returned here)
24462306a36Sopenharmony_ci *
24562306a36Sopenharmony_ci * NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners.
24662306a36Sopenharmony_ci */
24762306a36Sopenharmony_cistatic int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
24862306a36Sopenharmony_ci				   struct rdma_cm_event *event)
24962306a36Sopenharmony_ci{
25062306a36Sopenharmony_ci	switch (event->event) {
25162306a36Sopenharmony_ci	case RDMA_CM_EVENT_CONNECT_REQUEST:
25262306a36Sopenharmony_ci		handle_connect_req(cma_id, &event->param.conn);
25362306a36Sopenharmony_ci		break;
25462306a36Sopenharmony_ci	default:
25562306a36Sopenharmony_ci		break;
25662306a36Sopenharmony_ci	}
25762306a36Sopenharmony_ci	return 0;
25862306a36Sopenharmony_ci}
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci/**
26162306a36Sopenharmony_ci * svc_rdma_cma_handler - Handle CM events on client connections
26262306a36Sopenharmony_ci * @cma_id: the server's listener rdma_cm_id
26362306a36Sopenharmony_ci * @event: details of the event
26462306a36Sopenharmony_ci *
26562306a36Sopenharmony_ci * Return values:
26662306a36Sopenharmony_ci *     %0: Do not destroy @cma_id
26762306a36Sopenharmony_ci *     %1: Destroy @cma_id (never returned here)
26862306a36Sopenharmony_ci */
26962306a36Sopenharmony_cistatic int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
27062306a36Sopenharmony_ci				struct rdma_cm_event *event)
27162306a36Sopenharmony_ci{
27262306a36Sopenharmony_ci	struct svcxprt_rdma *rdma = cma_id->context;
27362306a36Sopenharmony_ci	struct svc_xprt *xprt = &rdma->sc_xprt;
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	switch (event->event) {
27662306a36Sopenharmony_ci	case RDMA_CM_EVENT_ESTABLISHED:
27762306a36Sopenharmony_ci		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci		/* Handle any requests that were received while
28062306a36Sopenharmony_ci		 * CONN_PENDING was set. */
28162306a36Sopenharmony_ci		svc_xprt_enqueue(xprt);
28262306a36Sopenharmony_ci		break;
28362306a36Sopenharmony_ci	case RDMA_CM_EVENT_DISCONNECTED:
28462306a36Sopenharmony_ci	case RDMA_CM_EVENT_DEVICE_REMOVAL:
28562306a36Sopenharmony_ci		svc_xprt_deferred_close(xprt);
28662306a36Sopenharmony_ci		break;
28762306a36Sopenharmony_ci	default:
28862306a36Sopenharmony_ci		break;
28962306a36Sopenharmony_ci	}
29062306a36Sopenharmony_ci	return 0;
29162306a36Sopenharmony_ci}
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci/*
29462306a36Sopenharmony_ci * Create a listening RDMA service endpoint.
29562306a36Sopenharmony_ci */
29662306a36Sopenharmony_cistatic struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
29762306a36Sopenharmony_ci					struct net *net,
29862306a36Sopenharmony_ci					struct sockaddr *sa, int salen,
29962306a36Sopenharmony_ci					int flags)
30062306a36Sopenharmony_ci{
30162306a36Sopenharmony_ci	struct rdma_cm_id *listen_id;
30262306a36Sopenharmony_ci	struct svcxprt_rdma *cma_xprt;
30362306a36Sopenharmony_ci	int ret;
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
30662306a36Sopenharmony_ci		return ERR_PTR(-EAFNOSUPPORT);
30762306a36Sopenharmony_ci	cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);
30862306a36Sopenharmony_ci	if (!cma_xprt)
30962306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
31062306a36Sopenharmony_ci	set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
31162306a36Sopenharmony_ci	strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt,
31462306a36Sopenharmony_ci				   RDMA_PS_TCP, IB_QPT_RC);
31562306a36Sopenharmony_ci	if (IS_ERR(listen_id)) {
31662306a36Sopenharmony_ci		ret = PTR_ERR(listen_id);
31762306a36Sopenharmony_ci		goto err0;
31862306a36Sopenharmony_ci	}
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	/* Allow both IPv4 and IPv6 sockets to bind a single port
32162306a36Sopenharmony_ci	 * at the same time.
32262306a36Sopenharmony_ci	 */
32362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
32462306a36Sopenharmony_ci	ret = rdma_set_afonly(listen_id, 1);
32562306a36Sopenharmony_ci	if (ret)
32662306a36Sopenharmony_ci		goto err1;
32762306a36Sopenharmony_ci#endif
32862306a36Sopenharmony_ci	ret = rdma_bind_addr(listen_id, sa);
32962306a36Sopenharmony_ci	if (ret)
33062306a36Sopenharmony_ci		goto err1;
33162306a36Sopenharmony_ci	cma_xprt->sc_cm_id = listen_id;
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
33462306a36Sopenharmony_ci	if (ret)
33562306a36Sopenharmony_ci		goto err1;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	/*
33862306a36Sopenharmony_ci	 * We need to use the address from the cm_id in case the
33962306a36Sopenharmony_ci	 * caller specified 0 for the port number.
34062306a36Sopenharmony_ci	 */
34162306a36Sopenharmony_ci	sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr;
34262306a36Sopenharmony_ci	svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	return &cma_xprt->sc_xprt;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci err1:
34762306a36Sopenharmony_ci	rdma_destroy_id(listen_id);
34862306a36Sopenharmony_ci err0:
34962306a36Sopenharmony_ci	kfree(cma_xprt);
35062306a36Sopenharmony_ci	return ERR_PTR(ret);
35162306a36Sopenharmony_ci}
35262306a36Sopenharmony_ci
/*
 * This is the xpo_accept method for listening endpoints (see
 * svc_rdma_ops). Its purpose is to accept incoming connections. The
 * CMA callback handler (handle_connect_req) has already created a new
 * transport and attached it to the new CMA ID.
 *
 * There is a queue of pending connections hung on the listening
 * transport. This queue contains the new svcxprt_rdma structures. This
 * function takes one structure off the accept_q per call and completes
 * the connection: it sizes the queues against the device limits,
 * allocates the PD, CQs and QP, posts receives, and accepts the
 * connection with an RPC-over-RDMA private message.
 *
 * Returns the svc_xprt of the accepted connection, or NULL if the
 * accept queue was empty or any setup step failed.
 */
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
{
	struct svcxprt_rdma *listen_rdma;
	struct svcxprt_rdma *newxprt = NULL;
	struct rdma_conn_param conn_param;
	struct rpcrdma_connect_private pmsg;
	struct ib_qp_init_attr qp_attr;
	unsigned int ctxts, rq_depth;
	struct ib_device *dev;
	int ret = 0;
	RPC_IFDEBUG(struct sockaddr *sap);

	listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
	/* Cleared here; set again below if more connections are queued */
	clear_bit(XPT_CONN, &xprt->xpt_flags);
	/* Get the next entry off the accept list */
	spin_lock(&listen_rdma->sc_lock);
	if (!list_empty(&listen_rdma->sc_accept_q)) {
		newxprt = list_entry(listen_rdma->sc_accept_q.next,
				     struct svcxprt_rdma, sc_accept_q);
		list_del_init(&newxprt->sc_accept_q);
	}
	if (!list_empty(&listen_rdma->sc_accept_q))
		set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
	spin_unlock(&listen_rdma->sc_lock);
	if (!newxprt)
		return NULL;

	dev = newxprt->sc_cm_id->device;
	newxprt->sc_port_num = newxprt->sc_cm_id->port_num;

	/* Qualify the transport resource defaults with the
	 * capabilities of this particular device */
	/* Transport header, head iovec, tail iovec */
	newxprt->sc_max_send_sges = 3;
	/* Add one SGE per page list entry */
	newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
	if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
		newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
	newxprt->sc_max_req_size = svcrdma_max_req_size;
	newxprt->sc_max_requests = svcrdma_max_requests;
	newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
	newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
	/* Receive queue: forward requests + backchannel requests + batch */
	rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
		   newxprt->sc_recv_batch;
	if (rq_depth > dev->attrs.max_qp_wr) {
		pr_warn("svcrdma: reducing receive depth to %d\n",
			dev->attrs.max_qp_wr);
		/* Fall back to the device maximum: batch of one, two
		 * backchannel slots, the rest for forward requests.
		 */
		rq_depth = dev->attrs.max_qp_wr;
		newxprt->sc_recv_batch = 1;
		newxprt->sc_max_requests = rq_depth - 2;
		newxprt->sc_max_bc_requests = 2;
	}
	newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
	/* rdma_rw contexts: per-request factor times the request limit */
	ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
	ctxts *= newxprt->sc_max_requests;
	newxprt->sc_sq_depth = rq_depth + ctxts;
	if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
		pr_warn("svcrdma: reducing send depth to %d\n",
			dev->attrs.max_qp_wr);
		newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
	}
	atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);

	/* Note: ret is not updated on the PD and CQ failure paths below;
	 * the function returns NULL on every failure regardless.
	 */
	newxprt->sc_pd = ib_alloc_pd(dev, 0);
	if (IS_ERR(newxprt->sc_pd)) {
		trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd));
		goto errout;
	}
	newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
					    IB_POLL_WORKQUEUE);
	if (IS_ERR(newxprt->sc_sq_cq))
		goto errout;
	newxprt->sc_rq_cq =
		ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
	if (IS_ERR(newxprt->sc_rq_cq))
		goto errout;

	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = &newxprt->sc_xprt;
	qp_attr.port_num = newxprt->sc_port_num;
	qp_attr.cap.max_rdma_ctxs = ctxts;
	/* NOTE(review): if sc_sq_depth was clamped to max_qp_wr above,
	 * this difference is smaller than rq_depth and, should ctxts
	 * exceed the clamped depth, would not be a sensible WR count --
	 * confirm this cannot happen on supported devices.
	 */
	qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
	qp_attr.cap.max_recv_wr = rq_depth;
	qp_attr.cap.max_send_sge = newxprt->sc_max_send_sges;
	qp_attr.cap.max_recv_sge = 1;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = newxprt->sc_sq_cq;
	qp_attr.recv_cq = newxprt->sc_rq_cq;
	dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
		newxprt->sc_cm_id, newxprt->sc_pd);
	dprintk("    cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
		qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
	dprintk("    cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
		qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);

	ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
	if (ret) {
		trace_svcrdma_qp_err(newxprt, ret);
		goto errout;
	}
	newxprt->sc_qp = newxprt->sc_cm_id->qp;

	/* Send With Invalidate is only kept enabled when the device
	 * advertises memory management extensions.
	 */
	if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		newxprt->sc_snd_w_inv = false;
	/* Only iWARP, IB and RoCE ports are accepted */
	if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
	    !rdma_ib_or_roce(dev, newxprt->sc_port_num)) {
		trace_svcrdma_fabric_err(newxprt, -EINVAL);
		goto errout;
	}

	if (!svc_rdma_post_recvs(newxprt))
		goto errout;

	/* Construct RDMA-CM private message */
	pmsg.cp_magic = rpcrdma_cmp_magic;
	pmsg.cp_version = RPCRDMA_CMP_VERSION;
	pmsg.cp_flags = 0;
	pmsg.cp_send_size = pmsg.cp_recv_size =
		rpcrdma_encode_buffer_size(newxprt->sc_max_req_size);

	/* Accept Connection */
	set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 0;
	/* Client's advertised read limit (saved in handle_connect_req),
	 * capped by what the device supports; zero is treated as an error.
	 */
	conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
					   dev->attrs.max_qp_init_rd_atom);
	if (!conn_param.initiator_depth) {
		ret = -EINVAL;
		trace_svcrdma_initdepth_err(newxprt, ret);
		goto errout;
	}
	conn_param.private_data = &pmsg;
	conn_param.private_data_len = sizeof(pmsg);
	/* Replace the handler on this cm_id with the per-connection one
	 * and issue the accept while holding the handler lock.
	 * NOTE(review): presumably the lock keeps CM events from being
	 * delivered in between -- confirm against the RDMA CM API.
	 */
	rdma_lock_handler(newxprt->sc_cm_id);
	newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
	ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
	rdma_unlock_handler(newxprt->sc_cm_id);
	if (ret) {
		trace_svcrdma_accept_err(newxprt, ret);
		goto errout;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	dprintk("svcrdma: new connection %p accepted:\n", newxprt);
	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
	dprintk("    local address   : %pIS:%u\n", sap, rpc_get_port(sap));
	sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
	dprintk("    remote address  : %pIS:%u\n", sap, rpc_get_port(sap));
	dprintk("    max_sge         : %d\n", newxprt->sc_max_send_sges);
	dprintk("    sq_depth        : %d\n", newxprt->sc_sq_depth);
	dprintk("    rdma_rw_ctxs    : %d\n", ctxts);
	dprintk("    max_requests    : %d\n", newxprt->sc_max_requests);
	dprintk("    ord             : %d\n", conn_param.initiator_depth);
#endif

	return &newxprt->sc_xprt;

 errout:
	/* Take a reference in case the DTO handler runs */
	svc_xprt_get(&newxprt->sc_xprt);
	if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
		ib_destroy_qp(newxprt->sc_qp);
	rdma_destroy_id(newxprt->sc_cm_id);
	/* This call to put will destroy the transport */
	svc_xprt_put(&newxprt->sc_xprt);
	return NULL;
}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_cistatic void svc_rdma_detach(struct svc_xprt *xprt)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	struct svcxprt_rdma *rdma =
53762306a36Sopenharmony_ci		container_of(xprt, struct svcxprt_rdma, sc_xprt);
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	rdma_disconnect(rdma->sc_cm_id);
54062306a36Sopenharmony_ci}
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_cistatic void __svc_rdma_free(struct work_struct *work)
54362306a36Sopenharmony_ci{
54462306a36Sopenharmony_ci	struct svcxprt_rdma *rdma =
54562306a36Sopenharmony_ci		container_of(work, struct svcxprt_rdma, sc_work);
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	/* This blocks until the Completion Queues are empty */
54862306a36Sopenharmony_ci	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
54962306a36Sopenharmony_ci		ib_drain_qp(rdma->sc_qp);
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci	svc_rdma_flush_recv_queues(rdma);
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	svc_rdma_destroy_rw_ctxts(rdma);
55462306a36Sopenharmony_ci	svc_rdma_send_ctxts_destroy(rdma);
55562306a36Sopenharmony_ci	svc_rdma_recv_ctxts_destroy(rdma);
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	/* Destroy the QP if present (not a listener) */
55862306a36Sopenharmony_ci	if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
55962306a36Sopenharmony_ci		ib_destroy_qp(rdma->sc_qp);
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
56262306a36Sopenharmony_ci		ib_free_cq(rdma->sc_sq_cq);
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
56562306a36Sopenharmony_ci		ib_free_cq(rdma->sc_rq_cq);
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
56862306a36Sopenharmony_ci		ib_dealloc_pd(rdma->sc_pd);
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci	/* Destroy the CM ID */
57162306a36Sopenharmony_ci	rdma_destroy_id(rdma->sc_cm_id);
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci	kfree(rdma);
57462306a36Sopenharmony_ci}
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_cistatic void svc_rdma_free(struct svc_xprt *xprt)
57762306a36Sopenharmony_ci{
57862306a36Sopenharmony_ci	struct svcxprt_rdma *rdma =
57962306a36Sopenharmony_ci		container_of(xprt, struct svcxprt_rdma, sc_xprt);
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	INIT_WORK(&rdma->sc_work, __svc_rdma_free);
58262306a36Sopenharmony_ci	schedule_work(&rdma->sc_work);
58362306a36Sopenharmony_ci}
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_cistatic int svc_rdma_has_wspace(struct svc_xprt *xprt)
58662306a36Sopenharmony_ci{
58762306a36Sopenharmony_ci	struct svcxprt_rdma *rdma =
58862306a36Sopenharmony_ci		container_of(xprt, struct svcxprt_rdma, sc_xprt);
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	/*
59162306a36Sopenharmony_ci	 * If there are already waiters on the SQ,
59262306a36Sopenharmony_ci	 * return false.
59362306a36Sopenharmony_ci	 */
59462306a36Sopenharmony_ci	if (waitqueue_active(&rdma->sc_send_wait))
59562306a36Sopenharmony_ci		return 0;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	/* Otherwise return true. */
59862306a36Sopenharmony_ci	return 1;
59962306a36Sopenharmony_ci}
60062306a36Sopenharmony_ci
/*
 * xpo_kill_temp_xprt method. The body is empty: this transport does
 * nothing here.
 */
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
{
}
604