162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2015-2018 Oracle. All rights reserved. 462306a36Sopenharmony_ci * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. 562306a36Sopenharmony_ci * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * This software is available to you under a choice of one of two 862306a36Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 962306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 1062306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the BSD-type 1162306a36Sopenharmony_ci * license below: 1262306a36Sopenharmony_ci * 1362306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or without 1462306a36Sopenharmony_ci * modification, are permitted provided that the following conditions 1562306a36Sopenharmony_ci * are met: 1662306a36Sopenharmony_ci * 1762306a36Sopenharmony_ci * Redistributions of source code must retain the above copyright 1862306a36Sopenharmony_ci * notice, this list of conditions and the following disclaimer. 1962306a36Sopenharmony_ci * 2062306a36Sopenharmony_ci * Redistributions in binary form must reproduce the above 2162306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 2262306a36Sopenharmony_ci * disclaimer in the documentation and/or other materials provided 2362306a36Sopenharmony_ci * with the distribution. 2462306a36Sopenharmony_ci * 2562306a36Sopenharmony_ci * Neither the name of the Network Appliance, Inc. nor the names of 2662306a36Sopenharmony_ci * its contributors may be used to endorse or promote products 2762306a36Sopenharmony_ci * derived from this software without specific prior written 2862306a36Sopenharmony_ci * permission. 2962306a36Sopenharmony_ci * 3062306a36Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 3162306a36Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 3262306a36Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 3362306a36Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 3462306a36Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 3562306a36Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 3662306a36Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 3762306a36Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 3862306a36Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 3962306a36Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 4062306a36Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4162306a36Sopenharmony_ci * 4262306a36Sopenharmony_ci * Author: Tom Tucker <tom@opengridcomputing.com> 4362306a36Sopenharmony_ci */ 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci#include <linux/interrupt.h> 4662306a36Sopenharmony_ci#include <linux/sched.h> 4762306a36Sopenharmony_ci#include <linux/slab.h> 4862306a36Sopenharmony_ci#include <linux/spinlock.h> 4962306a36Sopenharmony_ci#include <linux/workqueue.h> 5062306a36Sopenharmony_ci#include <linux/export.h> 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci#include <rdma/ib_verbs.h> 5362306a36Sopenharmony_ci#include <rdma/rdma_cm.h> 5462306a36Sopenharmony_ci#include <rdma/rw.h> 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci#include <linux/sunrpc/addr.h> 5762306a36Sopenharmony_ci#include <linux/sunrpc/debug.h> 5862306a36Sopenharmony_ci#include <linux/sunrpc/svc_xprt.h> 5962306a36Sopenharmony_ci#include <linux/sunrpc/svc_rdma.h> 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci#include "xprt_rdma.h" 6262306a36Sopenharmony_ci#include <trace/events/rpcrdma.h> 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci#define RPCDBG_FACILITY RPCDBG_SVCXPRT 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cistatic struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, 6762306a36Sopenharmony_ci struct net *net, int node); 6862306a36Sopenharmony_cistatic struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 6962306a36Sopenharmony_ci struct net *net, 7062306a36Sopenharmony_ci struct sockaddr *sa, int salen, 7162306a36Sopenharmony_ci int flags); 7262306a36Sopenharmony_cistatic struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); 7362306a36Sopenharmony_cistatic void svc_rdma_detach(struct svc_xprt *xprt); 7462306a36Sopenharmony_cistatic void svc_rdma_free(struct svc_xprt *xprt); 7562306a36Sopenharmony_cistatic int svc_rdma_has_wspace(struct svc_xprt *xprt); 7662306a36Sopenharmony_cistatic void svc_rdma_kill_temp_xprt(struct svc_xprt *); 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistatic const struct svc_xprt_ops svc_rdma_ops = { 7962306a36Sopenharmony_ci .xpo_create = svc_rdma_create, 8062306a36Sopenharmony_ci .xpo_recvfrom = svc_rdma_recvfrom, 8162306a36Sopenharmony_ci .xpo_sendto = svc_rdma_sendto, 8262306a36Sopenharmony_ci .xpo_result_payload = svc_rdma_result_payload, 8362306a36Sopenharmony_ci .xpo_release_ctxt = svc_rdma_release_ctxt, 8462306a36Sopenharmony_ci .xpo_detach = svc_rdma_detach, 8562306a36Sopenharmony_ci .xpo_free = svc_rdma_free, 8662306a36Sopenharmony_ci .xpo_has_wspace = svc_rdma_has_wspace, 8762306a36Sopenharmony_ci .xpo_accept = svc_rdma_accept, 8862306a36Sopenharmony_ci .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt, 8962306a36Sopenharmony_ci}; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_cistruct svc_xprt_class svc_rdma_class = { 9262306a36Sopenharmony_ci .xcl_name = "rdma", 9362306a36Sopenharmony_ci .xcl_owner = THIS_MODULE, 9462306a36Sopenharmony_ci .xcl_ops = &svc_rdma_ops, 9562306a36Sopenharmony_ci .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, 9662306a36Sopenharmony_ci .xcl_ident = XPRT_TRANSPORT_RDMA, 9762306a36Sopenharmony_ci}; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci/* QP event handler */ 10062306a36Sopenharmony_cistatic void qp_event_handler(struct ib_event *event, void *context) 10162306a36Sopenharmony_ci{ 10262306a36Sopenharmony_ci struct svc_xprt *xprt = context; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote); 10562306a36Sopenharmony_ci switch (event->event) { 10662306a36Sopenharmony_ci /* These are considered benign events */ 10762306a36Sopenharmony_ci case IB_EVENT_PATH_MIG: 10862306a36Sopenharmony_ci case IB_EVENT_COMM_EST: 10962306a36Sopenharmony_ci case IB_EVENT_SQ_DRAINED: 11062306a36Sopenharmony_ci case IB_EVENT_QP_LAST_WQE_REACHED: 11162306a36Sopenharmony_ci break; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci /* These are considered fatal events */ 11462306a36Sopenharmony_ci case IB_EVENT_PATH_MIG_ERR: 11562306a36Sopenharmony_ci case IB_EVENT_QP_FATAL: 11662306a36Sopenharmony_ci case IB_EVENT_QP_REQ_ERR: 11762306a36Sopenharmony_ci case IB_EVENT_QP_ACCESS_ERR: 11862306a36Sopenharmony_ci case IB_EVENT_DEVICE_FATAL: 11962306a36Sopenharmony_ci default: 12062306a36Sopenharmony_ci svc_xprt_deferred_close(xprt); 12162306a36Sopenharmony_ci break; 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci} 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_cistatic struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, 12662306a36Sopenharmony_ci struct net *net, int node) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci struct svcxprt_rdma *cma_xprt; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node); 13162306a36Sopenharmony_ci if (!cma_xprt) 13262306a36Sopenharmony_ci return NULL; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv); 13562306a36Sopenharmony_ci INIT_LIST_HEAD(&cma_xprt->sc_accept_q); 13662306a36Sopenharmony_ci INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); 13762306a36Sopenharmony_ci init_llist_head(&cma_xprt->sc_send_ctxts); 13862306a36Sopenharmony_ci init_llist_head(&cma_xprt->sc_recv_ctxts); 13962306a36Sopenharmony_ci init_llist_head(&cma_xprt->sc_rw_ctxts); 14062306a36Sopenharmony_ci init_waitqueue_head(&cma_xprt->sc_send_wait); 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci spin_lock_init(&cma_xprt->sc_lock); 14362306a36Sopenharmony_ci spin_lock_init(&cma_xprt->sc_rq_dto_lock); 14462306a36Sopenharmony_ci spin_lock_init(&cma_xprt->sc_send_lock); 14562306a36Sopenharmony_ci spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci /* 14862306a36Sopenharmony_ci * Note that this implies that the underlying transport support 14962306a36Sopenharmony_ci * has some form of congestion control (see RFC 7530 section 3.1 15062306a36Sopenharmony_ci * paragraph 2). For now, we assume that all supported RDMA 15162306a36Sopenharmony_ci * transports are suitable here. 15262306a36Sopenharmony_ci */ 15362306a36Sopenharmony_ci set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags); 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci return cma_xprt; 15662306a36Sopenharmony_ci} 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_cistatic void 15962306a36Sopenharmony_cisvc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt, 16062306a36Sopenharmony_ci struct rdma_conn_param *param) 16162306a36Sopenharmony_ci{ 16262306a36Sopenharmony_ci const struct rpcrdma_connect_private *pmsg = param->private_data; 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci if (pmsg && 16562306a36Sopenharmony_ci pmsg->cp_magic == rpcrdma_cmp_magic && 16662306a36Sopenharmony_ci pmsg->cp_version == RPCRDMA_CMP_VERSION) { 16762306a36Sopenharmony_ci newxprt->sc_snd_w_inv = pmsg->cp_flags & 16862306a36Sopenharmony_ci RPCRDMA_CMP_F_SND_W_INV_OK; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci dprintk("svcrdma: client send_size %u, recv_size %u " 17162306a36Sopenharmony_ci "remote inv %ssupported\n", 17262306a36Sopenharmony_ci rpcrdma_decode_buffer_size(pmsg->cp_send_size), 17362306a36Sopenharmony_ci rpcrdma_decode_buffer_size(pmsg->cp_recv_size), 17462306a36Sopenharmony_ci newxprt->sc_snd_w_inv ? "" : "un"); 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci/* 17962306a36Sopenharmony_ci * This function handles the CONNECT_REQUEST event on a listening 18062306a36Sopenharmony_ci * endpoint. It is passed the cma_id for the _new_ connection. The context in 18162306a36Sopenharmony_ci * this cma_id is inherited from the listening cma_id and is the svc_xprt 18262306a36Sopenharmony_ci * structure for the listening endpoint. 18362306a36Sopenharmony_ci * 18462306a36Sopenharmony_ci * This function creates a new xprt for the new connection and enqueues it on 18562306a36Sopenharmony_ci * the accept queue for the listent xprt. When the listen thread is kicked, it 18662306a36Sopenharmony_ci * will call the recvfrom method on the listen xprt which will accept the new 18762306a36Sopenharmony_ci * connection. 18862306a36Sopenharmony_ci */ 18962306a36Sopenharmony_cistatic void handle_connect_req(struct rdma_cm_id *new_cma_id, 19062306a36Sopenharmony_ci struct rdma_conn_param *param) 19162306a36Sopenharmony_ci{ 19262306a36Sopenharmony_ci struct svcxprt_rdma *listen_xprt = new_cma_id->context; 19362306a36Sopenharmony_ci struct svcxprt_rdma *newxprt; 19462306a36Sopenharmony_ci struct sockaddr *sa; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 19762306a36Sopenharmony_ci listen_xprt->sc_xprt.xpt_net, 19862306a36Sopenharmony_ci ibdev_to_node(new_cma_id->device)); 19962306a36Sopenharmony_ci if (!newxprt) 20062306a36Sopenharmony_ci return; 20162306a36Sopenharmony_ci newxprt->sc_cm_id = new_cma_id; 20262306a36Sopenharmony_ci new_cma_id->context = newxprt; 20362306a36Sopenharmony_ci svc_rdma_parse_connect_private(newxprt, param); 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci /* Save client advertised inbound read limit for use later in accept. */ 20662306a36Sopenharmony_ci newxprt->sc_ord = param->initiator_depth; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; 20962306a36Sopenharmony_ci newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa); 21062306a36Sopenharmony_ci memcpy(&newxprt->sc_xprt.xpt_remote, sa, 21162306a36Sopenharmony_ci newxprt->sc_xprt.xpt_remotelen); 21262306a36Sopenharmony_ci snprintf(newxprt->sc_xprt.xpt_remotebuf, 21362306a36Sopenharmony_ci sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci /* The remote port is arbitrary and not under the control of the 21662306a36Sopenharmony_ci * client ULP. Set it to a fixed value so that the DRC continues 21762306a36Sopenharmony_ci * to be effective after a reconnect. 21862306a36Sopenharmony_ci */ 21962306a36Sopenharmony_ci rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0); 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; 22262306a36Sopenharmony_ci svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci /* 22562306a36Sopenharmony_ci * Enqueue the new transport on the accept queue of the listening 22662306a36Sopenharmony_ci * transport 22762306a36Sopenharmony_ci */ 22862306a36Sopenharmony_ci spin_lock(&listen_xprt->sc_lock); 22962306a36Sopenharmony_ci list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q); 23062306a36Sopenharmony_ci spin_unlock(&listen_xprt->sc_lock); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags); 23362306a36Sopenharmony_ci svc_xprt_enqueue(&listen_xprt->sc_xprt); 23462306a36Sopenharmony_ci} 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci/** 23762306a36Sopenharmony_ci * svc_rdma_listen_handler - Handle CM events generated on a listening endpoint 23862306a36Sopenharmony_ci * @cma_id: the server's listener rdma_cm_id 23962306a36Sopenharmony_ci * @event: details of the event 24062306a36Sopenharmony_ci * 24162306a36Sopenharmony_ci * Return values: 24262306a36Sopenharmony_ci * %0: Do not destroy @cma_id 24362306a36Sopenharmony_ci * %1: Destroy @cma_id (never returned here) 24462306a36Sopenharmony_ci * 24562306a36Sopenharmony_ci * NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners. 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_cistatic int svc_rdma_listen_handler(struct rdma_cm_id *cma_id, 24862306a36Sopenharmony_ci struct rdma_cm_event *event) 24962306a36Sopenharmony_ci{ 25062306a36Sopenharmony_ci switch (event->event) { 25162306a36Sopenharmony_ci case RDMA_CM_EVENT_CONNECT_REQUEST: 25262306a36Sopenharmony_ci handle_connect_req(cma_id, &event->param.conn); 25362306a36Sopenharmony_ci break; 25462306a36Sopenharmony_ci default: 25562306a36Sopenharmony_ci break; 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci return 0; 25862306a36Sopenharmony_ci} 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci/** 26162306a36Sopenharmony_ci * svc_rdma_cma_handler - Handle CM events on client connections 26262306a36Sopenharmony_ci * @cma_id: the server's listener rdma_cm_id 26362306a36Sopenharmony_ci * @event: details of the event 26462306a36Sopenharmony_ci * 26562306a36Sopenharmony_ci * Return values: 26662306a36Sopenharmony_ci * %0: Do not destroy @cma_id 26762306a36Sopenharmony_ci * %1: Destroy @cma_id (never returned here) 26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_cistatic int svc_rdma_cma_handler(struct rdma_cm_id *cma_id, 27062306a36Sopenharmony_ci struct rdma_cm_event *event) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci struct svcxprt_rdma *rdma = cma_id->context; 27362306a36Sopenharmony_ci struct svc_xprt *xprt = &rdma->sc_xprt; 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci switch (event->event) { 27662306a36Sopenharmony_ci case RDMA_CM_EVENT_ESTABLISHED: 27762306a36Sopenharmony_ci clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci /* Handle any requests that were received while 28062306a36Sopenharmony_ci * CONN_PENDING was set. */ 28162306a36Sopenharmony_ci svc_xprt_enqueue(xprt); 28262306a36Sopenharmony_ci break; 28362306a36Sopenharmony_ci case RDMA_CM_EVENT_DISCONNECTED: 28462306a36Sopenharmony_ci case RDMA_CM_EVENT_DEVICE_REMOVAL: 28562306a36Sopenharmony_ci svc_xprt_deferred_close(xprt); 28662306a36Sopenharmony_ci break; 28762306a36Sopenharmony_ci default: 28862306a36Sopenharmony_ci break; 28962306a36Sopenharmony_ci } 29062306a36Sopenharmony_ci return 0; 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci/* 29462306a36Sopenharmony_ci * Create a listening RDMA service endpoint. 29562306a36Sopenharmony_ci */ 29662306a36Sopenharmony_cistatic struct svc_xprt *svc_rdma_create(struct svc_serv *serv, 29762306a36Sopenharmony_ci struct net *net, 29862306a36Sopenharmony_ci struct sockaddr *sa, int salen, 29962306a36Sopenharmony_ci int flags) 30062306a36Sopenharmony_ci{ 30162306a36Sopenharmony_ci struct rdma_cm_id *listen_id; 30262306a36Sopenharmony_ci struct svcxprt_rdma *cma_xprt; 30362306a36Sopenharmony_ci int ret; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) 30662306a36Sopenharmony_ci return ERR_PTR(-EAFNOSUPPORT); 30762306a36Sopenharmony_ci cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE); 30862306a36Sopenharmony_ci if (!cma_xprt) 30962306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 31062306a36Sopenharmony_ci set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); 31162306a36Sopenharmony_ci strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener"); 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt, 31462306a36Sopenharmony_ci RDMA_PS_TCP, IB_QPT_RC); 31562306a36Sopenharmony_ci if (IS_ERR(listen_id)) { 31662306a36Sopenharmony_ci ret = PTR_ERR(listen_id); 31762306a36Sopenharmony_ci goto err0; 31862306a36Sopenharmony_ci } 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci /* Allow both IPv4 and IPv6 sockets to bind a single port 32162306a36Sopenharmony_ci * at the same time. 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 32462306a36Sopenharmony_ci ret = rdma_set_afonly(listen_id, 1); 32562306a36Sopenharmony_ci if (ret) 32662306a36Sopenharmony_ci goto err1; 32762306a36Sopenharmony_ci#endif 32862306a36Sopenharmony_ci ret = rdma_bind_addr(listen_id, sa); 32962306a36Sopenharmony_ci if (ret) 33062306a36Sopenharmony_ci goto err1; 33162306a36Sopenharmony_ci cma_xprt->sc_cm_id = listen_id; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); 33462306a36Sopenharmony_ci if (ret) 33562306a36Sopenharmony_ci goto err1; 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci /* 33862306a36Sopenharmony_ci * We need to use the address from the cm_id in case the 33962306a36Sopenharmony_ci * caller specified 0 for the port number. 34062306a36Sopenharmony_ci */ 34162306a36Sopenharmony_ci sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr; 34262306a36Sopenharmony_ci svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci return &cma_xprt->sc_xprt; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci err1: 34762306a36Sopenharmony_ci rdma_destroy_id(listen_id); 34862306a36Sopenharmony_ci err0: 34962306a36Sopenharmony_ci kfree(cma_xprt); 35062306a36Sopenharmony_ci return ERR_PTR(ret); 35162306a36Sopenharmony_ci} 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci/* 35462306a36Sopenharmony_ci * This is the xpo_recvfrom function for listening endpoints. Its 35562306a36Sopenharmony_ci * purpose is to accept incoming connections. The CMA callback handler 35662306a36Sopenharmony_ci * has already created a new transport and attached it to the new CMA 35762306a36Sopenharmony_ci * ID. 35862306a36Sopenharmony_ci * 35962306a36Sopenharmony_ci * There is a queue of pending connections hung on the listening 36062306a36Sopenharmony_ci * transport. This queue contains the new svc_xprt structure. This 36162306a36Sopenharmony_ci * function takes svc_xprt structures off the accept_q and completes 36262306a36Sopenharmony_ci * the connection. 36362306a36Sopenharmony_ci */ 36462306a36Sopenharmony_cistatic struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci struct svcxprt_rdma *listen_rdma; 36762306a36Sopenharmony_ci struct svcxprt_rdma *newxprt = NULL; 36862306a36Sopenharmony_ci struct rdma_conn_param conn_param; 36962306a36Sopenharmony_ci struct rpcrdma_connect_private pmsg; 37062306a36Sopenharmony_ci struct ib_qp_init_attr qp_attr; 37162306a36Sopenharmony_ci unsigned int ctxts, rq_depth; 37262306a36Sopenharmony_ci struct ib_device *dev; 37362306a36Sopenharmony_ci int ret = 0; 37462306a36Sopenharmony_ci RPC_IFDEBUG(struct sockaddr *sap); 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); 37762306a36Sopenharmony_ci clear_bit(XPT_CONN, &xprt->xpt_flags); 37862306a36Sopenharmony_ci /* Get the next entry off the accept list */ 37962306a36Sopenharmony_ci spin_lock(&listen_rdma->sc_lock); 38062306a36Sopenharmony_ci if (!list_empty(&listen_rdma->sc_accept_q)) { 38162306a36Sopenharmony_ci newxprt = list_entry(listen_rdma->sc_accept_q.next, 38262306a36Sopenharmony_ci struct svcxprt_rdma, sc_accept_q); 38362306a36Sopenharmony_ci list_del_init(&newxprt->sc_accept_q); 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci if (!list_empty(&listen_rdma->sc_accept_q)) 38662306a36Sopenharmony_ci set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags); 38762306a36Sopenharmony_ci spin_unlock(&listen_rdma->sc_lock); 38862306a36Sopenharmony_ci if (!newxprt) 38962306a36Sopenharmony_ci return NULL; 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci dev = newxprt->sc_cm_id->device; 39262306a36Sopenharmony_ci newxprt->sc_port_num = newxprt->sc_cm_id->port_num; 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci /* Qualify the transport resource defaults with the 39562306a36Sopenharmony_ci * capabilities of this particular device */ 39662306a36Sopenharmony_ci /* Transport header, head iovec, tail iovec */ 39762306a36Sopenharmony_ci newxprt->sc_max_send_sges = 3; 39862306a36Sopenharmony_ci /* Add one SGE per page list entry */ 39962306a36Sopenharmony_ci newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1; 40062306a36Sopenharmony_ci if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) 40162306a36Sopenharmony_ci newxprt->sc_max_send_sges = dev->attrs.max_send_sge; 40262306a36Sopenharmony_ci newxprt->sc_max_req_size = svcrdma_max_req_size; 40362306a36Sopenharmony_ci newxprt->sc_max_requests = svcrdma_max_requests; 40462306a36Sopenharmony_ci newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; 40562306a36Sopenharmony_ci newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH; 40662306a36Sopenharmony_ci rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests + 40762306a36Sopenharmony_ci newxprt->sc_recv_batch; 40862306a36Sopenharmony_ci if (rq_depth > dev->attrs.max_qp_wr) { 40962306a36Sopenharmony_ci pr_warn("svcrdma: reducing receive depth to %d\n", 41062306a36Sopenharmony_ci dev->attrs.max_qp_wr); 41162306a36Sopenharmony_ci rq_depth = dev->attrs.max_qp_wr; 41262306a36Sopenharmony_ci newxprt->sc_recv_batch = 1; 41362306a36Sopenharmony_ci newxprt->sc_max_requests = rq_depth - 2; 41462306a36Sopenharmony_ci newxprt->sc_max_bc_requests = 2; 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); 41762306a36Sopenharmony_ci ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES); 41862306a36Sopenharmony_ci ctxts *= newxprt->sc_max_requests; 41962306a36Sopenharmony_ci newxprt->sc_sq_depth = rq_depth + ctxts; 42062306a36Sopenharmony_ci if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) { 42162306a36Sopenharmony_ci pr_warn("svcrdma: reducing send depth to %d\n", 42262306a36Sopenharmony_ci dev->attrs.max_qp_wr); 42362306a36Sopenharmony_ci newxprt->sc_sq_depth = dev->attrs.max_qp_wr; 42462306a36Sopenharmony_ci } 42562306a36Sopenharmony_ci atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci newxprt->sc_pd = ib_alloc_pd(dev, 0); 42862306a36Sopenharmony_ci if (IS_ERR(newxprt->sc_pd)) { 42962306a36Sopenharmony_ci trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd)); 43062306a36Sopenharmony_ci goto errout; 43162306a36Sopenharmony_ci } 43262306a36Sopenharmony_ci newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, 43362306a36Sopenharmony_ci IB_POLL_WORKQUEUE); 43462306a36Sopenharmony_ci if (IS_ERR(newxprt->sc_sq_cq)) 43562306a36Sopenharmony_ci goto errout; 43662306a36Sopenharmony_ci newxprt->sc_rq_cq = 43762306a36Sopenharmony_ci ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); 43862306a36Sopenharmony_ci if (IS_ERR(newxprt->sc_rq_cq)) 43962306a36Sopenharmony_ci goto errout; 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci memset(&qp_attr, 0, sizeof qp_attr); 44262306a36Sopenharmony_ci qp_attr.event_handler = qp_event_handler; 44362306a36Sopenharmony_ci qp_attr.qp_context = &newxprt->sc_xprt; 44462306a36Sopenharmony_ci qp_attr.port_num = newxprt->sc_port_num; 44562306a36Sopenharmony_ci qp_attr.cap.max_rdma_ctxs = ctxts; 44662306a36Sopenharmony_ci qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts; 44762306a36Sopenharmony_ci qp_attr.cap.max_recv_wr = rq_depth; 44862306a36Sopenharmony_ci qp_attr.cap.max_send_sge = newxprt->sc_max_send_sges; 44962306a36Sopenharmony_ci qp_attr.cap.max_recv_sge = 1; 45062306a36Sopenharmony_ci qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 45162306a36Sopenharmony_ci qp_attr.qp_type = IB_QPT_RC; 45262306a36Sopenharmony_ci qp_attr.send_cq = newxprt->sc_sq_cq; 45362306a36Sopenharmony_ci qp_attr.recv_cq = newxprt->sc_rq_cq; 45462306a36Sopenharmony_ci dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n", 45562306a36Sopenharmony_ci newxprt->sc_cm_id, newxprt->sc_pd); 45662306a36Sopenharmony_ci dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n", 45762306a36Sopenharmony_ci qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr); 45862306a36Sopenharmony_ci dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n", 45962306a36Sopenharmony_ci qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge); 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); 46262306a36Sopenharmony_ci if (ret) { 46362306a36Sopenharmony_ci trace_svcrdma_qp_err(newxprt, ret); 46462306a36Sopenharmony_ci goto errout; 46562306a36Sopenharmony_ci } 46662306a36Sopenharmony_ci newxprt->sc_qp = newxprt->sc_cm_id->qp; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) 46962306a36Sopenharmony_ci newxprt->sc_snd_w_inv = false; 47062306a36Sopenharmony_ci if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) && 47162306a36Sopenharmony_ci !rdma_ib_or_roce(dev, newxprt->sc_port_num)) { 47262306a36Sopenharmony_ci trace_svcrdma_fabric_err(newxprt, -EINVAL); 47362306a36Sopenharmony_ci goto errout; 47462306a36Sopenharmony_ci } 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci if (!svc_rdma_post_recvs(newxprt)) 47762306a36Sopenharmony_ci goto errout; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci /* Construct RDMA-CM private message */ 48062306a36Sopenharmony_ci pmsg.cp_magic = rpcrdma_cmp_magic; 48162306a36Sopenharmony_ci pmsg.cp_version = RPCRDMA_CMP_VERSION; 48262306a36Sopenharmony_ci pmsg.cp_flags = 0; 48362306a36Sopenharmony_ci pmsg.cp_send_size = pmsg.cp_recv_size = 48462306a36Sopenharmony_ci rpcrdma_encode_buffer_size(newxprt->sc_max_req_size); 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci /* Accept Connection */ 48762306a36Sopenharmony_ci set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); 48862306a36Sopenharmony_ci memset(&conn_param, 0, sizeof conn_param); 48962306a36Sopenharmony_ci conn_param.responder_resources = 0; 49062306a36Sopenharmony_ci conn_param.initiator_depth = min_t(int, newxprt->sc_ord, 49162306a36Sopenharmony_ci dev->attrs.max_qp_init_rd_atom); 49262306a36Sopenharmony_ci if (!conn_param.initiator_depth) { 49362306a36Sopenharmony_ci ret = -EINVAL; 49462306a36Sopenharmony_ci trace_svcrdma_initdepth_err(newxprt, ret); 49562306a36Sopenharmony_ci goto errout; 49662306a36Sopenharmony_ci } 49762306a36Sopenharmony_ci conn_param.private_data = &pmsg; 49862306a36Sopenharmony_ci conn_param.private_data_len = sizeof(pmsg); 49962306a36Sopenharmony_ci rdma_lock_handler(newxprt->sc_cm_id); 50062306a36Sopenharmony_ci newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; 50162306a36Sopenharmony_ci ret = rdma_accept(newxprt->sc_cm_id, &conn_param); 50262306a36Sopenharmony_ci rdma_unlock_handler(newxprt->sc_cm_id); 50362306a36Sopenharmony_ci if (ret) { 50462306a36Sopenharmony_ci trace_svcrdma_accept_err(newxprt, ret); 50562306a36Sopenharmony_ci goto errout; 50662306a36Sopenharmony_ci } 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 50962306a36Sopenharmony_ci dprintk("svcrdma: new connection %p accepted:\n", newxprt); 51062306a36Sopenharmony_ci sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; 51162306a36Sopenharmony_ci dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); 51262306a36Sopenharmony_ci sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; 51362306a36Sopenharmony_ci dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); 51462306a36Sopenharmony_ci dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges); 51562306a36Sopenharmony_ci dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); 51662306a36Sopenharmony_ci dprintk(" rdma_rw_ctxs : %d\n", ctxts); 51762306a36Sopenharmony_ci dprintk(" max_requests : %d\n", newxprt->sc_max_requests); 51862306a36Sopenharmony_ci dprintk(" ord : %d\n", conn_param.initiator_depth); 51962306a36Sopenharmony_ci#endif 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci return &newxprt->sc_xprt; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci errout: 52462306a36Sopenharmony_ci /* Take a reference in case the DTO handler runs */ 52562306a36Sopenharmony_ci svc_xprt_get(&newxprt->sc_xprt); 52662306a36Sopenharmony_ci if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) 52762306a36Sopenharmony_ci ib_destroy_qp(newxprt->sc_qp); 52862306a36Sopenharmony_ci rdma_destroy_id(newxprt->sc_cm_id); 52962306a36Sopenharmony_ci /* This call to put will destroy the transport */ 53062306a36Sopenharmony_ci svc_xprt_put(&newxprt->sc_xprt); 53162306a36Sopenharmony_ci return NULL; 53262306a36Sopenharmony_ci} 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_cistatic void svc_rdma_detach(struct svc_xprt *xprt) 53562306a36Sopenharmony_ci{ 53662306a36Sopenharmony_ci struct svcxprt_rdma *rdma = 53762306a36Sopenharmony_ci container_of(xprt, struct svcxprt_rdma, sc_xprt); 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci rdma_disconnect(rdma->sc_cm_id); 54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_cistatic void __svc_rdma_free(struct work_struct *work) 54362306a36Sopenharmony_ci{ 54462306a36Sopenharmony_ci struct svcxprt_rdma *rdma = 54562306a36Sopenharmony_ci container_of(work, struct svcxprt_rdma, sc_work); 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci /* This blocks until the Completion Queues are empty */ 54862306a36Sopenharmony_ci if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 54962306a36Sopenharmony_ci ib_drain_qp(rdma->sc_qp); 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci svc_rdma_flush_recv_queues(rdma); 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci svc_rdma_destroy_rw_ctxts(rdma); 55462306a36Sopenharmony_ci svc_rdma_send_ctxts_destroy(rdma); 55562306a36Sopenharmony_ci svc_rdma_recv_ctxts_destroy(rdma); 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci /* Destroy the QP if present (not a listener) */ 55862306a36Sopenharmony_ci if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) 55962306a36Sopenharmony_ci ib_destroy_qp(rdma->sc_qp); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) 56262306a36Sopenharmony_ci ib_free_cq(rdma->sc_sq_cq); 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) 56562306a36Sopenharmony_ci ib_free_cq(rdma->sc_rq_cq); 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) 56862306a36Sopenharmony_ci ib_dealloc_pd(rdma->sc_pd); 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci /* Destroy the CM ID */ 57162306a36Sopenharmony_ci rdma_destroy_id(rdma->sc_cm_id); 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci kfree(rdma); 57462306a36Sopenharmony_ci} 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_cistatic void svc_rdma_free(struct svc_xprt *xprt) 57762306a36Sopenharmony_ci{ 57862306a36Sopenharmony_ci struct svcxprt_rdma *rdma = 57962306a36Sopenharmony_ci container_of(xprt, struct svcxprt_rdma, sc_xprt); 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci INIT_WORK(&rdma->sc_work, __svc_rdma_free); 58262306a36Sopenharmony_ci schedule_work(&rdma->sc_work); 58362306a36Sopenharmony_ci} 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_cistatic int svc_rdma_has_wspace(struct svc_xprt *xprt) 58662306a36Sopenharmony_ci{ 58762306a36Sopenharmony_ci struct svcxprt_rdma *rdma = 58862306a36Sopenharmony_ci container_of(xprt, struct svcxprt_rdma, sc_xprt); 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci /* 59162306a36Sopenharmony_ci * If there are already waiters on the SQ, 59262306a36Sopenharmony_ci * return false. 59362306a36Sopenharmony_ci */ 59462306a36Sopenharmony_ci if (waitqueue_active(&rdma->sc_send_wait)) 59562306a36Sopenharmony_ci return 0; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci /* Otherwise return true. */ 59862306a36Sopenharmony_ci return 1; 59962306a36Sopenharmony_ci} 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_cistatic void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) 60262306a36Sopenharmony_ci{ 60362306a36Sopenharmony_ci} 604