18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two
58c2ecf20Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the
88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below:
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci *     Redistribution and use in source and binary forms, with or
118c2ecf20Sopenharmony_ci *     without modification, are permitted provided that the following
128c2ecf20Sopenharmony_ci *     conditions are met:
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci *      - Redistributions of source code must retain the above
158c2ecf20Sopenharmony_ci *        copyright notice, this list of conditions and the following
168c2ecf20Sopenharmony_ci *        disclaimer.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci *      - Redistributions in binary form must reproduce the above
198c2ecf20Sopenharmony_ci *        copyright notice, this list of conditions and the following
208c2ecf20Sopenharmony_ci *        disclaimer in the documentation and/or other materials
218c2ecf20Sopenharmony_ci *        provided with the distribution.
228c2ecf20Sopenharmony_ci *
238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
308c2ecf20Sopenharmony_ci * SOFTWARE.
318c2ecf20Sopenharmony_ci *
328c2ecf20Sopenharmony_ci */
338c2ecf20Sopenharmony_ci#include <linux/kernel.h>
348c2ecf20Sopenharmony_ci#include <linux/in.h>
358c2ecf20Sopenharmony_ci#include <linux/slab.h>
368c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
378c2ecf20Sopenharmony_ci#include <linux/ratelimit.h>
388c2ecf20Sopenharmony_ci#include <net/addrconf.h>
398c2ecf20Sopenharmony_ci#include <rdma/ib_cm.h>
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci#include "rds_single_path.h"
428c2ecf20Sopenharmony_ci#include "rds.h"
438c2ecf20Sopenharmony_ci#include "ib.h"
448c2ecf20Sopenharmony_ci#include "ib_mr.h"
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci/*
478c2ecf20Sopenharmony_ci * Set the selected protocol version
488c2ecf20Sopenharmony_ci */
498c2ecf20Sopenharmony_cistatic void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version)
508c2ecf20Sopenharmony_ci{
518c2ecf20Sopenharmony_ci	conn->c_version = version;
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci/*
558c2ecf20Sopenharmony_ci * Set up flow control
568c2ecf20Sopenharmony_ci */
578c2ecf20Sopenharmony_cistatic void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits)
588c2ecf20Sopenharmony_ci{
598c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	if (rds_ib_sysctl_flow_control && credits != 0) {
628c2ecf20Sopenharmony_ci		/* We're doing flow control */
638c2ecf20Sopenharmony_ci		ic->i_flowctl = 1;
648c2ecf20Sopenharmony_ci		rds_ib_send_add_credits(conn, credits);
658c2ecf20Sopenharmony_ci	} else {
668c2ecf20Sopenharmony_ci		ic->i_flowctl = 0;
678c2ecf20Sopenharmony_ci	}
688c2ecf20Sopenharmony_ci}
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci/*
718c2ecf20Sopenharmony_ci * Tune RNR behavior. Without flow control, we use a rather
728c2ecf20Sopenharmony_ci * low timeout, but not the absolute minimum - this should
738c2ecf20Sopenharmony_ci * be tunable.
748c2ecf20Sopenharmony_ci *
758c2ecf20Sopenharmony_ci * We already set the RNR retry count to 7 (which is the
768c2ecf20Sopenharmony_ci * smallest infinite number :-) above.
778c2ecf20Sopenharmony_ci * If flow control is off, we want to change this back to 0
788c2ecf20Sopenharmony_ci * so that we learn quickly when our credit accounting is
798c2ecf20Sopenharmony_ci * buggy.
808c2ecf20Sopenharmony_ci *
818c2ecf20Sopenharmony_ci * Caller passes in a qp_attr pointer - don't waste stack spacv
828c2ecf20Sopenharmony_ci * by allocation this twice.
838c2ecf20Sopenharmony_ci */
848c2ecf20Sopenharmony_cistatic void
858c2ecf20Sopenharmony_cirds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr)
868c2ecf20Sopenharmony_ci{
878c2ecf20Sopenharmony_ci	int ret;
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci	attr->min_rnr_timer = IB_RNR_TIMER_000_32;
908c2ecf20Sopenharmony_ci	ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER);
918c2ecf20Sopenharmony_ci	if (ret)
928c2ecf20Sopenharmony_ci		printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d\n", -ret);
938c2ecf20Sopenharmony_ci}
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci/*
968c2ecf20Sopenharmony_ci * Connection established.
978c2ecf20Sopenharmony_ci * We get here for both outgoing and incoming connection.
988c2ecf20Sopenharmony_ci */
998c2ecf20Sopenharmony_civoid rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
1008c2ecf20Sopenharmony_ci{
1018c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
1028c2ecf20Sopenharmony_ci	const union rds_ib_conn_priv *dp = NULL;
1038c2ecf20Sopenharmony_ci	struct ib_qp_attr qp_attr;
1048c2ecf20Sopenharmony_ci	__be64 ack_seq = 0;
1058c2ecf20Sopenharmony_ci	__be32 credit = 0;
1068c2ecf20Sopenharmony_ci	u8 major = 0;
1078c2ecf20Sopenharmony_ci	u8 minor = 0;
1088c2ecf20Sopenharmony_ci	int err;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	dp = event->param.conn.private_data;
1118c2ecf20Sopenharmony_ci	if (conn->c_isv6) {
1128c2ecf20Sopenharmony_ci		if (event->param.conn.private_data_len >=
1138c2ecf20Sopenharmony_ci		    sizeof(struct rds6_ib_connect_private)) {
1148c2ecf20Sopenharmony_ci			major = dp->ricp_v6.dp_protocol_major;
1158c2ecf20Sopenharmony_ci			minor = dp->ricp_v6.dp_protocol_minor;
1168c2ecf20Sopenharmony_ci			credit = dp->ricp_v6.dp_credit;
1178c2ecf20Sopenharmony_ci			/* dp structure start is not guaranteed to be 8 bytes
1188c2ecf20Sopenharmony_ci			 * aligned.  Since dp_ack_seq is 64-bit extended load
1198c2ecf20Sopenharmony_ci			 * operations can be used so go through get_unaligned
1208c2ecf20Sopenharmony_ci			 * to avoid unaligned errors.
1218c2ecf20Sopenharmony_ci			 */
1228c2ecf20Sopenharmony_ci			ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq);
1238c2ecf20Sopenharmony_ci		}
1248c2ecf20Sopenharmony_ci	} else if (event->param.conn.private_data_len >=
1258c2ecf20Sopenharmony_ci		   sizeof(struct rds_ib_connect_private)) {
1268c2ecf20Sopenharmony_ci		major = dp->ricp_v4.dp_protocol_major;
1278c2ecf20Sopenharmony_ci		minor = dp->ricp_v4.dp_protocol_minor;
1288c2ecf20Sopenharmony_ci		credit = dp->ricp_v4.dp_credit;
1298c2ecf20Sopenharmony_ci		ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq);
1308c2ecf20Sopenharmony_ci	}
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	/* make sure it isn't empty data */
1338c2ecf20Sopenharmony_ci	if (major) {
1348c2ecf20Sopenharmony_ci		rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor));
1358c2ecf20Sopenharmony_ci		rds_ib_set_flow_control(conn, be32_to_cpu(credit));
1368c2ecf20Sopenharmony_ci	}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	if (conn->c_version < RDS_PROTOCOL_VERSION) {
1398c2ecf20Sopenharmony_ci		if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
1408c2ecf20Sopenharmony_ci			pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
1418c2ecf20Sopenharmony_ci				  &conn->c_laddr, &conn->c_faddr,
1428c2ecf20Sopenharmony_ci				  RDS_PROTOCOL_MAJOR(conn->c_version),
1438c2ecf20Sopenharmony_ci				  RDS_PROTOCOL_MINOR(conn->c_version));
1448c2ecf20Sopenharmony_ci			rds_conn_destroy(conn);
1458c2ecf20Sopenharmony_ci			return;
1468c2ecf20Sopenharmony_ci		}
1478c2ecf20Sopenharmony_ci	}
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci	pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n",
1508c2ecf20Sopenharmony_ci		  ic->i_active_side ? "Active" : "Passive",
1518c2ecf20Sopenharmony_ci		  &conn->c_laddr, &conn->c_faddr, conn->c_tos,
1528c2ecf20Sopenharmony_ci		  RDS_PROTOCOL_MAJOR(conn->c_version),
1538c2ecf20Sopenharmony_ci		  RDS_PROTOCOL_MINOR(conn->c_version),
1548c2ecf20Sopenharmony_ci		  ic->i_flowctl ? ", flow control" : "");
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	/* receive sl from the peer */
1578c2ecf20Sopenharmony_ci	ic->i_sl = ic->i_cm_id->route.path_rec->sl;
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci	atomic_set(&ic->i_cq_quiesce, 0);
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	/* Init rings and fill recv. this needs to wait until protocol
1628c2ecf20Sopenharmony_ci	 * negotiation is complete, since ring layout is different
1638c2ecf20Sopenharmony_ci	 * from 3.1 to 4.1.
1648c2ecf20Sopenharmony_ci	 */
1658c2ecf20Sopenharmony_ci	rds_ib_send_init_ring(ic);
1668c2ecf20Sopenharmony_ci	rds_ib_recv_init_ring(ic);
1678c2ecf20Sopenharmony_ci	/* Post receive buffers - as a side effect, this will update
1688c2ecf20Sopenharmony_ci	 * the posted credit count. */
1698c2ecf20Sopenharmony_ci	rds_ib_recv_refill(conn, 1, GFP_KERNEL);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	/* Tune RNR behavior */
1728c2ecf20Sopenharmony_ci	rds_ib_tune_rnr(ic, &qp_attr);
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTS;
1758c2ecf20Sopenharmony_ci	err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
1768c2ecf20Sopenharmony_ci	if (err)
1778c2ecf20Sopenharmony_ci		printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	/* update ib_device with this local ipaddr */
1808c2ecf20Sopenharmony_ci	err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr);
1818c2ecf20Sopenharmony_ci	if (err)
1828c2ecf20Sopenharmony_ci		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
1838c2ecf20Sopenharmony_ci			err);
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	/* If the peer gave us the last packet it saw, process this as if
1868c2ecf20Sopenharmony_ci	 * we had received a regular ACK. */
1878c2ecf20Sopenharmony_ci	if (dp) {
1888c2ecf20Sopenharmony_ci		if (ack_seq)
1898c2ecf20Sopenharmony_ci			rds_send_drop_acked(conn, be64_to_cpu(ack_seq),
1908c2ecf20Sopenharmony_ci					    NULL);
1918c2ecf20Sopenharmony_ci	}
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	conn->c_proposed_version = conn->c_version;
1948c2ecf20Sopenharmony_ci	rds_connect_complete(conn);
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_cistatic void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
1988c2ecf20Sopenharmony_ci				      struct rdma_conn_param *conn_param,
1998c2ecf20Sopenharmony_ci				      union rds_ib_conn_priv *dp,
2008c2ecf20Sopenharmony_ci				      u32 protocol_version,
2018c2ecf20Sopenharmony_ci				      u32 max_responder_resources,
2028c2ecf20Sopenharmony_ci				      u32 max_initiator_depth,
2038c2ecf20Sopenharmony_ci				      bool isv6)
2048c2ecf20Sopenharmony_ci{
2058c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
2068c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	memset(conn_param, 0, sizeof(struct rdma_conn_param));
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci	conn_param->responder_resources =
2118c2ecf20Sopenharmony_ci		min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
2128c2ecf20Sopenharmony_ci	conn_param->initiator_depth =
2138c2ecf20Sopenharmony_ci		min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
2148c2ecf20Sopenharmony_ci	conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
2158c2ecf20Sopenharmony_ci	conn_param->rnr_retry_count = 7;
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci	if (dp) {
2188c2ecf20Sopenharmony_ci		memset(dp, 0, sizeof(*dp));
2198c2ecf20Sopenharmony_ci		if (isv6) {
2208c2ecf20Sopenharmony_ci			dp->ricp_v6.dp_saddr = conn->c_laddr;
2218c2ecf20Sopenharmony_ci			dp->ricp_v6.dp_daddr = conn->c_faddr;
2228c2ecf20Sopenharmony_ci			dp->ricp_v6.dp_protocol_major =
2238c2ecf20Sopenharmony_ci			    RDS_PROTOCOL_MAJOR(protocol_version);
2248c2ecf20Sopenharmony_ci			dp->ricp_v6.dp_protocol_minor =
2258c2ecf20Sopenharmony_ci			    RDS_PROTOCOL_MINOR(protocol_version);
2268c2ecf20Sopenharmony_ci			dp->ricp_v6.dp_protocol_minor_mask =
2278c2ecf20Sopenharmony_ci			    cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
2288c2ecf20Sopenharmony_ci			dp->ricp_v6.dp_ack_seq =
2298c2ecf20Sopenharmony_ci			    cpu_to_be64(rds_ib_piggyb_ack(ic));
2308c2ecf20Sopenharmony_ci			dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos;
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci			conn_param->private_data = &dp->ricp_v6;
2338c2ecf20Sopenharmony_ci			conn_param->private_data_len = sizeof(dp->ricp_v6);
2348c2ecf20Sopenharmony_ci		} else {
2358c2ecf20Sopenharmony_ci			dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3];
2368c2ecf20Sopenharmony_ci			dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3];
2378c2ecf20Sopenharmony_ci			dp->ricp_v4.dp_protocol_major =
2388c2ecf20Sopenharmony_ci			    RDS_PROTOCOL_MAJOR(protocol_version);
2398c2ecf20Sopenharmony_ci			dp->ricp_v4.dp_protocol_minor =
2408c2ecf20Sopenharmony_ci			    RDS_PROTOCOL_MINOR(protocol_version);
2418c2ecf20Sopenharmony_ci			dp->ricp_v4.dp_protocol_minor_mask =
2428c2ecf20Sopenharmony_ci			    cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
2438c2ecf20Sopenharmony_ci			dp->ricp_v4.dp_ack_seq =
2448c2ecf20Sopenharmony_ci			    cpu_to_be64(rds_ib_piggyb_ack(ic));
2458c2ecf20Sopenharmony_ci			dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos;
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci			conn_param->private_data = &dp->ricp_v4;
2488c2ecf20Sopenharmony_ci			conn_param->private_data_len = sizeof(dp->ricp_v4);
2498c2ecf20Sopenharmony_ci		}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci		/* Advertise flow control */
2528c2ecf20Sopenharmony_ci		if (ic->i_flowctl) {
2538c2ecf20Sopenharmony_ci			unsigned int credits;
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci			credits = IB_GET_POST_CREDITS
2568c2ecf20Sopenharmony_ci				(atomic_read(&ic->i_credits));
2578c2ecf20Sopenharmony_ci			if (isv6)
2588c2ecf20Sopenharmony_ci				dp->ricp_v6.dp_credit = cpu_to_be32(credits);
2598c2ecf20Sopenharmony_ci			else
2608c2ecf20Sopenharmony_ci				dp->ricp_v4.dp_credit = cpu_to_be32(credits);
2618c2ecf20Sopenharmony_ci			atomic_sub(IB_SET_POST_CREDITS(credits),
2628c2ecf20Sopenharmony_ci				   &ic->i_credits);
2638c2ecf20Sopenharmony_ci		}
2648c2ecf20Sopenharmony_ci	}
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_cistatic void rds_ib_cq_event_handler(struct ib_event *event, void *data)
2688c2ecf20Sopenharmony_ci{
2698c2ecf20Sopenharmony_ci	rdsdebug("event %u (%s) data %p\n",
2708c2ecf20Sopenharmony_ci		 event->event, ib_event_msg(event->event), data);
2718c2ecf20Sopenharmony_ci}
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci/* Plucking the oldest entry from the ring can be done concurrently with
2748c2ecf20Sopenharmony_ci * the thread refilling the ring.  Each ring operation is protected by
2758c2ecf20Sopenharmony_ci * spinlocks and the transient state of refilling doesn't change the
2768c2ecf20Sopenharmony_ci * recording of which entry is oldest.
2778c2ecf20Sopenharmony_ci *
2788c2ecf20Sopenharmony_ci * This relies on IB only calling one cq comp_handler for each cq so that
2798c2ecf20Sopenharmony_ci * there will only be one caller of rds_recv_incoming() per RDS connection.
2808c2ecf20Sopenharmony_ci */
2818c2ecf20Sopenharmony_cistatic void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context)
2828c2ecf20Sopenharmony_ci{
2838c2ecf20Sopenharmony_ci	struct rds_connection *conn = context;
2848c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	rdsdebug("conn %p cq %p\n", conn, cq);
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	rds_ib_stats_inc(s_ib_evt_handler_call);
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci	tasklet_schedule(&ic->i_recv_tasklet);
2918c2ecf20Sopenharmony_ci}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_cistatic void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq,
2948c2ecf20Sopenharmony_ci		     struct ib_wc *wcs)
2958c2ecf20Sopenharmony_ci{
2968c2ecf20Sopenharmony_ci	int nr, i;
2978c2ecf20Sopenharmony_ci	struct ib_wc *wc;
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci	while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
3008c2ecf20Sopenharmony_ci		for (i = 0; i < nr; i++) {
3018c2ecf20Sopenharmony_ci			wc = wcs + i;
3028c2ecf20Sopenharmony_ci			rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
3038c2ecf20Sopenharmony_ci				 (unsigned long long)wc->wr_id, wc->status,
3048c2ecf20Sopenharmony_ci				 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci			if (wc->wr_id <= ic->i_send_ring.w_nr ||
3078c2ecf20Sopenharmony_ci			    wc->wr_id == RDS_IB_ACK_WR_ID)
3088c2ecf20Sopenharmony_ci				rds_ib_send_cqe_handler(ic, wc);
3098c2ecf20Sopenharmony_ci			else
3108c2ecf20Sopenharmony_ci				rds_ib_mr_cqe_handler(ic, wc);
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci		}
3138c2ecf20Sopenharmony_ci	}
3148c2ecf20Sopenharmony_ci}
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_cistatic void rds_ib_tasklet_fn_send(unsigned long data)
3178c2ecf20Sopenharmony_ci{
3188c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
3198c2ecf20Sopenharmony_ci	struct rds_connection *conn = ic->conn;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	rds_ib_stats_inc(s_ib_tasklet_call);
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	/* if cq has been already reaped, ignore incoming cq event */
3248c2ecf20Sopenharmony_ci	if (atomic_read(&ic->i_cq_quiesce))
3258c2ecf20Sopenharmony_ci		return;
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
3288c2ecf20Sopenharmony_ci	ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
3298c2ecf20Sopenharmony_ci	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	if (rds_conn_up(conn) &&
3328c2ecf20Sopenharmony_ci	    (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
3338c2ecf20Sopenharmony_ci	    test_bit(0, &conn->c_map_queued)))
3348c2ecf20Sopenharmony_ci		rds_send_xmit(&ic->conn->c_path[0]);
3358c2ecf20Sopenharmony_ci}
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_cistatic void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
3388c2ecf20Sopenharmony_ci		     struct ib_wc *wcs,
3398c2ecf20Sopenharmony_ci		     struct rds_ib_ack_state *ack_state)
3408c2ecf20Sopenharmony_ci{
3418c2ecf20Sopenharmony_ci	int nr, i;
3428c2ecf20Sopenharmony_ci	struct ib_wc *wc;
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
3458c2ecf20Sopenharmony_ci		for (i = 0; i < nr; i++) {
3468c2ecf20Sopenharmony_ci			wc = wcs + i;
3478c2ecf20Sopenharmony_ci			rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
3488c2ecf20Sopenharmony_ci				 (unsigned long long)wc->wr_id, wc->status,
3498c2ecf20Sopenharmony_ci				 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci			rds_ib_recv_cqe_handler(ic, wc, ack_state);
3528c2ecf20Sopenharmony_ci		}
3538c2ecf20Sopenharmony_ci	}
3548c2ecf20Sopenharmony_ci}
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_cistatic void rds_ib_tasklet_fn_recv(unsigned long data)
3578c2ecf20Sopenharmony_ci{
3588c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
3598c2ecf20Sopenharmony_ci	struct rds_connection *conn = ic->conn;
3608c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
3618c2ecf20Sopenharmony_ci	struct rds_ib_ack_state state;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	if (!rds_ibdev)
3648c2ecf20Sopenharmony_ci		rds_conn_drop(conn);
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	rds_ib_stats_inc(s_ib_tasklet_call);
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	/* if cq has been already reaped, ignore incoming cq event */
3698c2ecf20Sopenharmony_ci	if (atomic_read(&ic->i_cq_quiesce))
3708c2ecf20Sopenharmony_ci		return;
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	memset(&state, 0, sizeof(state));
3738c2ecf20Sopenharmony_ci	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
3748c2ecf20Sopenharmony_ci	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
3758c2ecf20Sopenharmony_ci	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	if (state.ack_next_valid)
3788c2ecf20Sopenharmony_ci		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
3798c2ecf20Sopenharmony_ci	if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
3808c2ecf20Sopenharmony_ci		rds_send_drop_acked(conn, state.ack_recv, NULL);
3818c2ecf20Sopenharmony_ci		ic->i_ack_recv = state.ack_recv;
3828c2ecf20Sopenharmony_ci	}
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci	if (rds_conn_up(conn))
3858c2ecf20Sopenharmony_ci		rds_ib_attempt_ack(ic);
3868c2ecf20Sopenharmony_ci}
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_cistatic void rds_ib_qp_event_handler(struct ib_event *event, void *data)
3898c2ecf20Sopenharmony_ci{
3908c2ecf20Sopenharmony_ci	struct rds_connection *conn = data;
3918c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_ci	rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
3948c2ecf20Sopenharmony_ci		 ib_event_msg(event->event));
3958c2ecf20Sopenharmony_ci
3968c2ecf20Sopenharmony_ci	switch (event->event) {
3978c2ecf20Sopenharmony_ci	case IB_EVENT_COMM_EST:
3988c2ecf20Sopenharmony_ci		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
3998c2ecf20Sopenharmony_ci		break;
4008c2ecf20Sopenharmony_ci	default:
4018c2ecf20Sopenharmony_ci		rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n",
4028c2ecf20Sopenharmony_ci			 event->event, ib_event_msg(event->event),
4038c2ecf20Sopenharmony_ci			 &conn->c_laddr, &conn->c_faddr);
4048c2ecf20Sopenharmony_ci		rds_conn_drop(conn);
4058c2ecf20Sopenharmony_ci		break;
4068c2ecf20Sopenharmony_ci	}
4078c2ecf20Sopenharmony_ci}
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_cistatic void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context)
4108c2ecf20Sopenharmony_ci{
4118c2ecf20Sopenharmony_ci	struct rds_connection *conn = context;
4128c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci	rdsdebug("conn %p cq %p\n", conn, cq);
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	rds_ib_stats_inc(s_ib_evt_handler_call);
4178c2ecf20Sopenharmony_ci
4188c2ecf20Sopenharmony_ci	tasklet_schedule(&ic->i_send_tasklet);
4198c2ecf20Sopenharmony_ci}
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_cistatic inline int ibdev_get_unused_vector(struct rds_ib_device *rds_ibdev)
4228c2ecf20Sopenharmony_ci{
4238c2ecf20Sopenharmony_ci	int min = rds_ibdev->vector_load[rds_ibdev->dev->num_comp_vectors - 1];
4248c2ecf20Sopenharmony_ci	int index = rds_ibdev->dev->num_comp_vectors - 1;
4258c2ecf20Sopenharmony_ci	int i;
4268c2ecf20Sopenharmony_ci
4278c2ecf20Sopenharmony_ci	for (i = rds_ibdev->dev->num_comp_vectors - 1; i >= 0; i--) {
4288c2ecf20Sopenharmony_ci		if (rds_ibdev->vector_load[i] < min) {
4298c2ecf20Sopenharmony_ci			index = i;
4308c2ecf20Sopenharmony_ci			min = rds_ibdev->vector_load[i];
4318c2ecf20Sopenharmony_ci		}
4328c2ecf20Sopenharmony_ci	}
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci	rds_ibdev->vector_load[index]++;
4358c2ecf20Sopenharmony_ci	return index;
4368c2ecf20Sopenharmony_ci}
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_cistatic inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
4398c2ecf20Sopenharmony_ci{
4408c2ecf20Sopenharmony_ci	rds_ibdev->vector_load[index]--;
4418c2ecf20Sopenharmony_ci}
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_cistatic void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr,
4448c2ecf20Sopenharmony_ci		dma_addr_t dma_addr, enum dma_data_direction dir)
4458c2ecf20Sopenharmony_ci{
4468c2ecf20Sopenharmony_ci	ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir);
4478c2ecf20Sopenharmony_ci	kfree(hdr);
4488c2ecf20Sopenharmony_ci}
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_cistatic struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev,
4518c2ecf20Sopenharmony_ci		dma_addr_t *dma_addr, enum dma_data_direction dir)
4528c2ecf20Sopenharmony_ci{
4538c2ecf20Sopenharmony_ci	struct rds_header *hdr;
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci	hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev));
4568c2ecf20Sopenharmony_ci	if (!hdr)
4578c2ecf20Sopenharmony_ci		return NULL;
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	*dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr),
4608c2ecf20Sopenharmony_ci				      DMA_BIDIRECTIONAL);
4618c2ecf20Sopenharmony_ci	if (ib_dma_mapping_error(dev, *dma_addr)) {
4628c2ecf20Sopenharmony_ci		kfree(hdr);
4638c2ecf20Sopenharmony_ci		return NULL;
4648c2ecf20Sopenharmony_ci	}
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci	return hdr;
4678c2ecf20Sopenharmony_ci}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci/* Free the DMA memory used to store struct rds_header.
4708c2ecf20Sopenharmony_ci *
4718c2ecf20Sopenharmony_ci * @dev: the RDS IB device
4728c2ecf20Sopenharmony_ci * @hdrs: pointer to the array storing DMA memory pointers
4738c2ecf20Sopenharmony_ci * @dma_addrs: pointer to the array storing DMA addresses
4748c2ecf20Sopenharmony_ci * @num_hdars: number of headers to free.
4758c2ecf20Sopenharmony_ci */
4768c2ecf20Sopenharmony_cistatic void rds_dma_hdrs_free(struct rds_ib_device *dev,
4778c2ecf20Sopenharmony_ci		struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs,
4788c2ecf20Sopenharmony_ci		enum dma_data_direction dir)
4798c2ecf20Sopenharmony_ci{
4808c2ecf20Sopenharmony_ci	u32 i;
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_ci	for (i = 0; i < num_hdrs; i++)
4838c2ecf20Sopenharmony_ci		rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir);
4848c2ecf20Sopenharmony_ci	kvfree(hdrs);
4858c2ecf20Sopenharmony_ci	kvfree(dma_addrs);
4868c2ecf20Sopenharmony_ci}
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci/* Allocate DMA coherent memory to be used to store struct rds_header for
4908c2ecf20Sopenharmony_ci * sending/receiving packets.  The pointers to the DMA memory and the
4918c2ecf20Sopenharmony_ci * associated DMA addresses are stored in two arrays.
4928c2ecf20Sopenharmony_ci *
4938c2ecf20Sopenharmony_ci * @dev: the RDS IB device
4948c2ecf20Sopenharmony_ci * @dma_addrs: pointer to the array for storing DMA addresses
4958c2ecf20Sopenharmony_ci * @num_hdrs: number of headers to allocate
4968c2ecf20Sopenharmony_ci *
4978c2ecf20Sopenharmony_ci * It returns the pointer to the array storing the DMA memory pointers.  On
4988c2ecf20Sopenharmony_ci * error, NULL pointer is returned.
4998c2ecf20Sopenharmony_ci */
5008c2ecf20Sopenharmony_cistatic struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev,
5018c2ecf20Sopenharmony_ci		dma_addr_t **dma_addrs, u32 num_hdrs,
5028c2ecf20Sopenharmony_ci		enum dma_data_direction dir)
5038c2ecf20Sopenharmony_ci{
5048c2ecf20Sopenharmony_ci	struct rds_header **hdrs;
5058c2ecf20Sopenharmony_ci	dma_addr_t *hdr_daddrs;
5068c2ecf20Sopenharmony_ci	u32 i;
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_ci	hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
5098c2ecf20Sopenharmony_ci			     ibdev_to_node(dev->dev));
5108c2ecf20Sopenharmony_ci	if (!hdrs)
5118c2ecf20Sopenharmony_ci		return NULL;
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
5148c2ecf20Sopenharmony_ci				   ibdev_to_node(dev->dev));
5158c2ecf20Sopenharmony_ci	if (!hdr_daddrs) {
5168c2ecf20Sopenharmony_ci		kvfree(hdrs);
5178c2ecf20Sopenharmony_ci		return NULL;
5188c2ecf20Sopenharmony_ci	}
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_ci	for (i = 0; i < num_hdrs; i++) {
5218c2ecf20Sopenharmony_ci		hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir);
5228c2ecf20Sopenharmony_ci		if (!hdrs[i]) {
5238c2ecf20Sopenharmony_ci			rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir);
5248c2ecf20Sopenharmony_ci			return NULL;
5258c2ecf20Sopenharmony_ci		}
5268c2ecf20Sopenharmony_ci	}
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	*dma_addrs = hdr_daddrs;
5298c2ecf20Sopenharmony_ci	return hdrs;
5308c2ecf20Sopenharmony_ci}
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci/*
5338c2ecf20Sopenharmony_ci * This needs to be very careful to not leave IS_ERR pointers around for
5348c2ecf20Sopenharmony_ci * cleanup to trip over.
5358c2ecf20Sopenharmony_ci */
5368c2ecf20Sopenharmony_cistatic int rds_ib_setup_qp(struct rds_connection *conn)
5378c2ecf20Sopenharmony_ci{
5388c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
5398c2ecf20Sopenharmony_ci	struct ib_device *dev = ic->i_cm_id->device;
5408c2ecf20Sopenharmony_ci	struct ib_qp_init_attr attr;
5418c2ecf20Sopenharmony_ci	struct ib_cq_init_attr cq_attr = {};
5428c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev;
5438c2ecf20Sopenharmony_ci	unsigned long max_wrs;
5448c2ecf20Sopenharmony_ci	int ret, fr_queue_space;
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci	/*
5478c2ecf20Sopenharmony_ci	 * It's normal to see a null device if an incoming connection races
5488c2ecf20Sopenharmony_ci	 * with device removal, so we don't print a warning.
5498c2ecf20Sopenharmony_ci	 */
5508c2ecf20Sopenharmony_ci	rds_ibdev = rds_ib_get_client_data(dev);
5518c2ecf20Sopenharmony_ci	if (!rds_ibdev)
5528c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci	/* The fr_queue_space is currently set to 512, to add extra space on
5558c2ecf20Sopenharmony_ci	 * completion queue and send queue. This extra space is used for FRWR
5568c2ecf20Sopenharmony_ci	 * registration and invalidation work requests
5578c2ecf20Sopenharmony_ci	 */
5588c2ecf20Sopenharmony_ci	fr_queue_space = RDS_IB_DEFAULT_FR_WR;
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	/* add the conn now so that connection establishment has the dev */
5618c2ecf20Sopenharmony_ci	rds_ib_add_conn(rds_ibdev, conn);
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ci	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ?
5648c2ecf20Sopenharmony_ci		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr;
5658c2ecf20Sopenharmony_ci	if (ic->i_send_ring.w_nr != max_wrs)
5668c2ecf20Sopenharmony_ci		rds_ib_ring_resize(&ic->i_send_ring, max_wrs);
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ?
5698c2ecf20Sopenharmony_ci		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr;
5708c2ecf20Sopenharmony_ci	if (ic->i_recv_ring.w_nr != max_wrs)
5718c2ecf20Sopenharmony_ci		rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	/* Protection domain and memory range */
5748c2ecf20Sopenharmony_ci	ic->i_pd = rds_ibdev->pd;
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci	ic->i_scq_vector = ibdev_get_unused_vector(rds_ibdev);
5778c2ecf20Sopenharmony_ci	cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1;
5788c2ecf20Sopenharmony_ci	cq_attr.comp_vector = ic->i_scq_vector;
5798c2ecf20Sopenharmony_ci	ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
5808c2ecf20Sopenharmony_ci				     rds_ib_cq_event_handler, conn,
5818c2ecf20Sopenharmony_ci				     &cq_attr);
5828c2ecf20Sopenharmony_ci	if (IS_ERR(ic->i_send_cq)) {
5838c2ecf20Sopenharmony_ci		ret = PTR_ERR(ic->i_send_cq);
5848c2ecf20Sopenharmony_ci		ic->i_send_cq = NULL;
5858c2ecf20Sopenharmony_ci		ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
5868c2ecf20Sopenharmony_ci		rdsdebug("ib_create_cq send failed: %d\n", ret);
5878c2ecf20Sopenharmony_ci		goto rds_ibdev_out;
5888c2ecf20Sopenharmony_ci	}
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_ci	ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
5918c2ecf20Sopenharmony_ci	cq_attr.cqe = ic->i_recv_ring.w_nr;
5928c2ecf20Sopenharmony_ci	cq_attr.comp_vector = ic->i_rcq_vector;
5938c2ecf20Sopenharmony_ci	ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
5948c2ecf20Sopenharmony_ci				     rds_ib_cq_event_handler, conn,
5958c2ecf20Sopenharmony_ci				     &cq_attr);
5968c2ecf20Sopenharmony_ci	if (IS_ERR(ic->i_recv_cq)) {
5978c2ecf20Sopenharmony_ci		ret = PTR_ERR(ic->i_recv_cq);
5988c2ecf20Sopenharmony_ci		ic->i_recv_cq = NULL;
5998c2ecf20Sopenharmony_ci		ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
6008c2ecf20Sopenharmony_ci		rdsdebug("ib_create_cq recv failed: %d\n", ret);
6018c2ecf20Sopenharmony_ci		goto send_cq_out;
6028c2ecf20Sopenharmony_ci	}
6038c2ecf20Sopenharmony_ci
6048c2ecf20Sopenharmony_ci	ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
6058c2ecf20Sopenharmony_ci	if (ret) {
6068c2ecf20Sopenharmony_ci		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
6078c2ecf20Sopenharmony_ci		goto recv_cq_out;
6088c2ecf20Sopenharmony_ci	}
6098c2ecf20Sopenharmony_ci
6108c2ecf20Sopenharmony_ci	ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
6118c2ecf20Sopenharmony_ci	if (ret) {
6128c2ecf20Sopenharmony_ci		rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
6138c2ecf20Sopenharmony_ci		goto recv_cq_out;
6148c2ecf20Sopenharmony_ci	}
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci	/* XXX negotiate max send/recv with remote? */
6178c2ecf20Sopenharmony_ci	memset(&attr, 0, sizeof(attr));
6188c2ecf20Sopenharmony_ci	attr.event_handler = rds_ib_qp_event_handler;
6198c2ecf20Sopenharmony_ci	attr.qp_context = conn;
6208c2ecf20Sopenharmony_ci	/* + 1 to allow for the single ack message */
6218c2ecf20Sopenharmony_ci	attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1;
6228c2ecf20Sopenharmony_ci	attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
6238c2ecf20Sopenharmony_ci	attr.cap.max_send_sge = rds_ibdev->max_sge;
6248c2ecf20Sopenharmony_ci	attr.cap.max_recv_sge = RDS_IB_RECV_SGE;
6258c2ecf20Sopenharmony_ci	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
6268c2ecf20Sopenharmony_ci	attr.qp_type = IB_QPT_RC;
6278c2ecf20Sopenharmony_ci	attr.send_cq = ic->i_send_cq;
6288c2ecf20Sopenharmony_ci	attr.recv_cq = ic->i_recv_cq;
6298c2ecf20Sopenharmony_ci
6308c2ecf20Sopenharmony_ci	/*
6318c2ecf20Sopenharmony_ci	 * XXX this can fail if max_*_wr is too large?  Are we supposed
6328c2ecf20Sopenharmony_ci	 * to back off until we get a value that the hardware can support?
6338c2ecf20Sopenharmony_ci	 */
6348c2ecf20Sopenharmony_ci	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
6358c2ecf20Sopenharmony_ci	if (ret) {
6368c2ecf20Sopenharmony_ci		rdsdebug("rdma_create_qp failed: %d\n", ret);
6378c2ecf20Sopenharmony_ci		goto recv_cq_out;
6388c2ecf20Sopenharmony_ci	}
6398c2ecf20Sopenharmony_ci
6408c2ecf20Sopenharmony_ci	ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma,
6418c2ecf20Sopenharmony_ci					     ic->i_send_ring.w_nr,
6428c2ecf20Sopenharmony_ci					     DMA_TO_DEVICE);
6438c2ecf20Sopenharmony_ci	if (!ic->i_send_hdrs) {
6448c2ecf20Sopenharmony_ci		ret = -ENOMEM;
6458c2ecf20Sopenharmony_ci		rdsdebug("DMA send hdrs alloc failed\n");
6468c2ecf20Sopenharmony_ci		goto qp_out;
6478c2ecf20Sopenharmony_ci	}
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci	ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma,
6508c2ecf20Sopenharmony_ci					     ic->i_recv_ring.w_nr,
6518c2ecf20Sopenharmony_ci					     DMA_FROM_DEVICE);
6528c2ecf20Sopenharmony_ci	if (!ic->i_recv_hdrs) {
6538c2ecf20Sopenharmony_ci		ret = -ENOMEM;
6548c2ecf20Sopenharmony_ci		rdsdebug("DMA recv hdrs alloc failed\n");
6558c2ecf20Sopenharmony_ci		goto send_hdrs_dma_out;
6568c2ecf20Sopenharmony_ci	}
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_ci	ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma,
6598c2ecf20Sopenharmony_ci				      DMA_TO_DEVICE);
6608c2ecf20Sopenharmony_ci	if (!ic->i_ack) {
6618c2ecf20Sopenharmony_ci		ret = -ENOMEM;
6628c2ecf20Sopenharmony_ci		rdsdebug("DMA ack header alloc failed\n");
6638c2ecf20Sopenharmony_ci		goto recv_hdrs_dma_out;
6648c2ecf20Sopenharmony_ci	}
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci	ic->i_sends = vzalloc_node(array_size(sizeof(struct rds_ib_send_work),
6678c2ecf20Sopenharmony_ci					      ic->i_send_ring.w_nr),
6688c2ecf20Sopenharmony_ci				   ibdev_to_node(dev));
6698c2ecf20Sopenharmony_ci	if (!ic->i_sends) {
6708c2ecf20Sopenharmony_ci		ret = -ENOMEM;
6718c2ecf20Sopenharmony_ci		rdsdebug("send allocation failed\n");
6728c2ecf20Sopenharmony_ci		goto ack_dma_out;
6738c2ecf20Sopenharmony_ci	}
6748c2ecf20Sopenharmony_ci
6758c2ecf20Sopenharmony_ci	ic->i_recvs = vzalloc_node(array_size(sizeof(struct rds_ib_recv_work),
6768c2ecf20Sopenharmony_ci					      ic->i_recv_ring.w_nr),
6778c2ecf20Sopenharmony_ci				   ibdev_to_node(dev));
6788c2ecf20Sopenharmony_ci	if (!ic->i_recvs) {
6798c2ecf20Sopenharmony_ci		ret = -ENOMEM;
6808c2ecf20Sopenharmony_ci		rdsdebug("recv allocation failed\n");
6818c2ecf20Sopenharmony_ci		goto sends_out;
6828c2ecf20Sopenharmony_ci	}
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_ci	rds_ib_recv_init_ack(ic);
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_ci	rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
6878c2ecf20Sopenharmony_ci		 ic->i_send_cq, ic->i_recv_cq);
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	goto out;
6908c2ecf20Sopenharmony_ci
6918c2ecf20Sopenharmony_cisends_out:
6928c2ecf20Sopenharmony_ci	vfree(ic->i_sends);
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_ciack_dma_out:
6958c2ecf20Sopenharmony_ci	rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma,
6968c2ecf20Sopenharmony_ci			 DMA_TO_DEVICE);
6978c2ecf20Sopenharmony_ci	ic->i_ack = NULL;
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_cirecv_hdrs_dma_out:
7008c2ecf20Sopenharmony_ci	rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
7018c2ecf20Sopenharmony_ci			  ic->i_recv_ring.w_nr, DMA_FROM_DEVICE);
7028c2ecf20Sopenharmony_ci	ic->i_recv_hdrs = NULL;
7038c2ecf20Sopenharmony_ci	ic->i_recv_hdrs_dma = NULL;
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_cisend_hdrs_dma_out:
7068c2ecf20Sopenharmony_ci	rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma,
7078c2ecf20Sopenharmony_ci			  ic->i_send_ring.w_nr, DMA_TO_DEVICE);
7088c2ecf20Sopenharmony_ci	ic->i_send_hdrs = NULL;
7098c2ecf20Sopenharmony_ci	ic->i_send_hdrs_dma = NULL;
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_ciqp_out:
7128c2ecf20Sopenharmony_ci	rdma_destroy_qp(ic->i_cm_id);
7138c2ecf20Sopenharmony_cirecv_cq_out:
7148c2ecf20Sopenharmony_ci	ib_destroy_cq(ic->i_recv_cq);
7158c2ecf20Sopenharmony_ci	ic->i_recv_cq = NULL;
7168c2ecf20Sopenharmony_cisend_cq_out:
7178c2ecf20Sopenharmony_ci	ib_destroy_cq(ic->i_send_cq);
7188c2ecf20Sopenharmony_ci	ic->i_send_cq = NULL;
7198c2ecf20Sopenharmony_cirds_ibdev_out:
7208c2ecf20Sopenharmony_ci	rds_ib_remove_conn(rds_ibdev, conn);
7218c2ecf20Sopenharmony_ciout:
7228c2ecf20Sopenharmony_ci	rds_ib_dev_put(rds_ibdev);
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci	return ret;
7258c2ecf20Sopenharmony_ci}
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_cistatic u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
7288c2ecf20Sopenharmony_ci{
7298c2ecf20Sopenharmony_ci	const union rds_ib_conn_priv *dp = event->param.conn.private_data;
7308c2ecf20Sopenharmony_ci	u8 data_len, major, minor;
7318c2ecf20Sopenharmony_ci	u32 version = 0;
7328c2ecf20Sopenharmony_ci	__be16 mask;
7338c2ecf20Sopenharmony_ci	u16 common;
7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_ci	/*
7368c2ecf20Sopenharmony_ci	 * rdma_cm private data is odd - when there is any private data in the
7378c2ecf20Sopenharmony_ci	 * request, we will be given a pretty large buffer without telling us the
7388c2ecf20Sopenharmony_ci	 * original size. The only way to tell the difference is by looking at
7398c2ecf20Sopenharmony_ci	 * the contents, which are initialized to zero.
7408c2ecf20Sopenharmony_ci	 * If the protocol version fields aren't set, this is a connection attempt
7418c2ecf20Sopenharmony_ci	 * from an older version. This could be 3.0 or 2.0 - we can't tell.
7428c2ecf20Sopenharmony_ci	 * We really should have changed this for OFED 1.3 :-(
7438c2ecf20Sopenharmony_ci	 */
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci	/* Be paranoid. RDS always has privdata */
7468c2ecf20Sopenharmony_ci	if (!event->param.conn.private_data_len) {
7478c2ecf20Sopenharmony_ci		printk(KERN_NOTICE "RDS incoming connection has no private data, "
7488c2ecf20Sopenharmony_ci			"rejecting\n");
7498c2ecf20Sopenharmony_ci		return 0;
7508c2ecf20Sopenharmony_ci	}
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci	if (isv6) {
7538c2ecf20Sopenharmony_ci		data_len = sizeof(struct rds6_ib_connect_private);
7548c2ecf20Sopenharmony_ci		major = dp->ricp_v6.dp_protocol_major;
7558c2ecf20Sopenharmony_ci		minor = dp->ricp_v6.dp_protocol_minor;
7568c2ecf20Sopenharmony_ci		mask = dp->ricp_v6.dp_protocol_minor_mask;
7578c2ecf20Sopenharmony_ci	} else {
7588c2ecf20Sopenharmony_ci		data_len = sizeof(struct rds_ib_connect_private);
7598c2ecf20Sopenharmony_ci		major = dp->ricp_v4.dp_protocol_major;
7608c2ecf20Sopenharmony_ci		minor = dp->ricp_v4.dp_protocol_minor;
7618c2ecf20Sopenharmony_ci		mask = dp->ricp_v4.dp_protocol_minor_mask;
7628c2ecf20Sopenharmony_ci	}
7638c2ecf20Sopenharmony_ci
7648c2ecf20Sopenharmony_ci	/* Even if len is crap *now* I still want to check it. -ASG */
7658c2ecf20Sopenharmony_ci	if (event->param.conn.private_data_len < data_len || major == 0)
7668c2ecf20Sopenharmony_ci		return RDS_PROTOCOL_4_0;
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_ci	common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
7698c2ecf20Sopenharmony_ci	if (major == 4 && common) {
7708c2ecf20Sopenharmony_ci		version = RDS_PROTOCOL_4_0;
7718c2ecf20Sopenharmony_ci		while ((common >>= 1) != 0)
7728c2ecf20Sopenharmony_ci			version++;
7738c2ecf20Sopenharmony_ci	} else if (RDS_PROTOCOL_COMPAT_VERSION ==
7748c2ecf20Sopenharmony_ci		   RDS_PROTOCOL(major, minor)) {
7758c2ecf20Sopenharmony_ci		version = RDS_PROTOCOL_COMPAT_VERSION;
7768c2ecf20Sopenharmony_ci	} else {
7778c2ecf20Sopenharmony_ci		if (isv6)
7788c2ecf20Sopenharmony_ci			printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
7798c2ecf20Sopenharmony_ci					   &dp->ricp_v6.dp_saddr, major, minor);
7808c2ecf20Sopenharmony_ci		else
7818c2ecf20Sopenharmony_ci			printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
7828c2ecf20Sopenharmony_ci					   &dp->ricp_v4.dp_saddr, major, minor);
7838c2ecf20Sopenharmony_ci	}
7848c2ecf20Sopenharmony_ci	return version;
7858c2ecf20Sopenharmony_ci}
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
7888c2ecf20Sopenharmony_ci/* Given an IPv6 address, find the net_device which hosts that address and
7898c2ecf20Sopenharmony_ci * return its index.  This is used by the rds_ib_cm_handle_connect() code to
7908c2ecf20Sopenharmony_ci * find the interface index of where an incoming request comes from when
7918c2ecf20Sopenharmony_ci * the request is using a link local address.
7928c2ecf20Sopenharmony_ci *
7938c2ecf20Sopenharmony_ci * Note one problem in this search.  It is possible that two interfaces have
7948c2ecf20Sopenharmony_ci * the same link local address.  Unfortunately, this cannot be solved unless
7958c2ecf20Sopenharmony_ci * the underlying layer gives us the interface which an incoming RDMA connect
7968c2ecf20Sopenharmony_ci * request comes from.
7978c2ecf20Sopenharmony_ci */
7988c2ecf20Sopenharmony_cistatic u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
7998c2ecf20Sopenharmony_ci{
8008c2ecf20Sopenharmony_ci	struct net_device *dev;
8018c2ecf20Sopenharmony_ci	int idx = 0;
8028c2ecf20Sopenharmony_ci
8038c2ecf20Sopenharmony_ci	rcu_read_lock();
8048c2ecf20Sopenharmony_ci	for_each_netdev_rcu(net, dev) {
8058c2ecf20Sopenharmony_ci		if (ipv6_chk_addr(net, addr, dev, 1)) {
8068c2ecf20Sopenharmony_ci			idx = dev->ifindex;
8078c2ecf20Sopenharmony_ci			break;
8088c2ecf20Sopenharmony_ci		}
8098c2ecf20Sopenharmony_ci	}
8108c2ecf20Sopenharmony_ci	rcu_read_unlock();
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci	return idx;
8138c2ecf20Sopenharmony_ci}
8148c2ecf20Sopenharmony_ci#endif
8158c2ecf20Sopenharmony_ci
8168c2ecf20Sopenharmony_ciint rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
8178c2ecf20Sopenharmony_ci			     struct rdma_cm_event *event, bool isv6)
8188c2ecf20Sopenharmony_ci{
8198c2ecf20Sopenharmony_ci	__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
8208c2ecf20Sopenharmony_ci	__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
8218c2ecf20Sopenharmony_ci	const struct rds_ib_conn_priv_cmn *dp_cmn;
8228c2ecf20Sopenharmony_ci	struct rds_connection *conn = NULL;
8238c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = NULL;
8248c2ecf20Sopenharmony_ci	struct rdma_conn_param conn_param;
8258c2ecf20Sopenharmony_ci	const union rds_ib_conn_priv *dp;
8268c2ecf20Sopenharmony_ci	union rds_ib_conn_priv dp_rep;
8278c2ecf20Sopenharmony_ci	struct in6_addr s_mapped_addr;
8288c2ecf20Sopenharmony_ci	struct in6_addr d_mapped_addr;
8298c2ecf20Sopenharmony_ci	const struct in6_addr *saddr6;
8308c2ecf20Sopenharmony_ci	const struct in6_addr *daddr6;
8318c2ecf20Sopenharmony_ci	int destroy = 1;
8328c2ecf20Sopenharmony_ci	u32 ifindex = 0;
8338c2ecf20Sopenharmony_ci	u32 version;
8348c2ecf20Sopenharmony_ci	int err = 1;
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	/* Check whether the remote protocol version matches ours. */
8378c2ecf20Sopenharmony_ci	version = rds_ib_protocol_compatible(event, isv6);
8388c2ecf20Sopenharmony_ci	if (!version) {
8398c2ecf20Sopenharmony_ci		err = RDS_RDMA_REJ_INCOMPAT;
8408c2ecf20Sopenharmony_ci		goto out;
8418c2ecf20Sopenharmony_ci	}
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_ci	dp = event->param.conn.private_data;
8448c2ecf20Sopenharmony_ci	if (isv6) {
8458c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
8468c2ecf20Sopenharmony_ci		dp_cmn = &dp->ricp_v6.dp_cmn;
8478c2ecf20Sopenharmony_ci		saddr6 = &dp->ricp_v6.dp_saddr;
8488c2ecf20Sopenharmony_ci		daddr6 = &dp->ricp_v6.dp_daddr;
8498c2ecf20Sopenharmony_ci		/* If either address is link local, need to find the
8508c2ecf20Sopenharmony_ci		 * interface index in order to create a proper RDS
8518c2ecf20Sopenharmony_ci		 * connection.
8528c2ecf20Sopenharmony_ci		 */
8538c2ecf20Sopenharmony_ci		if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) {
8548c2ecf20Sopenharmony_ci			/* Using init_net for now ..  */
8558c2ecf20Sopenharmony_ci			ifindex = __rds_find_ifindex(&init_net, daddr6);
8568c2ecf20Sopenharmony_ci			/* No index found...  Need to bail out. */
8578c2ecf20Sopenharmony_ci			if (ifindex == 0) {
8588c2ecf20Sopenharmony_ci				err = -EOPNOTSUPP;
8598c2ecf20Sopenharmony_ci				goto out;
8608c2ecf20Sopenharmony_ci			}
8618c2ecf20Sopenharmony_ci		} else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) {
8628c2ecf20Sopenharmony_ci			/* Use our address to find the correct index. */
8638c2ecf20Sopenharmony_ci			ifindex = __rds_find_ifindex(&init_net, daddr6);
8648c2ecf20Sopenharmony_ci			/* No index found...  Need to bail out. */
8658c2ecf20Sopenharmony_ci			if (ifindex == 0) {
8668c2ecf20Sopenharmony_ci				err = -EOPNOTSUPP;
8678c2ecf20Sopenharmony_ci				goto out;
8688c2ecf20Sopenharmony_ci			}
8698c2ecf20Sopenharmony_ci		}
8708c2ecf20Sopenharmony_ci#else
8718c2ecf20Sopenharmony_ci		err = -EOPNOTSUPP;
8728c2ecf20Sopenharmony_ci		goto out;
8738c2ecf20Sopenharmony_ci#endif
8748c2ecf20Sopenharmony_ci	} else {
8758c2ecf20Sopenharmony_ci		dp_cmn = &dp->ricp_v4.dp_cmn;
8768c2ecf20Sopenharmony_ci		ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr);
8778c2ecf20Sopenharmony_ci		ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr);
8788c2ecf20Sopenharmony_ci		saddr6 = &s_mapped_addr;
8798c2ecf20Sopenharmony_ci		daddr6 = &d_mapped_addr;
8808c2ecf20Sopenharmony_ci	}
8818c2ecf20Sopenharmony_ci
8828c2ecf20Sopenharmony_ci	rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
8838c2ecf20Sopenharmony_ci		 saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
8848c2ecf20Sopenharmony_ci		 RDS_PROTOCOL_MINOR(version),
8858c2ecf20Sopenharmony_ci		 (unsigned long long)be64_to_cpu(lguid),
8868c2ecf20Sopenharmony_ci		 (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_ci	/* RDS/IB is not currently netns aware, thus init_net */
8898c2ecf20Sopenharmony_ci	conn = rds_conn_create(&init_net, daddr6, saddr6,
8908c2ecf20Sopenharmony_ci			       &rds_ib_transport, dp_cmn->ricpc_dp_toss,
8918c2ecf20Sopenharmony_ci			       GFP_KERNEL, ifindex);
8928c2ecf20Sopenharmony_ci	if (IS_ERR(conn)) {
8938c2ecf20Sopenharmony_ci		rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
8948c2ecf20Sopenharmony_ci		conn = NULL;
8958c2ecf20Sopenharmony_ci		goto out;
8968c2ecf20Sopenharmony_ci	}
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	/*
8998c2ecf20Sopenharmony_ci	 * The connection request may occur while the
9008c2ecf20Sopenharmony_ci	 * previous connection exist, e.g. in case of failover.
9018c2ecf20Sopenharmony_ci	 * But as connections may be initiated simultaneously
9028c2ecf20Sopenharmony_ci	 * by both hosts, we have a random backoff mechanism -
9038c2ecf20Sopenharmony_ci	 * see the comment above rds_queue_reconnect()
9048c2ecf20Sopenharmony_ci	 */
9058c2ecf20Sopenharmony_ci	mutex_lock(&conn->c_cm_lock);
9068c2ecf20Sopenharmony_ci	if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
9078c2ecf20Sopenharmony_ci		if (rds_conn_state(conn) == RDS_CONN_UP) {
9088c2ecf20Sopenharmony_ci			rdsdebug("incoming connect while connecting\n");
9098c2ecf20Sopenharmony_ci			rds_conn_drop(conn);
9108c2ecf20Sopenharmony_ci			rds_ib_stats_inc(s_ib_listen_closed_stale);
9118c2ecf20Sopenharmony_ci		} else
9128c2ecf20Sopenharmony_ci		if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
9138c2ecf20Sopenharmony_ci			/* Wait and see - our connect may still be succeeding */
9148c2ecf20Sopenharmony_ci			rds_ib_stats_inc(s_ib_connect_raced);
9158c2ecf20Sopenharmony_ci		}
9168c2ecf20Sopenharmony_ci		goto out;
9178c2ecf20Sopenharmony_ci	}
9188c2ecf20Sopenharmony_ci
9198c2ecf20Sopenharmony_ci	ic = conn->c_transport_data;
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_ci	rds_ib_set_protocol(conn, version);
9228c2ecf20Sopenharmony_ci	rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit));
9238c2ecf20Sopenharmony_ci
9248c2ecf20Sopenharmony_ci	/* If the peer gave us the last packet it saw, process this as if
9258c2ecf20Sopenharmony_ci	 * we had received a regular ACK. */
9268c2ecf20Sopenharmony_ci	if (dp_cmn->ricpc_ack_seq)
9278c2ecf20Sopenharmony_ci		rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq),
9288c2ecf20Sopenharmony_ci				    NULL);
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_ci	BUG_ON(cm_id->context);
9318c2ecf20Sopenharmony_ci	BUG_ON(ic->i_cm_id);
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci	ic->i_cm_id = cm_id;
9348c2ecf20Sopenharmony_ci	cm_id->context = conn;
9358c2ecf20Sopenharmony_ci
9368c2ecf20Sopenharmony_ci	/* We got halfway through setting up the ib_connection, if we
9378c2ecf20Sopenharmony_ci	 * fail now, we have to take the long route out of this mess. */
9388c2ecf20Sopenharmony_ci	destroy = 0;
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_ci	err = rds_ib_setup_qp(conn);
9418c2ecf20Sopenharmony_ci	if (err) {
9428c2ecf20Sopenharmony_ci		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
9438c2ecf20Sopenharmony_ci		goto out;
9448c2ecf20Sopenharmony_ci	}
9458c2ecf20Sopenharmony_ci
9468c2ecf20Sopenharmony_ci	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
9478c2ecf20Sopenharmony_ci				  event->param.conn.responder_resources,
9488c2ecf20Sopenharmony_ci				  event->param.conn.initiator_depth, isv6);
9498c2ecf20Sopenharmony_ci
9508c2ecf20Sopenharmony_ci	/* rdma_accept() calls rdma_reject() internally if it fails */
9518c2ecf20Sopenharmony_ci	if (rdma_accept(cm_id, &conn_param))
9528c2ecf20Sopenharmony_ci		rds_ib_conn_error(conn, "rdma_accept failed\n");
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_ciout:
9558c2ecf20Sopenharmony_ci	if (conn)
9568c2ecf20Sopenharmony_ci		mutex_unlock(&conn->c_cm_lock);
9578c2ecf20Sopenharmony_ci	if (err)
9588c2ecf20Sopenharmony_ci		rdma_reject(cm_id, &err, sizeof(int),
9598c2ecf20Sopenharmony_ci			    IB_CM_REJ_CONSUMER_DEFINED);
9608c2ecf20Sopenharmony_ci	return destroy;
9618c2ecf20Sopenharmony_ci}
9628c2ecf20Sopenharmony_ci
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ciint rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
9658c2ecf20Sopenharmony_ci{
9668c2ecf20Sopenharmony_ci	struct rds_connection *conn = cm_id->context;
9678c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
9688c2ecf20Sopenharmony_ci	struct rdma_conn_param conn_param;
9698c2ecf20Sopenharmony_ci	union rds_ib_conn_priv dp;
9708c2ecf20Sopenharmony_ci	int ret;
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_ci	/* If the peer doesn't do protocol negotiation, we must
9738c2ecf20Sopenharmony_ci	 * default to RDSv3.0 */
9748c2ecf20Sopenharmony_ci	rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
9758c2ecf20Sopenharmony_ci	ic->i_flowctl = rds_ib_sysctl_flow_control;	/* advertise flow control */
9768c2ecf20Sopenharmony_ci
9778c2ecf20Sopenharmony_ci	ret = rds_ib_setup_qp(conn);
9788c2ecf20Sopenharmony_ci	if (ret) {
9798c2ecf20Sopenharmony_ci		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret);
9808c2ecf20Sopenharmony_ci		goto out;
9818c2ecf20Sopenharmony_ci	}
9828c2ecf20Sopenharmony_ci
9838c2ecf20Sopenharmony_ci	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
9848c2ecf20Sopenharmony_ci				  conn->c_proposed_version,
9858c2ecf20Sopenharmony_ci				  UINT_MAX, UINT_MAX, isv6);
9868c2ecf20Sopenharmony_ci	ret = rdma_connect_locked(cm_id, &conn_param);
9878c2ecf20Sopenharmony_ci	if (ret)
9888c2ecf20Sopenharmony_ci		rds_ib_conn_error(conn, "rdma_connect_locked failed (%d)\n",
9898c2ecf20Sopenharmony_ci				  ret);
9908c2ecf20Sopenharmony_ci
9918c2ecf20Sopenharmony_ciout:
9928c2ecf20Sopenharmony_ci	/* Beware - returning non-zero tells the rdma_cm to destroy
9938c2ecf20Sopenharmony_ci	 * the cm_id. We should certainly not do it as long as we still
9948c2ecf20Sopenharmony_ci	 * "own" the cm_id. */
9958c2ecf20Sopenharmony_ci	if (ret) {
9968c2ecf20Sopenharmony_ci		if (ic->i_cm_id == cm_id)
9978c2ecf20Sopenharmony_ci			ret = 0;
9988c2ecf20Sopenharmony_ci	}
9998c2ecf20Sopenharmony_ci	ic->i_active_side = true;
10008c2ecf20Sopenharmony_ci	return ret;
10018c2ecf20Sopenharmony_ci}
10028c2ecf20Sopenharmony_ci
10038c2ecf20Sopenharmony_ciint rds_ib_conn_path_connect(struct rds_conn_path *cp)
10048c2ecf20Sopenharmony_ci{
10058c2ecf20Sopenharmony_ci	struct rds_connection *conn = cp->cp_conn;
10068c2ecf20Sopenharmony_ci	struct sockaddr_storage src, dest;
10078c2ecf20Sopenharmony_ci	rdma_cm_event_handler handler;
10088c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic;
10098c2ecf20Sopenharmony_ci	int ret;
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_ci	ic = conn->c_transport_data;
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci	/* XXX I wonder what affect the port space has */
10148c2ecf20Sopenharmony_ci	/* delegate cm event handler to rdma_transport */
10158c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
10168c2ecf20Sopenharmony_ci	if (conn->c_isv6)
10178c2ecf20Sopenharmony_ci		handler = rds6_rdma_cm_event_handler;
10188c2ecf20Sopenharmony_ci	else
10198c2ecf20Sopenharmony_ci#endif
10208c2ecf20Sopenharmony_ci		handler = rds_rdma_cm_event_handler;
10218c2ecf20Sopenharmony_ci	ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
10228c2ecf20Sopenharmony_ci				     RDMA_PS_TCP, IB_QPT_RC);
10238c2ecf20Sopenharmony_ci	if (IS_ERR(ic->i_cm_id)) {
10248c2ecf20Sopenharmony_ci		ret = PTR_ERR(ic->i_cm_id);
10258c2ecf20Sopenharmony_ci		ic->i_cm_id = NULL;
10268c2ecf20Sopenharmony_ci		rdsdebug("rdma_create_id() failed: %d\n", ret);
10278c2ecf20Sopenharmony_ci		goto out;
10288c2ecf20Sopenharmony_ci	}
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_ci	rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
10318c2ecf20Sopenharmony_ci
10328c2ecf20Sopenharmony_ci	if (ipv6_addr_v4mapped(&conn->c_faddr)) {
10338c2ecf20Sopenharmony_ci		struct sockaddr_in *sin;
10348c2ecf20Sopenharmony_ci
10358c2ecf20Sopenharmony_ci		sin = (struct sockaddr_in *)&src;
10368c2ecf20Sopenharmony_ci		sin->sin_family = AF_INET;
10378c2ecf20Sopenharmony_ci		sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
10388c2ecf20Sopenharmony_ci		sin->sin_port = 0;
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_ci		sin = (struct sockaddr_in *)&dest;
10418c2ecf20Sopenharmony_ci		sin->sin_family = AF_INET;
10428c2ecf20Sopenharmony_ci		sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
10438c2ecf20Sopenharmony_ci		sin->sin_port = htons(RDS_PORT);
10448c2ecf20Sopenharmony_ci	} else {
10458c2ecf20Sopenharmony_ci		struct sockaddr_in6 *sin6;
10468c2ecf20Sopenharmony_ci
10478c2ecf20Sopenharmony_ci		sin6 = (struct sockaddr_in6 *)&src;
10488c2ecf20Sopenharmony_ci		sin6->sin6_family = AF_INET6;
10498c2ecf20Sopenharmony_ci		sin6->sin6_addr = conn->c_laddr;
10508c2ecf20Sopenharmony_ci		sin6->sin6_port = 0;
10518c2ecf20Sopenharmony_ci		sin6->sin6_scope_id = conn->c_dev_if;
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci		sin6 = (struct sockaddr_in6 *)&dest;
10548c2ecf20Sopenharmony_ci		sin6->sin6_family = AF_INET6;
10558c2ecf20Sopenharmony_ci		sin6->sin6_addr = conn->c_faddr;
10568c2ecf20Sopenharmony_ci		sin6->sin6_port = htons(RDS_CM_PORT);
10578c2ecf20Sopenharmony_ci		sin6->sin6_scope_id = conn->c_dev_if;
10588c2ecf20Sopenharmony_ci	}
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci	ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
10618c2ecf20Sopenharmony_ci				(struct sockaddr *)&dest,
10628c2ecf20Sopenharmony_ci				RDS_RDMA_RESOLVE_TIMEOUT_MS);
10638c2ecf20Sopenharmony_ci	if (ret) {
10648c2ecf20Sopenharmony_ci		rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
10658c2ecf20Sopenharmony_ci			 ret);
10668c2ecf20Sopenharmony_ci		rdma_destroy_id(ic->i_cm_id);
10678c2ecf20Sopenharmony_ci		ic->i_cm_id = NULL;
10688c2ecf20Sopenharmony_ci	}
10698c2ecf20Sopenharmony_ci
10708c2ecf20Sopenharmony_ciout:
10718c2ecf20Sopenharmony_ci	return ret;
10728c2ecf20Sopenharmony_ci}
10738c2ecf20Sopenharmony_ci
10748c2ecf20Sopenharmony_ci/*
10758c2ecf20Sopenharmony_ci * This is so careful about only cleaning up resources that were built up
10768c2ecf20Sopenharmony_ci * so that it can be called at any point during startup.  In fact it
10778c2ecf20Sopenharmony_ci * can be called multiple times for a given connection.
10788c2ecf20Sopenharmony_ci */
10798c2ecf20Sopenharmony_civoid rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
10808c2ecf20Sopenharmony_ci{
10818c2ecf20Sopenharmony_ci	struct rds_connection *conn = cp->cp_conn;
10828c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
10838c2ecf20Sopenharmony_ci	int err = 0;
10848c2ecf20Sopenharmony_ci
10858c2ecf20Sopenharmony_ci	rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
10868c2ecf20Sopenharmony_ci		 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
10878c2ecf20Sopenharmony_ci		 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
10888c2ecf20Sopenharmony_ci
10898c2ecf20Sopenharmony_ci	if (ic->i_cm_id) {
10908c2ecf20Sopenharmony_ci		rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
10918c2ecf20Sopenharmony_ci		err = rdma_disconnect(ic->i_cm_id);
10928c2ecf20Sopenharmony_ci		if (err) {
10938c2ecf20Sopenharmony_ci			/* Actually this may happen quite frequently, when
10948c2ecf20Sopenharmony_ci			 * an outgoing connect raced with an incoming connect.
10958c2ecf20Sopenharmony_ci			 */
10968c2ecf20Sopenharmony_ci			rdsdebug("failed to disconnect, cm: %p err %d\n",
10978c2ecf20Sopenharmony_ci				ic->i_cm_id, err);
10988c2ecf20Sopenharmony_ci		}
10998c2ecf20Sopenharmony_ci
11008c2ecf20Sopenharmony_ci		/* kick off "flush_worker" for all pools in order to reap
11018c2ecf20Sopenharmony_ci		 * all FRMR registrations that are still marked "FRMR_IS_INUSE"
11028c2ecf20Sopenharmony_ci		 */
11038c2ecf20Sopenharmony_ci		rds_ib_flush_mrs();
11048c2ecf20Sopenharmony_ci
11058c2ecf20Sopenharmony_ci		/*
11068c2ecf20Sopenharmony_ci		 * We want to wait for tx and rx completion to finish
11078c2ecf20Sopenharmony_ci		 * before we tear down the connection, but we have to be
11088c2ecf20Sopenharmony_ci		 * careful not to get stuck waiting on a send ring that
11098c2ecf20Sopenharmony_ci		 * only has unsignaled sends in it.  We've shutdown new
11108c2ecf20Sopenharmony_ci		 * sends before getting here so by waiting for signaled
11118c2ecf20Sopenharmony_ci		 * sends to complete we're ensured that there will be no
11128c2ecf20Sopenharmony_ci		 * more tx processing.
11138c2ecf20Sopenharmony_ci		 */
11148c2ecf20Sopenharmony_ci		wait_event(rds_ib_ring_empty_wait,
11158c2ecf20Sopenharmony_ci			   rds_ib_ring_empty(&ic->i_recv_ring) &&
11168c2ecf20Sopenharmony_ci			   (atomic_read(&ic->i_signaled_sends) == 0) &&
11178c2ecf20Sopenharmony_ci			   (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
11188c2ecf20Sopenharmony_ci			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
11198c2ecf20Sopenharmony_ci		tasklet_kill(&ic->i_send_tasklet);
11208c2ecf20Sopenharmony_ci		tasklet_kill(&ic->i_recv_tasklet);
11218c2ecf20Sopenharmony_ci
11228c2ecf20Sopenharmony_ci		atomic_set(&ic->i_cq_quiesce, 1);
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci		/* first destroy the ib state that generates callbacks */
11258c2ecf20Sopenharmony_ci		if (ic->i_cm_id->qp)
11268c2ecf20Sopenharmony_ci			rdma_destroy_qp(ic->i_cm_id);
11278c2ecf20Sopenharmony_ci		if (ic->i_send_cq) {
11288c2ecf20Sopenharmony_ci			if (ic->rds_ibdev)
11298c2ecf20Sopenharmony_ci				ibdev_put_vector(ic->rds_ibdev, ic->i_scq_vector);
11308c2ecf20Sopenharmony_ci			ib_destroy_cq(ic->i_send_cq);
11318c2ecf20Sopenharmony_ci		}
11328c2ecf20Sopenharmony_ci
11338c2ecf20Sopenharmony_ci		if (ic->i_recv_cq) {
11348c2ecf20Sopenharmony_ci			if (ic->rds_ibdev)
11358c2ecf20Sopenharmony_ci				ibdev_put_vector(ic->rds_ibdev, ic->i_rcq_vector);
11368c2ecf20Sopenharmony_ci			ib_destroy_cq(ic->i_recv_cq);
11378c2ecf20Sopenharmony_ci		}
11388c2ecf20Sopenharmony_ci
11398c2ecf20Sopenharmony_ci		if (ic->rds_ibdev) {
11408c2ecf20Sopenharmony_ci			/* then free the resources that ib callbacks use */
11418c2ecf20Sopenharmony_ci			if (ic->i_send_hdrs) {
11428c2ecf20Sopenharmony_ci				rds_dma_hdrs_free(ic->rds_ibdev,
11438c2ecf20Sopenharmony_ci						  ic->i_send_hdrs,
11448c2ecf20Sopenharmony_ci						  ic->i_send_hdrs_dma,
11458c2ecf20Sopenharmony_ci						  ic->i_send_ring.w_nr,
11468c2ecf20Sopenharmony_ci						  DMA_TO_DEVICE);
11478c2ecf20Sopenharmony_ci				ic->i_send_hdrs = NULL;
11488c2ecf20Sopenharmony_ci				ic->i_send_hdrs_dma = NULL;
11498c2ecf20Sopenharmony_ci			}
11508c2ecf20Sopenharmony_ci
11518c2ecf20Sopenharmony_ci			if (ic->i_recv_hdrs) {
11528c2ecf20Sopenharmony_ci				rds_dma_hdrs_free(ic->rds_ibdev,
11538c2ecf20Sopenharmony_ci						  ic->i_recv_hdrs,
11548c2ecf20Sopenharmony_ci						  ic->i_recv_hdrs_dma,
11558c2ecf20Sopenharmony_ci						  ic->i_recv_ring.w_nr,
11568c2ecf20Sopenharmony_ci						  DMA_FROM_DEVICE);
11578c2ecf20Sopenharmony_ci				ic->i_recv_hdrs = NULL;
11588c2ecf20Sopenharmony_ci				ic->i_recv_hdrs_dma = NULL;
11598c2ecf20Sopenharmony_ci			}
11608c2ecf20Sopenharmony_ci
11618c2ecf20Sopenharmony_ci			if (ic->i_ack) {
11628c2ecf20Sopenharmony_ci				rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack,
11638c2ecf20Sopenharmony_ci						 ic->i_ack_dma, DMA_TO_DEVICE);
11648c2ecf20Sopenharmony_ci				ic->i_ack = NULL;
11658c2ecf20Sopenharmony_ci			}
11668c2ecf20Sopenharmony_ci		} else {
11678c2ecf20Sopenharmony_ci			WARN_ON(ic->i_send_hdrs);
11688c2ecf20Sopenharmony_ci			WARN_ON(ic->i_send_hdrs_dma);
11698c2ecf20Sopenharmony_ci			WARN_ON(ic->i_recv_hdrs);
11708c2ecf20Sopenharmony_ci			WARN_ON(ic->i_recv_hdrs_dma);
11718c2ecf20Sopenharmony_ci			WARN_ON(ic->i_ack);
11728c2ecf20Sopenharmony_ci		}
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_ci		if (ic->i_sends)
11758c2ecf20Sopenharmony_ci			rds_ib_send_clear_ring(ic);
11768c2ecf20Sopenharmony_ci		if (ic->i_recvs)
11778c2ecf20Sopenharmony_ci			rds_ib_recv_clear_ring(ic);
11788c2ecf20Sopenharmony_ci
11798c2ecf20Sopenharmony_ci		rdma_destroy_id(ic->i_cm_id);
11808c2ecf20Sopenharmony_ci
11818c2ecf20Sopenharmony_ci		/*
11828c2ecf20Sopenharmony_ci		 * Move connection back to the nodev list.
11838c2ecf20Sopenharmony_ci		 */
11848c2ecf20Sopenharmony_ci		if (ic->rds_ibdev)
11858c2ecf20Sopenharmony_ci			rds_ib_remove_conn(ic->rds_ibdev, conn);
11868c2ecf20Sopenharmony_ci
11878c2ecf20Sopenharmony_ci		ic->i_cm_id = NULL;
11888c2ecf20Sopenharmony_ci		ic->i_pd = NULL;
11898c2ecf20Sopenharmony_ci		ic->i_send_cq = NULL;
11908c2ecf20Sopenharmony_ci		ic->i_recv_cq = NULL;
11918c2ecf20Sopenharmony_ci	}
11928c2ecf20Sopenharmony_ci	BUG_ON(ic->rds_ibdev);
11938c2ecf20Sopenharmony_ci
11948c2ecf20Sopenharmony_ci	/* Clear pending transmit */
11958c2ecf20Sopenharmony_ci	if (ic->i_data_op) {
11968c2ecf20Sopenharmony_ci		struct rds_message *rm;
11978c2ecf20Sopenharmony_ci
11988c2ecf20Sopenharmony_ci		rm = container_of(ic->i_data_op, struct rds_message, data);
11998c2ecf20Sopenharmony_ci		rds_message_put(rm);
12008c2ecf20Sopenharmony_ci		ic->i_data_op = NULL;
12018c2ecf20Sopenharmony_ci	}
12028c2ecf20Sopenharmony_ci
12038c2ecf20Sopenharmony_ci	/* Clear the ACK state */
12048c2ecf20Sopenharmony_ci	clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
12058c2ecf20Sopenharmony_ci#ifdef KERNEL_HAS_ATOMIC64
12068c2ecf20Sopenharmony_ci	atomic64_set(&ic->i_ack_next, 0);
12078c2ecf20Sopenharmony_ci#else
12088c2ecf20Sopenharmony_ci	ic->i_ack_next = 0;
12098c2ecf20Sopenharmony_ci#endif
12108c2ecf20Sopenharmony_ci	ic->i_ack_recv = 0;
12118c2ecf20Sopenharmony_ci
12128c2ecf20Sopenharmony_ci	/* Clear flow control state */
12138c2ecf20Sopenharmony_ci	ic->i_flowctl = 0;
12148c2ecf20Sopenharmony_ci	atomic_set(&ic->i_credits, 0);
12158c2ecf20Sopenharmony_ci
12168c2ecf20Sopenharmony_ci	/* Re-init rings, but retain sizes. */
12178c2ecf20Sopenharmony_ci	rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
12188c2ecf20Sopenharmony_ci	rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);
12198c2ecf20Sopenharmony_ci
12208c2ecf20Sopenharmony_ci	if (ic->i_ibinc) {
12218c2ecf20Sopenharmony_ci		rds_inc_put(&ic->i_ibinc->ii_inc);
12228c2ecf20Sopenharmony_ci		ic->i_ibinc = NULL;
12238c2ecf20Sopenharmony_ci	}
12248c2ecf20Sopenharmony_ci
12258c2ecf20Sopenharmony_ci	vfree(ic->i_sends);
12268c2ecf20Sopenharmony_ci	ic->i_sends = NULL;
12278c2ecf20Sopenharmony_ci	vfree(ic->i_recvs);
12288c2ecf20Sopenharmony_ci	ic->i_recvs = NULL;
12298c2ecf20Sopenharmony_ci	ic->i_active_side = false;
12308c2ecf20Sopenharmony_ci}
12318c2ecf20Sopenharmony_ci
12328c2ecf20Sopenharmony_ciint rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
12338c2ecf20Sopenharmony_ci{
12348c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic;
12358c2ecf20Sopenharmony_ci	unsigned long flags;
12368c2ecf20Sopenharmony_ci	int ret;
12378c2ecf20Sopenharmony_ci
12388c2ecf20Sopenharmony_ci	/* XXX too lazy? */
12398c2ecf20Sopenharmony_ci	ic = kzalloc(sizeof(struct rds_ib_connection), gfp);
12408c2ecf20Sopenharmony_ci	if (!ic)
12418c2ecf20Sopenharmony_ci		return -ENOMEM;
12428c2ecf20Sopenharmony_ci
12438c2ecf20Sopenharmony_ci	ret = rds_ib_recv_alloc_caches(ic, gfp);
12448c2ecf20Sopenharmony_ci	if (ret) {
12458c2ecf20Sopenharmony_ci		kfree(ic);
12468c2ecf20Sopenharmony_ci		return ret;
12478c2ecf20Sopenharmony_ci	}
12488c2ecf20Sopenharmony_ci
12498c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&ic->ib_node);
12508c2ecf20Sopenharmony_ci	tasklet_init(&ic->i_send_tasklet, rds_ib_tasklet_fn_send,
12518c2ecf20Sopenharmony_ci		     (unsigned long)ic);
12528c2ecf20Sopenharmony_ci	tasklet_init(&ic->i_recv_tasklet, rds_ib_tasklet_fn_recv,
12538c2ecf20Sopenharmony_ci		     (unsigned long)ic);
12548c2ecf20Sopenharmony_ci	mutex_init(&ic->i_recv_mutex);
12558c2ecf20Sopenharmony_ci#ifndef KERNEL_HAS_ATOMIC64
12568c2ecf20Sopenharmony_ci	spin_lock_init(&ic->i_ack_lock);
12578c2ecf20Sopenharmony_ci#endif
12588c2ecf20Sopenharmony_ci	atomic_set(&ic->i_signaled_sends, 0);
12598c2ecf20Sopenharmony_ci	atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
12608c2ecf20Sopenharmony_ci
12618c2ecf20Sopenharmony_ci	/*
12628c2ecf20Sopenharmony_ci	 * rds_ib_conn_shutdown() waits for these to be emptied so they
12638c2ecf20Sopenharmony_ci	 * must be initialized before it can be called.
12648c2ecf20Sopenharmony_ci	 */
12658c2ecf20Sopenharmony_ci	rds_ib_ring_init(&ic->i_send_ring, 0);
12668c2ecf20Sopenharmony_ci	rds_ib_ring_init(&ic->i_recv_ring, 0);
12678c2ecf20Sopenharmony_ci
12688c2ecf20Sopenharmony_ci	ic->conn = conn;
12698c2ecf20Sopenharmony_ci	conn->c_transport_data = ic;
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_ci	spin_lock_irqsave(&ib_nodev_conns_lock, flags);
12728c2ecf20Sopenharmony_ci	list_add_tail(&ic->ib_node, &ib_nodev_conns);
12738c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&ib_nodev_conns_lock, flags);
12748c2ecf20Sopenharmony_ci
12758c2ecf20Sopenharmony_ci
12768c2ecf20Sopenharmony_ci	rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
12778c2ecf20Sopenharmony_ci	return 0;
12788c2ecf20Sopenharmony_ci}
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_ci/*
12818c2ecf20Sopenharmony_ci * Free a connection. Connection must be shut down and not set for reconnect.
12828c2ecf20Sopenharmony_ci */
12838c2ecf20Sopenharmony_civoid rds_ib_conn_free(void *arg)
12848c2ecf20Sopenharmony_ci{
12858c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = arg;
12868c2ecf20Sopenharmony_ci	spinlock_t	*lock_ptr;
12878c2ecf20Sopenharmony_ci
12888c2ecf20Sopenharmony_ci	rdsdebug("ic %p\n", ic);
12898c2ecf20Sopenharmony_ci
12908c2ecf20Sopenharmony_ci	/*
12918c2ecf20Sopenharmony_ci	 * Conn is either on a dev's list or on the nodev list.
12928c2ecf20Sopenharmony_ci	 * A race with shutdown() or connect() would cause problems
12938c2ecf20Sopenharmony_ci	 * (since rds_ibdev would change) but that should never happen.
12948c2ecf20Sopenharmony_ci	 */
12958c2ecf20Sopenharmony_ci	lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci	spin_lock_irq(lock_ptr);
12988c2ecf20Sopenharmony_ci	list_del(&ic->ib_node);
12998c2ecf20Sopenharmony_ci	spin_unlock_irq(lock_ptr);
13008c2ecf20Sopenharmony_ci
13018c2ecf20Sopenharmony_ci	rds_ib_recv_free_caches(ic);
13028c2ecf20Sopenharmony_ci
13038c2ecf20Sopenharmony_ci	kfree(ic);
13048c2ecf20Sopenharmony_ci}
13058c2ecf20Sopenharmony_ci
13068c2ecf20Sopenharmony_ci
/*
 * An error occurred on the connection: drop @conn, then print the
 * printk-style message given by @fmt and the arguments that follow it.
 */
void __rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...)
{
	va_list args;

	rds_conn_drop(conn);

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);
}
1321