162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved.
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * This software is available to you under a choice of one of two
562306a36Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the
862306a36Sopenharmony_ci * OpenIB.org BSD license below:
962306a36Sopenharmony_ci *
1062306a36Sopenharmony_ci *     Redistribution and use in source and binary forms, with or
1162306a36Sopenharmony_ci *     without modification, are permitted provided that the following
1262306a36Sopenharmony_ci *     conditions are met:
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci *      - Redistributions of source code must retain the above
1562306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
1662306a36Sopenharmony_ci *        disclaimer.
1762306a36Sopenharmony_ci *
1862306a36Sopenharmony_ci *      - Redistributions in binary form must reproduce the above
1962306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
2062306a36Sopenharmony_ci *        disclaimer in the documentation and/or other materials
2162306a36Sopenharmony_ci *        provided with the distribution.
2262306a36Sopenharmony_ci *
2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3062306a36Sopenharmony_ci * SOFTWARE.
3162306a36Sopenharmony_ci *
3262306a36Sopenharmony_ci */
3362306a36Sopenharmony_ci#include <linux/kernel.h>
3462306a36Sopenharmony_ci#include <linux/in.h>
3562306a36Sopenharmony_ci#include <linux/slab.h>
3662306a36Sopenharmony_ci#include <linux/vmalloc.h>
3762306a36Sopenharmony_ci#include <linux/ratelimit.h>
3862306a36Sopenharmony_ci#include <net/addrconf.h>
3962306a36Sopenharmony_ci#include <rdma/ib_cm.h>
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci#include "rds_single_path.h"
4262306a36Sopenharmony_ci#include "rds.h"
4362306a36Sopenharmony_ci#include "ib.h"
4462306a36Sopenharmony_ci#include "ib_mr.h"
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci/*
4762306a36Sopenharmony_ci * Set the selected protocol version
4862306a36Sopenharmony_ci */
4962306a36Sopenharmony_cistatic void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version)
5062306a36Sopenharmony_ci{
5162306a36Sopenharmony_ci	conn->c_version = version;
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci/*
5562306a36Sopenharmony_ci * Set up flow control
5662306a36Sopenharmony_ci */
5762306a36Sopenharmony_cistatic void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits)
5862306a36Sopenharmony_ci{
5962306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	if (rds_ib_sysctl_flow_control && credits != 0) {
6262306a36Sopenharmony_ci		/* We're doing flow control */
6362306a36Sopenharmony_ci		ic->i_flowctl = 1;
6462306a36Sopenharmony_ci		rds_ib_send_add_credits(conn, credits);
6562306a36Sopenharmony_ci	} else {
6662306a36Sopenharmony_ci		ic->i_flowctl = 0;
6762306a36Sopenharmony_ci	}
6862306a36Sopenharmony_ci}
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci/*
7162306a36Sopenharmony_ci * Connection established.
7262306a36Sopenharmony_ci * We get here for both outgoing and incoming connection.
7362306a36Sopenharmony_ci */
7462306a36Sopenharmony_civoid rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
7762306a36Sopenharmony_ci	const union rds_ib_conn_priv *dp = NULL;
7862306a36Sopenharmony_ci	__be64 ack_seq = 0;
7962306a36Sopenharmony_ci	__be32 credit = 0;
8062306a36Sopenharmony_ci	u8 major = 0;
8162306a36Sopenharmony_ci	u8 minor = 0;
8262306a36Sopenharmony_ci	int err;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	dp = event->param.conn.private_data;
8562306a36Sopenharmony_ci	if (conn->c_isv6) {
8662306a36Sopenharmony_ci		if (event->param.conn.private_data_len >=
8762306a36Sopenharmony_ci		    sizeof(struct rds6_ib_connect_private)) {
8862306a36Sopenharmony_ci			major = dp->ricp_v6.dp_protocol_major;
8962306a36Sopenharmony_ci			minor = dp->ricp_v6.dp_protocol_minor;
9062306a36Sopenharmony_ci			credit = dp->ricp_v6.dp_credit;
9162306a36Sopenharmony_ci			/* dp structure start is not guaranteed to be 8 bytes
9262306a36Sopenharmony_ci			 * aligned.  Since dp_ack_seq is 64-bit extended load
9362306a36Sopenharmony_ci			 * operations can be used so go through get_unaligned
9462306a36Sopenharmony_ci			 * to avoid unaligned errors.
9562306a36Sopenharmony_ci			 */
9662306a36Sopenharmony_ci			ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq);
9762306a36Sopenharmony_ci		}
9862306a36Sopenharmony_ci	} else if (event->param.conn.private_data_len >=
9962306a36Sopenharmony_ci		   sizeof(struct rds_ib_connect_private)) {
10062306a36Sopenharmony_ci		major = dp->ricp_v4.dp_protocol_major;
10162306a36Sopenharmony_ci		minor = dp->ricp_v4.dp_protocol_minor;
10262306a36Sopenharmony_ci		credit = dp->ricp_v4.dp_credit;
10362306a36Sopenharmony_ci		ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq);
10462306a36Sopenharmony_ci	}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	/* make sure it isn't empty data */
10762306a36Sopenharmony_ci	if (major) {
10862306a36Sopenharmony_ci		rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor));
10962306a36Sopenharmony_ci		rds_ib_set_flow_control(conn, be32_to_cpu(credit));
11062306a36Sopenharmony_ci	}
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if (conn->c_version < RDS_PROTOCOL_VERSION) {
11362306a36Sopenharmony_ci		if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
11462306a36Sopenharmony_ci			pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
11562306a36Sopenharmony_ci				  &conn->c_laddr, &conn->c_faddr,
11662306a36Sopenharmony_ci				  RDS_PROTOCOL_MAJOR(conn->c_version),
11762306a36Sopenharmony_ci				  RDS_PROTOCOL_MINOR(conn->c_version));
11862306a36Sopenharmony_ci			rds_conn_destroy(conn);
11962306a36Sopenharmony_ci			return;
12062306a36Sopenharmony_ci		}
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n",
12462306a36Sopenharmony_ci		  ic->i_active_side ? "Active" : "Passive",
12562306a36Sopenharmony_ci		  &conn->c_laddr, &conn->c_faddr, conn->c_tos,
12662306a36Sopenharmony_ci		  RDS_PROTOCOL_MAJOR(conn->c_version),
12762306a36Sopenharmony_ci		  RDS_PROTOCOL_MINOR(conn->c_version),
12862306a36Sopenharmony_ci		  ic->i_flowctl ? ", flow control" : "");
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	/* receive sl from the peer */
13162306a36Sopenharmony_ci	ic->i_sl = ic->i_cm_id->route.path_rec->sl;
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	atomic_set(&ic->i_cq_quiesce, 0);
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	/* Init rings and fill recv. this needs to wait until protocol
13662306a36Sopenharmony_ci	 * negotiation is complete, since ring layout is different
13762306a36Sopenharmony_ci	 * from 3.1 to 4.1.
13862306a36Sopenharmony_ci	 */
13962306a36Sopenharmony_ci	rds_ib_send_init_ring(ic);
14062306a36Sopenharmony_ci	rds_ib_recv_init_ring(ic);
14162306a36Sopenharmony_ci	/* Post receive buffers - as a side effect, this will update
14262306a36Sopenharmony_ci	 * the posted credit count. */
14362306a36Sopenharmony_ci	rds_ib_recv_refill(conn, 1, GFP_KERNEL);
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	/* update ib_device with this local ipaddr */
14662306a36Sopenharmony_ci	err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr);
14762306a36Sopenharmony_ci	if (err)
14862306a36Sopenharmony_ci		printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
14962306a36Sopenharmony_ci			err);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	/* If the peer gave us the last packet it saw, process this as if
15262306a36Sopenharmony_ci	 * we had received a regular ACK. */
15362306a36Sopenharmony_ci	if (dp) {
15462306a36Sopenharmony_ci		if (ack_seq)
15562306a36Sopenharmony_ci			rds_send_drop_acked(conn, be64_to_cpu(ack_seq),
15662306a36Sopenharmony_ci					    NULL);
15762306a36Sopenharmony_ci	}
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	conn->c_proposed_version = conn->c_version;
16062306a36Sopenharmony_ci	rds_connect_complete(conn);
16162306a36Sopenharmony_ci}
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_cistatic void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
16462306a36Sopenharmony_ci				      struct rdma_conn_param *conn_param,
16562306a36Sopenharmony_ci				      union rds_ib_conn_priv *dp,
16662306a36Sopenharmony_ci				      u32 protocol_version,
16762306a36Sopenharmony_ci				      u32 max_responder_resources,
16862306a36Sopenharmony_ci				      u32 max_initiator_depth,
16962306a36Sopenharmony_ci				      bool isv6)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
17262306a36Sopenharmony_ci	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	memset(conn_param, 0, sizeof(struct rdma_conn_param));
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	conn_param->responder_resources =
17762306a36Sopenharmony_ci		min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
17862306a36Sopenharmony_ci	conn_param->initiator_depth =
17962306a36Sopenharmony_ci		min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
18062306a36Sopenharmony_ci	conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
18162306a36Sopenharmony_ci	conn_param->rnr_retry_count = 7;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	if (dp) {
18462306a36Sopenharmony_ci		memset(dp, 0, sizeof(*dp));
18562306a36Sopenharmony_ci		if (isv6) {
18662306a36Sopenharmony_ci			dp->ricp_v6.dp_saddr = conn->c_laddr;
18762306a36Sopenharmony_ci			dp->ricp_v6.dp_daddr = conn->c_faddr;
18862306a36Sopenharmony_ci			dp->ricp_v6.dp_protocol_major =
18962306a36Sopenharmony_ci			    RDS_PROTOCOL_MAJOR(protocol_version);
19062306a36Sopenharmony_ci			dp->ricp_v6.dp_protocol_minor =
19162306a36Sopenharmony_ci			    RDS_PROTOCOL_MINOR(protocol_version);
19262306a36Sopenharmony_ci			dp->ricp_v6.dp_protocol_minor_mask =
19362306a36Sopenharmony_ci			    cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
19462306a36Sopenharmony_ci			dp->ricp_v6.dp_ack_seq =
19562306a36Sopenharmony_ci			    cpu_to_be64(rds_ib_piggyb_ack(ic));
19662306a36Sopenharmony_ci			dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci			conn_param->private_data = &dp->ricp_v6;
19962306a36Sopenharmony_ci			conn_param->private_data_len = sizeof(dp->ricp_v6);
20062306a36Sopenharmony_ci		} else {
20162306a36Sopenharmony_ci			dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3];
20262306a36Sopenharmony_ci			dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3];
20362306a36Sopenharmony_ci			dp->ricp_v4.dp_protocol_major =
20462306a36Sopenharmony_ci			    RDS_PROTOCOL_MAJOR(protocol_version);
20562306a36Sopenharmony_ci			dp->ricp_v4.dp_protocol_minor =
20662306a36Sopenharmony_ci			    RDS_PROTOCOL_MINOR(protocol_version);
20762306a36Sopenharmony_ci			dp->ricp_v4.dp_protocol_minor_mask =
20862306a36Sopenharmony_ci			    cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
20962306a36Sopenharmony_ci			dp->ricp_v4.dp_ack_seq =
21062306a36Sopenharmony_ci			    cpu_to_be64(rds_ib_piggyb_ack(ic));
21162306a36Sopenharmony_ci			dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci			conn_param->private_data = &dp->ricp_v4;
21462306a36Sopenharmony_ci			conn_param->private_data_len = sizeof(dp->ricp_v4);
21562306a36Sopenharmony_ci		}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci		/* Advertise flow control */
21862306a36Sopenharmony_ci		if (ic->i_flowctl) {
21962306a36Sopenharmony_ci			unsigned int credits;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci			credits = IB_GET_POST_CREDITS
22262306a36Sopenharmony_ci				(atomic_read(&ic->i_credits));
22362306a36Sopenharmony_ci			if (isv6)
22462306a36Sopenharmony_ci				dp->ricp_v6.dp_credit = cpu_to_be32(credits);
22562306a36Sopenharmony_ci			else
22662306a36Sopenharmony_ci				dp->ricp_v4.dp_credit = cpu_to_be32(credits);
22762306a36Sopenharmony_ci			atomic_sub(IB_SET_POST_CREDITS(credits),
22862306a36Sopenharmony_ci				   &ic->i_credits);
22962306a36Sopenharmony_ci		}
23062306a36Sopenharmony_ci	}
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic void rds_ib_cq_event_handler(struct ib_event *event, void *data)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	rdsdebug("event %u (%s) data %p\n",
23662306a36Sopenharmony_ci		 event->event, ib_event_msg(event->event), data);
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci/* Plucking the oldest entry from the ring can be done concurrently with
24062306a36Sopenharmony_ci * the thread refilling the ring.  Each ring operation is protected by
24162306a36Sopenharmony_ci * spinlocks and the transient state of refilling doesn't change the
24262306a36Sopenharmony_ci * recording of which entry is oldest.
24362306a36Sopenharmony_ci *
24462306a36Sopenharmony_ci * This relies on IB only calling one cq comp_handler for each cq so that
24562306a36Sopenharmony_ci * there will only be one caller of rds_recv_incoming() per RDS connection.
24662306a36Sopenharmony_ci */
24762306a36Sopenharmony_cistatic void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context)
24862306a36Sopenharmony_ci{
24962306a36Sopenharmony_ci	struct rds_connection *conn = context;
25062306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	rdsdebug("conn %p cq %p\n", conn, cq);
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	rds_ib_stats_inc(s_ib_evt_handler_call);
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	tasklet_schedule(&ic->i_recv_tasklet);
25762306a36Sopenharmony_ci}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_cistatic void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq,
26062306a36Sopenharmony_ci		     struct ib_wc *wcs)
26162306a36Sopenharmony_ci{
26262306a36Sopenharmony_ci	int nr, i;
26362306a36Sopenharmony_ci	struct ib_wc *wc;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
26662306a36Sopenharmony_ci		for (i = 0; i < nr; i++) {
26762306a36Sopenharmony_ci			wc = wcs + i;
26862306a36Sopenharmony_ci			rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
26962306a36Sopenharmony_ci				 (unsigned long long)wc->wr_id, wc->status,
27062306a36Sopenharmony_ci				 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci			if (wc->wr_id <= ic->i_send_ring.w_nr ||
27362306a36Sopenharmony_ci			    wc->wr_id == RDS_IB_ACK_WR_ID)
27462306a36Sopenharmony_ci				rds_ib_send_cqe_handler(ic, wc);
27562306a36Sopenharmony_ci			else
27662306a36Sopenharmony_ci				rds_ib_mr_cqe_handler(ic, wc);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci		}
27962306a36Sopenharmony_ci	}
28062306a36Sopenharmony_ci}
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_cistatic void rds_ib_tasklet_fn_send(unsigned long data)
28362306a36Sopenharmony_ci{
28462306a36Sopenharmony_ci	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
28562306a36Sopenharmony_ci	struct rds_connection *conn = ic->conn;
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	rds_ib_stats_inc(s_ib_tasklet_call);
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	/* if cq has been already reaped, ignore incoming cq event */
29062306a36Sopenharmony_ci	if (atomic_read(&ic->i_cq_quiesce))
29162306a36Sopenharmony_ci		return;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
29462306a36Sopenharmony_ci	ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
29562306a36Sopenharmony_ci	poll_scq(ic, ic->i_send_cq, ic->i_send_wc);
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	if (rds_conn_up(conn) &&
29862306a36Sopenharmony_ci	    (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
29962306a36Sopenharmony_ci	    test_bit(0, &conn->c_map_queued)))
30062306a36Sopenharmony_ci		rds_send_xmit(&ic->conn->c_path[0]);
30162306a36Sopenharmony_ci}
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_cistatic void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq,
30462306a36Sopenharmony_ci		     struct ib_wc *wcs,
30562306a36Sopenharmony_ci		     struct rds_ib_ack_state *ack_state)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	int nr, i;
30862306a36Sopenharmony_ci	struct ib_wc *wc;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
31162306a36Sopenharmony_ci		for (i = 0; i < nr; i++) {
31262306a36Sopenharmony_ci			wc = wcs + i;
31362306a36Sopenharmony_ci			rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
31462306a36Sopenharmony_ci				 (unsigned long long)wc->wr_id, wc->status,
31562306a36Sopenharmony_ci				 wc->byte_len, be32_to_cpu(wc->ex.imm_data));
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci			rds_ib_recv_cqe_handler(ic, wc, ack_state);
31862306a36Sopenharmony_ci		}
31962306a36Sopenharmony_ci	}
32062306a36Sopenharmony_ci}
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_cistatic void rds_ib_tasklet_fn_recv(unsigned long data)
32362306a36Sopenharmony_ci{
32462306a36Sopenharmony_ci	struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
32562306a36Sopenharmony_ci	struct rds_connection *conn = ic->conn;
32662306a36Sopenharmony_ci	struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
32762306a36Sopenharmony_ci	struct rds_ib_ack_state state;
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	if (!rds_ibdev)
33062306a36Sopenharmony_ci		rds_conn_drop(conn);
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	rds_ib_stats_inc(s_ib_tasklet_call);
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	/* if cq has been already reaped, ignore incoming cq event */
33562306a36Sopenharmony_ci	if (atomic_read(&ic->i_cq_quiesce))
33662306a36Sopenharmony_ci		return;
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	memset(&state, 0, sizeof(state));
33962306a36Sopenharmony_ci	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
34062306a36Sopenharmony_ci	ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
34162306a36Sopenharmony_ci	poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	if (state.ack_next_valid)
34462306a36Sopenharmony_ci		rds_ib_set_ack(ic, state.ack_next, state.ack_required);
34562306a36Sopenharmony_ci	if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
34662306a36Sopenharmony_ci		rds_send_drop_acked(conn, state.ack_recv, NULL);
34762306a36Sopenharmony_ci		ic->i_ack_recv = state.ack_recv;
34862306a36Sopenharmony_ci	}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	if (rds_conn_up(conn))
35162306a36Sopenharmony_ci		rds_ib_attempt_ack(ic);
35262306a36Sopenharmony_ci}
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_cistatic void rds_ib_qp_event_handler(struct ib_event *event, void *data)
35562306a36Sopenharmony_ci{
35662306a36Sopenharmony_ci	struct rds_connection *conn = data;
35762306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
36062306a36Sopenharmony_ci		 ib_event_msg(event->event));
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	switch (event->event) {
36362306a36Sopenharmony_ci	case IB_EVENT_COMM_EST:
36462306a36Sopenharmony_ci		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
36562306a36Sopenharmony_ci		break;
36662306a36Sopenharmony_ci	default:
36762306a36Sopenharmony_ci		rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n",
36862306a36Sopenharmony_ci			 event->event, ib_event_msg(event->event),
36962306a36Sopenharmony_ci			 &conn->c_laddr, &conn->c_faddr);
37062306a36Sopenharmony_ci		rds_conn_drop(conn);
37162306a36Sopenharmony_ci		break;
37262306a36Sopenharmony_ci	}
37362306a36Sopenharmony_ci}
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_cistatic void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context)
37662306a36Sopenharmony_ci{
37762306a36Sopenharmony_ci	struct rds_connection *conn = context;
37862306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	rdsdebug("conn %p cq %p\n", conn, cq);
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	rds_ib_stats_inc(s_ib_evt_handler_call);
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	tasklet_schedule(&ic->i_send_tasklet);
38562306a36Sopenharmony_ci}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_cistatic inline int ibdev_get_unused_vector(struct rds_ib_device *rds_ibdev)
38862306a36Sopenharmony_ci{
38962306a36Sopenharmony_ci	int min = rds_ibdev->vector_load[rds_ibdev->dev->num_comp_vectors - 1];
39062306a36Sopenharmony_ci	int index = rds_ibdev->dev->num_comp_vectors - 1;
39162306a36Sopenharmony_ci	int i;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	for (i = rds_ibdev->dev->num_comp_vectors - 1; i >= 0; i--) {
39462306a36Sopenharmony_ci		if (rds_ibdev->vector_load[i] < min) {
39562306a36Sopenharmony_ci			index = i;
39662306a36Sopenharmony_ci			min = rds_ibdev->vector_load[i];
39762306a36Sopenharmony_ci		}
39862306a36Sopenharmony_ci	}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	rds_ibdev->vector_load[index]++;
40162306a36Sopenharmony_ci	return index;
40262306a36Sopenharmony_ci}
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_cistatic inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index)
40562306a36Sopenharmony_ci{
40662306a36Sopenharmony_ci	rds_ibdev->vector_load[index]--;
40762306a36Sopenharmony_ci}
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_cistatic void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr,
41062306a36Sopenharmony_ci		dma_addr_t dma_addr, enum dma_data_direction dir)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir);
41362306a36Sopenharmony_ci	kfree(hdr);
41462306a36Sopenharmony_ci}
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_cistatic struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev,
41762306a36Sopenharmony_ci		dma_addr_t *dma_addr, enum dma_data_direction dir)
41862306a36Sopenharmony_ci{
41962306a36Sopenharmony_ci	struct rds_header *hdr;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev));
42262306a36Sopenharmony_ci	if (!hdr)
42362306a36Sopenharmony_ci		return NULL;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	*dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr),
42662306a36Sopenharmony_ci				      DMA_BIDIRECTIONAL);
42762306a36Sopenharmony_ci	if (ib_dma_mapping_error(dev, *dma_addr)) {
42862306a36Sopenharmony_ci		kfree(hdr);
42962306a36Sopenharmony_ci		return NULL;
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	return hdr;
43362306a36Sopenharmony_ci}
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci/* Free the DMA memory used to store struct rds_header.
43662306a36Sopenharmony_ci *
43762306a36Sopenharmony_ci * @dev: the RDS IB device
43862306a36Sopenharmony_ci * @hdrs: pointer to the array storing DMA memory pointers
43962306a36Sopenharmony_ci * @dma_addrs: pointer to the array storing DMA addresses
44062306a36Sopenharmony_ci * @num_hdars: number of headers to free.
44162306a36Sopenharmony_ci */
44262306a36Sopenharmony_cistatic void rds_dma_hdrs_free(struct rds_ib_device *dev,
44362306a36Sopenharmony_ci		struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs,
44462306a36Sopenharmony_ci		enum dma_data_direction dir)
44562306a36Sopenharmony_ci{
44662306a36Sopenharmony_ci	u32 i;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	for (i = 0; i < num_hdrs; i++)
44962306a36Sopenharmony_ci		rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir);
45062306a36Sopenharmony_ci	kvfree(hdrs);
45162306a36Sopenharmony_ci	kvfree(dma_addrs);
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci/* Allocate DMA coherent memory to be used to store struct rds_header for
45662306a36Sopenharmony_ci * sending/receiving packets.  The pointers to the DMA memory and the
45762306a36Sopenharmony_ci * associated DMA addresses are stored in two arrays.
45862306a36Sopenharmony_ci *
45962306a36Sopenharmony_ci * @dev: the RDS IB device
46062306a36Sopenharmony_ci * @dma_addrs: pointer to the array for storing DMA addresses
46162306a36Sopenharmony_ci * @num_hdrs: number of headers to allocate
46262306a36Sopenharmony_ci *
46362306a36Sopenharmony_ci * It returns the pointer to the array storing the DMA memory pointers.  On
46462306a36Sopenharmony_ci * error, NULL pointer is returned.
46562306a36Sopenharmony_ci */
46662306a36Sopenharmony_cistatic struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev,
46762306a36Sopenharmony_ci		dma_addr_t **dma_addrs, u32 num_hdrs,
46862306a36Sopenharmony_ci		enum dma_data_direction dir)
46962306a36Sopenharmony_ci{
47062306a36Sopenharmony_ci	struct rds_header **hdrs;
47162306a36Sopenharmony_ci	dma_addr_t *hdr_daddrs;
47262306a36Sopenharmony_ci	u32 i;
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
47562306a36Sopenharmony_ci			     ibdev_to_node(dev->dev));
47662306a36Sopenharmony_ci	if (!hdrs)
47762306a36Sopenharmony_ci		return NULL;
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
48062306a36Sopenharmony_ci				   ibdev_to_node(dev->dev));
48162306a36Sopenharmony_ci	if (!hdr_daddrs) {
48262306a36Sopenharmony_ci		kvfree(hdrs);
48362306a36Sopenharmony_ci		return NULL;
48462306a36Sopenharmony_ci	}
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci	for (i = 0; i < num_hdrs; i++) {
48762306a36Sopenharmony_ci		hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir);
48862306a36Sopenharmony_ci		if (!hdrs[i]) {
48962306a36Sopenharmony_ci			rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir);
49062306a36Sopenharmony_ci			return NULL;
49162306a36Sopenharmony_ci		}
49262306a36Sopenharmony_ci	}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	*dma_addrs = hdr_daddrs;
49562306a36Sopenharmony_ci	return hdrs;
49662306a36Sopenharmony_ci}
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci/*
49962306a36Sopenharmony_ci * This needs to be very careful to not leave IS_ERR pointers around for
50062306a36Sopenharmony_ci * cleanup to trip over.
50162306a36Sopenharmony_ci */
50262306a36Sopenharmony_cistatic int rds_ib_setup_qp(struct rds_connection *conn)
50362306a36Sopenharmony_ci{
50462306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
50562306a36Sopenharmony_ci	struct ib_device *dev = ic->i_cm_id->device;
50662306a36Sopenharmony_ci	struct ib_qp_init_attr attr;
50762306a36Sopenharmony_ci	struct ib_cq_init_attr cq_attr = {};
50862306a36Sopenharmony_ci	struct rds_ib_device *rds_ibdev;
50962306a36Sopenharmony_ci	unsigned long max_wrs;
51062306a36Sopenharmony_ci	int ret, fr_queue_space;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	/*
51362306a36Sopenharmony_ci	 * It's normal to see a null device if an incoming connection races
51462306a36Sopenharmony_ci	 * with device removal, so we don't print a warning.
51562306a36Sopenharmony_ci	 */
51662306a36Sopenharmony_ci	rds_ibdev = rds_ib_get_client_data(dev);
51762306a36Sopenharmony_ci	if (!rds_ibdev)
51862306a36Sopenharmony_ci		return -EOPNOTSUPP;
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	/* The fr_queue_space is currently set to 512, to add extra space on
52162306a36Sopenharmony_ci	 * completion queue and send queue. This extra space is used for FRWR
52262306a36Sopenharmony_ci	 * registration and invalidation work requests
52362306a36Sopenharmony_ci	 */
52462306a36Sopenharmony_ci	fr_queue_space = RDS_IB_DEFAULT_FR_WR;
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_ci	/* add the conn now so that connection establishment has the dev */
52762306a36Sopenharmony_ci	rds_ib_add_conn(rds_ibdev, conn);
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ?
53062306a36Sopenharmony_ci		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr;
53162306a36Sopenharmony_ci	if (ic->i_send_ring.w_nr != max_wrs)
53262306a36Sopenharmony_ci		rds_ib_ring_resize(&ic->i_send_ring, max_wrs);
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ?
53562306a36Sopenharmony_ci		rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr;
53662306a36Sopenharmony_ci	if (ic->i_recv_ring.w_nr != max_wrs)
53762306a36Sopenharmony_ci		rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	/* Protection domain and memory range */
54062306a36Sopenharmony_ci	ic->i_pd = rds_ibdev->pd;
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	ic->i_scq_vector = ibdev_get_unused_vector(rds_ibdev);
54362306a36Sopenharmony_ci	cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1;
54462306a36Sopenharmony_ci	cq_attr.comp_vector = ic->i_scq_vector;
54562306a36Sopenharmony_ci	ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
54662306a36Sopenharmony_ci				     rds_ib_cq_event_handler, conn,
54762306a36Sopenharmony_ci				     &cq_attr);
54862306a36Sopenharmony_ci	if (IS_ERR(ic->i_send_cq)) {
54962306a36Sopenharmony_ci		ret = PTR_ERR(ic->i_send_cq);
55062306a36Sopenharmony_ci		ic->i_send_cq = NULL;
55162306a36Sopenharmony_ci		ibdev_put_vector(rds_ibdev, ic->i_scq_vector);
55262306a36Sopenharmony_ci		rdsdebug("ib_create_cq send failed: %d\n", ret);
55362306a36Sopenharmony_ci		goto rds_ibdev_out;
55462306a36Sopenharmony_ci	}
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev);
55762306a36Sopenharmony_ci	cq_attr.cqe = ic->i_recv_ring.w_nr;
55862306a36Sopenharmony_ci	cq_attr.comp_vector = ic->i_rcq_vector;
55962306a36Sopenharmony_ci	ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
56062306a36Sopenharmony_ci				     rds_ib_cq_event_handler, conn,
56162306a36Sopenharmony_ci				     &cq_attr);
56262306a36Sopenharmony_ci	if (IS_ERR(ic->i_recv_cq)) {
56362306a36Sopenharmony_ci		ret = PTR_ERR(ic->i_recv_cq);
56462306a36Sopenharmony_ci		ic->i_recv_cq = NULL;
56562306a36Sopenharmony_ci		ibdev_put_vector(rds_ibdev, ic->i_rcq_vector);
56662306a36Sopenharmony_ci		rdsdebug("ib_create_cq recv failed: %d\n", ret);
56762306a36Sopenharmony_ci		goto send_cq_out;
56862306a36Sopenharmony_ci	}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci	ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
57162306a36Sopenharmony_ci	if (ret) {
57262306a36Sopenharmony_ci		rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
57362306a36Sopenharmony_ci		goto recv_cq_out;
57462306a36Sopenharmony_ci	}
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
57762306a36Sopenharmony_ci	if (ret) {
57862306a36Sopenharmony_ci		rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
57962306a36Sopenharmony_ci		goto recv_cq_out;
58062306a36Sopenharmony_ci	}
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	/* XXX negotiate max send/recv with remote? */
58362306a36Sopenharmony_ci	memset(&attr, 0, sizeof(attr));
58462306a36Sopenharmony_ci	attr.event_handler = rds_ib_qp_event_handler;
58562306a36Sopenharmony_ci	attr.qp_context = conn;
58662306a36Sopenharmony_ci	/* + 1 to allow for the single ack message */
58762306a36Sopenharmony_ci	attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1;
58862306a36Sopenharmony_ci	attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
58962306a36Sopenharmony_ci	attr.cap.max_send_sge = rds_ibdev->max_sge;
59062306a36Sopenharmony_ci	attr.cap.max_recv_sge = RDS_IB_RECV_SGE;
59162306a36Sopenharmony_ci	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
59262306a36Sopenharmony_ci	attr.qp_type = IB_QPT_RC;
59362306a36Sopenharmony_ci	attr.send_cq = ic->i_send_cq;
59462306a36Sopenharmony_ci	attr.recv_cq = ic->i_recv_cq;
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	/*
59762306a36Sopenharmony_ci	 * XXX this can fail if max_*_wr is too large?  Are we supposed
59862306a36Sopenharmony_ci	 * to back off until we get a value that the hardware can support?
59962306a36Sopenharmony_ci	 */
60062306a36Sopenharmony_ci	ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
60162306a36Sopenharmony_ci	if (ret) {
60262306a36Sopenharmony_ci		rdsdebug("rdma_create_qp failed: %d\n", ret);
60362306a36Sopenharmony_ci		goto recv_cq_out;
60462306a36Sopenharmony_ci	}
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci	ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma,
60762306a36Sopenharmony_ci					     ic->i_send_ring.w_nr,
60862306a36Sopenharmony_ci					     DMA_TO_DEVICE);
60962306a36Sopenharmony_ci	if (!ic->i_send_hdrs) {
61062306a36Sopenharmony_ci		ret = -ENOMEM;
61162306a36Sopenharmony_ci		rdsdebug("DMA send hdrs alloc failed\n");
61262306a36Sopenharmony_ci		goto qp_out;
61362306a36Sopenharmony_ci	}
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma,
61662306a36Sopenharmony_ci					     ic->i_recv_ring.w_nr,
61762306a36Sopenharmony_ci					     DMA_FROM_DEVICE);
61862306a36Sopenharmony_ci	if (!ic->i_recv_hdrs) {
61962306a36Sopenharmony_ci		ret = -ENOMEM;
62062306a36Sopenharmony_ci		rdsdebug("DMA recv hdrs alloc failed\n");
62162306a36Sopenharmony_ci		goto send_hdrs_dma_out;
62262306a36Sopenharmony_ci	}
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci	ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma,
62562306a36Sopenharmony_ci				      DMA_TO_DEVICE);
62662306a36Sopenharmony_ci	if (!ic->i_ack) {
62762306a36Sopenharmony_ci		ret = -ENOMEM;
62862306a36Sopenharmony_ci		rdsdebug("DMA ack header alloc failed\n");
62962306a36Sopenharmony_ci		goto recv_hdrs_dma_out;
63062306a36Sopenharmony_ci	}
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci	ic->i_sends = vzalloc_node(array_size(sizeof(struct rds_ib_send_work),
63362306a36Sopenharmony_ci					      ic->i_send_ring.w_nr),
63462306a36Sopenharmony_ci				   ibdev_to_node(dev));
63562306a36Sopenharmony_ci	if (!ic->i_sends) {
63662306a36Sopenharmony_ci		ret = -ENOMEM;
63762306a36Sopenharmony_ci		rdsdebug("send allocation failed\n");
63862306a36Sopenharmony_ci		goto ack_dma_out;
63962306a36Sopenharmony_ci	}
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	ic->i_recvs = vzalloc_node(array_size(sizeof(struct rds_ib_recv_work),
64262306a36Sopenharmony_ci					      ic->i_recv_ring.w_nr),
64362306a36Sopenharmony_ci				   ibdev_to_node(dev));
64462306a36Sopenharmony_ci	if (!ic->i_recvs) {
64562306a36Sopenharmony_ci		ret = -ENOMEM;
64662306a36Sopenharmony_ci		rdsdebug("recv allocation failed\n");
64762306a36Sopenharmony_ci		goto sends_out;
64862306a36Sopenharmony_ci	}
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	rds_ib_recv_init_ack(ic);
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
65362306a36Sopenharmony_ci		 ic->i_send_cq, ic->i_recv_cq);
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci	goto out;
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_cisends_out:
65862306a36Sopenharmony_ci	vfree(ic->i_sends);
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ciack_dma_out:
66162306a36Sopenharmony_ci	rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma,
66262306a36Sopenharmony_ci			 DMA_TO_DEVICE);
66362306a36Sopenharmony_ci	ic->i_ack = NULL;
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_cirecv_hdrs_dma_out:
66662306a36Sopenharmony_ci	rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma,
66762306a36Sopenharmony_ci			  ic->i_recv_ring.w_nr, DMA_FROM_DEVICE);
66862306a36Sopenharmony_ci	ic->i_recv_hdrs = NULL;
66962306a36Sopenharmony_ci	ic->i_recv_hdrs_dma = NULL;
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_cisend_hdrs_dma_out:
67262306a36Sopenharmony_ci	rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma,
67362306a36Sopenharmony_ci			  ic->i_send_ring.w_nr, DMA_TO_DEVICE);
67462306a36Sopenharmony_ci	ic->i_send_hdrs = NULL;
67562306a36Sopenharmony_ci	ic->i_send_hdrs_dma = NULL;
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ciqp_out:
67862306a36Sopenharmony_ci	rdma_destroy_qp(ic->i_cm_id);
67962306a36Sopenharmony_cirecv_cq_out:
68062306a36Sopenharmony_ci	ib_destroy_cq(ic->i_recv_cq);
68162306a36Sopenharmony_ci	ic->i_recv_cq = NULL;
68262306a36Sopenharmony_cisend_cq_out:
68362306a36Sopenharmony_ci	ib_destroy_cq(ic->i_send_cq);
68462306a36Sopenharmony_ci	ic->i_send_cq = NULL;
68562306a36Sopenharmony_cirds_ibdev_out:
68662306a36Sopenharmony_ci	rds_ib_remove_conn(rds_ibdev, conn);
68762306a36Sopenharmony_ciout:
68862306a36Sopenharmony_ci	rds_ib_dev_put(rds_ibdev);
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	return ret;
69162306a36Sopenharmony_ci}
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_cistatic u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
69462306a36Sopenharmony_ci{
69562306a36Sopenharmony_ci	const union rds_ib_conn_priv *dp = event->param.conn.private_data;
69662306a36Sopenharmony_ci	u8 data_len, major, minor;
69762306a36Sopenharmony_ci	u32 version = 0;
69862306a36Sopenharmony_ci	__be16 mask;
69962306a36Sopenharmony_ci	u16 common;
70062306a36Sopenharmony_ci
70162306a36Sopenharmony_ci	/*
70262306a36Sopenharmony_ci	 * rdma_cm private data is odd - when there is any private data in the
70362306a36Sopenharmony_ci	 * request, we will be given a pretty large buffer without telling us the
70462306a36Sopenharmony_ci	 * original size. The only way to tell the difference is by looking at
70562306a36Sopenharmony_ci	 * the contents, which are initialized to zero.
70662306a36Sopenharmony_ci	 * If the protocol version fields aren't set, this is a connection attempt
70762306a36Sopenharmony_ci	 * from an older version. This could be 3.0 or 2.0 - we can't tell.
70862306a36Sopenharmony_ci	 * We really should have changed this for OFED 1.3 :-(
70962306a36Sopenharmony_ci	 */
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	/* Be paranoid. RDS always has privdata */
71262306a36Sopenharmony_ci	if (!event->param.conn.private_data_len) {
71362306a36Sopenharmony_ci		printk(KERN_NOTICE "RDS incoming connection has no private data, "
71462306a36Sopenharmony_ci			"rejecting\n");
71562306a36Sopenharmony_ci		return 0;
71662306a36Sopenharmony_ci	}
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	if (isv6) {
71962306a36Sopenharmony_ci		data_len = sizeof(struct rds6_ib_connect_private);
72062306a36Sopenharmony_ci		major = dp->ricp_v6.dp_protocol_major;
72162306a36Sopenharmony_ci		minor = dp->ricp_v6.dp_protocol_minor;
72262306a36Sopenharmony_ci		mask = dp->ricp_v6.dp_protocol_minor_mask;
72362306a36Sopenharmony_ci	} else {
72462306a36Sopenharmony_ci		data_len = sizeof(struct rds_ib_connect_private);
72562306a36Sopenharmony_ci		major = dp->ricp_v4.dp_protocol_major;
72662306a36Sopenharmony_ci		minor = dp->ricp_v4.dp_protocol_minor;
72762306a36Sopenharmony_ci		mask = dp->ricp_v4.dp_protocol_minor_mask;
72862306a36Sopenharmony_ci	}
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci	/* Even if len is crap *now* I still want to check it. -ASG */
73162306a36Sopenharmony_ci	if (event->param.conn.private_data_len < data_len || major == 0)
73262306a36Sopenharmony_ci		return RDS_PROTOCOL_4_0;
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
73562306a36Sopenharmony_ci	if (major == 4 && common) {
73662306a36Sopenharmony_ci		version = RDS_PROTOCOL_4_0;
73762306a36Sopenharmony_ci		while ((common >>= 1) != 0)
73862306a36Sopenharmony_ci			version++;
73962306a36Sopenharmony_ci	} else if (RDS_PROTOCOL_COMPAT_VERSION ==
74062306a36Sopenharmony_ci		   RDS_PROTOCOL(major, minor)) {
74162306a36Sopenharmony_ci		version = RDS_PROTOCOL_COMPAT_VERSION;
74262306a36Sopenharmony_ci	} else {
74362306a36Sopenharmony_ci		if (isv6)
74462306a36Sopenharmony_ci			printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
74562306a36Sopenharmony_ci					   &dp->ricp_v6.dp_saddr, major, minor);
74662306a36Sopenharmony_ci		else
74762306a36Sopenharmony_ci			printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
74862306a36Sopenharmony_ci					   &dp->ricp_v4.dp_saddr, major, minor);
74962306a36Sopenharmony_ci	}
75062306a36Sopenharmony_ci	return version;
75162306a36Sopenharmony_ci}
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
75462306a36Sopenharmony_ci/* Given an IPv6 address, find the net_device which hosts that address and
75562306a36Sopenharmony_ci * return its index.  This is used by the rds_ib_cm_handle_connect() code to
75662306a36Sopenharmony_ci * find the interface index of where an incoming request comes from when
75762306a36Sopenharmony_ci * the request is using a link local address.
75862306a36Sopenharmony_ci *
75962306a36Sopenharmony_ci * Note one problem in this search.  It is possible that two interfaces have
76062306a36Sopenharmony_ci * the same link local address.  Unfortunately, this cannot be solved unless
76162306a36Sopenharmony_ci * the underlying layer gives us the interface which an incoming RDMA connect
76262306a36Sopenharmony_ci * request comes from.
76362306a36Sopenharmony_ci */
76462306a36Sopenharmony_cistatic u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
76562306a36Sopenharmony_ci{
76662306a36Sopenharmony_ci	struct net_device *dev;
76762306a36Sopenharmony_ci	int idx = 0;
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	rcu_read_lock();
77062306a36Sopenharmony_ci	for_each_netdev_rcu(net, dev) {
77162306a36Sopenharmony_ci		if (ipv6_chk_addr(net, addr, dev, 1)) {
77262306a36Sopenharmony_ci			idx = dev->ifindex;
77362306a36Sopenharmony_ci			break;
77462306a36Sopenharmony_ci		}
77562306a36Sopenharmony_ci	}
77662306a36Sopenharmony_ci	rcu_read_unlock();
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	return idx;
77962306a36Sopenharmony_ci}
78062306a36Sopenharmony_ci#endif
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ciint rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
78362306a36Sopenharmony_ci			     struct rdma_cm_event *event, bool isv6)
78462306a36Sopenharmony_ci{
78562306a36Sopenharmony_ci	__be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
78662306a36Sopenharmony_ci	__be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
78762306a36Sopenharmony_ci	const struct rds_ib_conn_priv_cmn *dp_cmn;
78862306a36Sopenharmony_ci	struct rds_connection *conn = NULL;
78962306a36Sopenharmony_ci	struct rds_ib_connection *ic = NULL;
79062306a36Sopenharmony_ci	struct rdma_conn_param conn_param;
79162306a36Sopenharmony_ci	const union rds_ib_conn_priv *dp;
79262306a36Sopenharmony_ci	union rds_ib_conn_priv dp_rep;
79362306a36Sopenharmony_ci	struct in6_addr s_mapped_addr;
79462306a36Sopenharmony_ci	struct in6_addr d_mapped_addr;
79562306a36Sopenharmony_ci	const struct in6_addr *saddr6;
79662306a36Sopenharmony_ci	const struct in6_addr *daddr6;
79762306a36Sopenharmony_ci	int destroy = 1;
79862306a36Sopenharmony_ci	u32 ifindex = 0;
79962306a36Sopenharmony_ci	u32 version;
80062306a36Sopenharmony_ci	int err = 1;
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	/* Check whether the remote protocol version matches ours. */
80362306a36Sopenharmony_ci	version = rds_ib_protocol_compatible(event, isv6);
80462306a36Sopenharmony_ci	if (!version) {
80562306a36Sopenharmony_ci		err = RDS_RDMA_REJ_INCOMPAT;
80662306a36Sopenharmony_ci		goto out;
80762306a36Sopenharmony_ci	}
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci	dp = event->param.conn.private_data;
81062306a36Sopenharmony_ci	if (isv6) {
81162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
81262306a36Sopenharmony_ci		dp_cmn = &dp->ricp_v6.dp_cmn;
81362306a36Sopenharmony_ci		saddr6 = &dp->ricp_v6.dp_saddr;
81462306a36Sopenharmony_ci		daddr6 = &dp->ricp_v6.dp_daddr;
81562306a36Sopenharmony_ci		/* If either address is link local, need to find the
81662306a36Sopenharmony_ci		 * interface index in order to create a proper RDS
81762306a36Sopenharmony_ci		 * connection.
81862306a36Sopenharmony_ci		 */
81962306a36Sopenharmony_ci		if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) {
82062306a36Sopenharmony_ci			/* Using init_net for now ..  */
82162306a36Sopenharmony_ci			ifindex = __rds_find_ifindex(&init_net, daddr6);
82262306a36Sopenharmony_ci			/* No index found...  Need to bail out. */
82362306a36Sopenharmony_ci			if (ifindex == 0) {
82462306a36Sopenharmony_ci				err = -EOPNOTSUPP;
82562306a36Sopenharmony_ci				goto out;
82662306a36Sopenharmony_ci			}
82762306a36Sopenharmony_ci		} else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) {
82862306a36Sopenharmony_ci			/* Use our address to find the correct index. */
82962306a36Sopenharmony_ci			ifindex = __rds_find_ifindex(&init_net, daddr6);
83062306a36Sopenharmony_ci			/* No index found...  Need to bail out. */
83162306a36Sopenharmony_ci			if (ifindex == 0) {
83262306a36Sopenharmony_ci				err = -EOPNOTSUPP;
83362306a36Sopenharmony_ci				goto out;
83462306a36Sopenharmony_ci			}
83562306a36Sopenharmony_ci		}
83662306a36Sopenharmony_ci#else
83762306a36Sopenharmony_ci		err = -EOPNOTSUPP;
83862306a36Sopenharmony_ci		goto out;
83962306a36Sopenharmony_ci#endif
84062306a36Sopenharmony_ci	} else {
84162306a36Sopenharmony_ci		dp_cmn = &dp->ricp_v4.dp_cmn;
84262306a36Sopenharmony_ci		ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr);
84362306a36Sopenharmony_ci		ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr);
84462306a36Sopenharmony_ci		saddr6 = &s_mapped_addr;
84562306a36Sopenharmony_ci		daddr6 = &d_mapped_addr;
84662306a36Sopenharmony_ci	}
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
84962306a36Sopenharmony_ci		 saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
85062306a36Sopenharmony_ci		 RDS_PROTOCOL_MINOR(version),
85162306a36Sopenharmony_ci		 (unsigned long long)be64_to_cpu(lguid),
85262306a36Sopenharmony_ci		 (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_ci	/* RDS/IB is not currently netns aware, thus init_net */
85562306a36Sopenharmony_ci	conn = rds_conn_create(&init_net, daddr6, saddr6,
85662306a36Sopenharmony_ci			       &rds_ib_transport, dp_cmn->ricpc_dp_toss,
85762306a36Sopenharmony_ci			       GFP_KERNEL, ifindex);
85862306a36Sopenharmony_ci	if (IS_ERR(conn)) {
85962306a36Sopenharmony_ci		rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
86062306a36Sopenharmony_ci		conn = NULL;
86162306a36Sopenharmony_ci		goto out;
86262306a36Sopenharmony_ci	}
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci	/*
86562306a36Sopenharmony_ci	 * The connection request may occur while the
86662306a36Sopenharmony_ci	 * previous connection exist, e.g. in case of failover.
86762306a36Sopenharmony_ci	 * But as connections may be initiated simultaneously
86862306a36Sopenharmony_ci	 * by both hosts, we have a random backoff mechanism -
86962306a36Sopenharmony_ci	 * see the comment above rds_queue_reconnect()
87062306a36Sopenharmony_ci	 */
87162306a36Sopenharmony_ci	mutex_lock(&conn->c_cm_lock);
87262306a36Sopenharmony_ci	if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
87362306a36Sopenharmony_ci		if (rds_conn_state(conn) == RDS_CONN_UP) {
87462306a36Sopenharmony_ci			rdsdebug("incoming connect while connecting\n");
87562306a36Sopenharmony_ci			rds_conn_drop(conn);
87662306a36Sopenharmony_ci			rds_ib_stats_inc(s_ib_listen_closed_stale);
87762306a36Sopenharmony_ci		} else
87862306a36Sopenharmony_ci		if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
87962306a36Sopenharmony_ci			/* Wait and see - our connect may still be succeeding */
88062306a36Sopenharmony_ci			rds_ib_stats_inc(s_ib_connect_raced);
88162306a36Sopenharmony_ci		}
88262306a36Sopenharmony_ci		goto out;
88362306a36Sopenharmony_ci	}
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	ic = conn->c_transport_data;
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci	rds_ib_set_protocol(conn, version);
88862306a36Sopenharmony_ci	rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit));
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	/* If the peer gave us the last packet it saw, process this as if
89162306a36Sopenharmony_ci	 * we had received a regular ACK. */
89262306a36Sopenharmony_ci	if (dp_cmn->ricpc_ack_seq)
89362306a36Sopenharmony_ci		rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq),
89462306a36Sopenharmony_ci				    NULL);
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci	BUG_ON(cm_id->context);
89762306a36Sopenharmony_ci	BUG_ON(ic->i_cm_id);
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	ic->i_cm_id = cm_id;
90062306a36Sopenharmony_ci	cm_id->context = conn;
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	/* We got halfway through setting up the ib_connection, if we
90362306a36Sopenharmony_ci	 * fail now, we have to take the long route out of this mess. */
90462306a36Sopenharmony_ci	destroy = 0;
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	err = rds_ib_setup_qp(conn);
90762306a36Sopenharmony_ci	if (err) {
90862306a36Sopenharmony_ci		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
90962306a36Sopenharmony_ci		goto out;
91062306a36Sopenharmony_ci	}
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
91362306a36Sopenharmony_ci				  event->param.conn.responder_resources,
91462306a36Sopenharmony_ci				  event->param.conn.initiator_depth, isv6);
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
91762306a36Sopenharmony_ci	/* rdma_accept() calls rdma_reject() internally if it fails */
91862306a36Sopenharmony_ci	if (rdma_accept(cm_id, &conn_param))
91962306a36Sopenharmony_ci		rds_ib_conn_error(conn, "rdma_accept failed\n");
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ciout:
92262306a36Sopenharmony_ci	if (conn)
92362306a36Sopenharmony_ci		mutex_unlock(&conn->c_cm_lock);
92462306a36Sopenharmony_ci	if (err)
92562306a36Sopenharmony_ci		rdma_reject(cm_id, &err, sizeof(int),
92662306a36Sopenharmony_ci			    IB_CM_REJ_CONSUMER_DEFINED);
92762306a36Sopenharmony_ci	return destroy;
92862306a36Sopenharmony_ci}
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ciint rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
93262306a36Sopenharmony_ci{
93362306a36Sopenharmony_ci	struct rds_connection *conn = cm_id->context;
93462306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
93562306a36Sopenharmony_ci	struct rdma_conn_param conn_param;
93662306a36Sopenharmony_ci	union rds_ib_conn_priv dp;
93762306a36Sopenharmony_ci	int ret;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci	/* If the peer doesn't do protocol negotiation, we must
94062306a36Sopenharmony_ci	 * default to RDSv3.0 */
94162306a36Sopenharmony_ci	rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
94262306a36Sopenharmony_ci	ic->i_flowctl = rds_ib_sysctl_flow_control;	/* advertise flow control */
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ci	ret = rds_ib_setup_qp(conn);
94562306a36Sopenharmony_ci	if (ret) {
94662306a36Sopenharmony_ci		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret);
94762306a36Sopenharmony_ci		goto out;
94862306a36Sopenharmony_ci	}
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
95162306a36Sopenharmony_ci				  conn->c_proposed_version,
95262306a36Sopenharmony_ci				  UINT_MAX, UINT_MAX, isv6);
95362306a36Sopenharmony_ci	ret = rdma_connect_locked(cm_id, &conn_param);
95462306a36Sopenharmony_ci	if (ret)
95562306a36Sopenharmony_ci		rds_ib_conn_error(conn, "rdma_connect_locked failed (%d)\n",
95662306a36Sopenharmony_ci				  ret);
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ciout:
95962306a36Sopenharmony_ci	/* Beware - returning non-zero tells the rdma_cm to destroy
96062306a36Sopenharmony_ci	 * the cm_id. We should certainly not do it as long as we still
96162306a36Sopenharmony_ci	 * "own" the cm_id. */
96262306a36Sopenharmony_ci	if (ret) {
96362306a36Sopenharmony_ci		if (ic->i_cm_id == cm_id)
96462306a36Sopenharmony_ci			ret = 0;
96562306a36Sopenharmony_ci	}
96662306a36Sopenharmony_ci	ic->i_active_side = true;
96762306a36Sopenharmony_ci	return ret;
96862306a36Sopenharmony_ci}
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ciint rds_ib_conn_path_connect(struct rds_conn_path *cp)
97162306a36Sopenharmony_ci{
97262306a36Sopenharmony_ci	struct rds_connection *conn = cp->cp_conn;
97362306a36Sopenharmony_ci	struct sockaddr_storage src, dest;
97462306a36Sopenharmony_ci	rdma_cm_event_handler handler;
97562306a36Sopenharmony_ci	struct rds_ib_connection *ic;
97662306a36Sopenharmony_ci	int ret;
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_ci	ic = conn->c_transport_data;
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	/* XXX I wonder what affect the port space has */
98162306a36Sopenharmony_ci	/* delegate cm event handler to rdma_transport */
98262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
98362306a36Sopenharmony_ci	if (conn->c_isv6)
98462306a36Sopenharmony_ci		handler = rds6_rdma_cm_event_handler;
98562306a36Sopenharmony_ci	else
98662306a36Sopenharmony_ci#endif
98762306a36Sopenharmony_ci		handler = rds_rdma_cm_event_handler;
98862306a36Sopenharmony_ci	ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
98962306a36Sopenharmony_ci				     RDMA_PS_TCP, IB_QPT_RC);
99062306a36Sopenharmony_ci	if (IS_ERR(ic->i_cm_id)) {
99162306a36Sopenharmony_ci		ret = PTR_ERR(ic->i_cm_id);
99262306a36Sopenharmony_ci		ic->i_cm_id = NULL;
99362306a36Sopenharmony_ci		rdsdebug("rdma_create_id() failed: %d\n", ret);
99462306a36Sopenharmony_ci		goto out;
99562306a36Sopenharmony_ci	}
99662306a36Sopenharmony_ci
99762306a36Sopenharmony_ci	rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	if (ipv6_addr_v4mapped(&conn->c_faddr)) {
100062306a36Sopenharmony_ci		struct sockaddr_in *sin;
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci		sin = (struct sockaddr_in *)&src;
100362306a36Sopenharmony_ci		sin->sin_family = AF_INET;
100462306a36Sopenharmony_ci		sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
100562306a36Sopenharmony_ci		sin->sin_port = 0;
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci		sin = (struct sockaddr_in *)&dest;
100862306a36Sopenharmony_ci		sin->sin_family = AF_INET;
100962306a36Sopenharmony_ci		sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
101062306a36Sopenharmony_ci		sin->sin_port = htons(RDS_PORT);
101162306a36Sopenharmony_ci	} else {
101262306a36Sopenharmony_ci		struct sockaddr_in6 *sin6;
101362306a36Sopenharmony_ci
101462306a36Sopenharmony_ci		sin6 = (struct sockaddr_in6 *)&src;
101562306a36Sopenharmony_ci		sin6->sin6_family = AF_INET6;
101662306a36Sopenharmony_ci		sin6->sin6_addr = conn->c_laddr;
101762306a36Sopenharmony_ci		sin6->sin6_port = 0;
101862306a36Sopenharmony_ci		sin6->sin6_scope_id = conn->c_dev_if;
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci		sin6 = (struct sockaddr_in6 *)&dest;
102162306a36Sopenharmony_ci		sin6->sin6_family = AF_INET6;
102262306a36Sopenharmony_ci		sin6->sin6_addr = conn->c_faddr;
102362306a36Sopenharmony_ci		sin6->sin6_port = htons(RDS_CM_PORT);
102462306a36Sopenharmony_ci		sin6->sin6_scope_id = conn->c_dev_if;
102562306a36Sopenharmony_ci	}
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci	ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
102862306a36Sopenharmony_ci				(struct sockaddr *)&dest,
102962306a36Sopenharmony_ci				RDS_RDMA_RESOLVE_TIMEOUT_MS);
103062306a36Sopenharmony_ci	if (ret) {
103162306a36Sopenharmony_ci		rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
103262306a36Sopenharmony_ci			 ret);
103362306a36Sopenharmony_ci		rdma_destroy_id(ic->i_cm_id);
103462306a36Sopenharmony_ci		ic->i_cm_id = NULL;
103562306a36Sopenharmony_ci	}
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ciout:
103862306a36Sopenharmony_ci	return ret;
103962306a36Sopenharmony_ci}
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci/*
104262306a36Sopenharmony_ci * This is so careful about only cleaning up resources that were built up
104362306a36Sopenharmony_ci * so that it can be called at any point during startup.  In fact it
104462306a36Sopenharmony_ci * can be called multiple times for a given connection.
104562306a36Sopenharmony_ci */
104662306a36Sopenharmony_civoid rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
104762306a36Sopenharmony_ci{
104862306a36Sopenharmony_ci	struct rds_connection *conn = cp->cp_conn;
104962306a36Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
105062306a36Sopenharmony_ci	int err = 0;
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
105362306a36Sopenharmony_ci		 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
105462306a36Sopenharmony_ci		 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci	if (ic->i_cm_id) {
105762306a36Sopenharmony_ci		rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
105862306a36Sopenharmony_ci		err = rdma_disconnect(ic->i_cm_id);
105962306a36Sopenharmony_ci		if (err) {
106062306a36Sopenharmony_ci			/* Actually this may happen quite frequently, when
106162306a36Sopenharmony_ci			 * an outgoing connect raced with an incoming connect.
106262306a36Sopenharmony_ci			 */
106362306a36Sopenharmony_ci			rdsdebug("failed to disconnect, cm: %p err %d\n",
106462306a36Sopenharmony_ci				ic->i_cm_id, err);
106562306a36Sopenharmony_ci		}
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci		/* kick off "flush_worker" for all pools in order to reap
106862306a36Sopenharmony_ci		 * all FRMR registrations that are still marked "FRMR_IS_INUSE"
106962306a36Sopenharmony_ci		 */
107062306a36Sopenharmony_ci		rds_ib_flush_mrs();
107162306a36Sopenharmony_ci
107262306a36Sopenharmony_ci		/*
107362306a36Sopenharmony_ci		 * We want to wait for tx and rx completion to finish
107462306a36Sopenharmony_ci		 * before we tear down the connection, but we have to be
107562306a36Sopenharmony_ci		 * careful not to get stuck waiting on a send ring that
107662306a36Sopenharmony_ci		 * only has unsignaled sends in it.  We've shutdown new
107762306a36Sopenharmony_ci		 * sends before getting here so by waiting for signaled
107862306a36Sopenharmony_ci		 * sends to complete we're ensured that there will be no
107962306a36Sopenharmony_ci		 * more tx processing.
108062306a36Sopenharmony_ci		 */
108162306a36Sopenharmony_ci		wait_event(rds_ib_ring_empty_wait,
108262306a36Sopenharmony_ci			   rds_ib_ring_empty(&ic->i_recv_ring) &&
108362306a36Sopenharmony_ci			   (atomic_read(&ic->i_signaled_sends) == 0) &&
108462306a36Sopenharmony_ci			   (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
108562306a36Sopenharmony_ci			   (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
108662306a36Sopenharmony_ci		tasklet_kill(&ic->i_send_tasklet);
108762306a36Sopenharmony_ci		tasklet_kill(&ic->i_recv_tasklet);
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci		atomic_set(&ic->i_cq_quiesce, 1);
109062306a36Sopenharmony_ci
109162306a36Sopenharmony_ci		/* first destroy the ib state that generates callbacks */
109262306a36Sopenharmony_ci		if (ic->i_cm_id->qp)
109362306a36Sopenharmony_ci			rdma_destroy_qp(ic->i_cm_id);
109462306a36Sopenharmony_ci		if (ic->i_send_cq) {
109562306a36Sopenharmony_ci			if (ic->rds_ibdev)
109662306a36Sopenharmony_ci				ibdev_put_vector(ic->rds_ibdev, ic->i_scq_vector);
109762306a36Sopenharmony_ci			ib_destroy_cq(ic->i_send_cq);
109862306a36Sopenharmony_ci		}
109962306a36Sopenharmony_ci
110062306a36Sopenharmony_ci		if (ic->i_recv_cq) {
110162306a36Sopenharmony_ci			if (ic->rds_ibdev)
110262306a36Sopenharmony_ci				ibdev_put_vector(ic->rds_ibdev, ic->i_rcq_vector);
110362306a36Sopenharmony_ci			ib_destroy_cq(ic->i_recv_cq);
110462306a36Sopenharmony_ci		}
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci		if (ic->rds_ibdev) {
110762306a36Sopenharmony_ci			/* then free the resources that ib callbacks use */
110862306a36Sopenharmony_ci			if (ic->i_send_hdrs) {
110962306a36Sopenharmony_ci				rds_dma_hdrs_free(ic->rds_ibdev,
111062306a36Sopenharmony_ci						  ic->i_send_hdrs,
111162306a36Sopenharmony_ci						  ic->i_send_hdrs_dma,
111262306a36Sopenharmony_ci						  ic->i_send_ring.w_nr,
111362306a36Sopenharmony_ci						  DMA_TO_DEVICE);
111462306a36Sopenharmony_ci				ic->i_send_hdrs = NULL;
111562306a36Sopenharmony_ci				ic->i_send_hdrs_dma = NULL;
111662306a36Sopenharmony_ci			}
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci			if (ic->i_recv_hdrs) {
111962306a36Sopenharmony_ci				rds_dma_hdrs_free(ic->rds_ibdev,
112062306a36Sopenharmony_ci						  ic->i_recv_hdrs,
112162306a36Sopenharmony_ci						  ic->i_recv_hdrs_dma,
112262306a36Sopenharmony_ci						  ic->i_recv_ring.w_nr,
112362306a36Sopenharmony_ci						  DMA_FROM_DEVICE);
112462306a36Sopenharmony_ci				ic->i_recv_hdrs = NULL;
112562306a36Sopenharmony_ci				ic->i_recv_hdrs_dma = NULL;
112662306a36Sopenharmony_ci			}
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci			if (ic->i_ack) {
112962306a36Sopenharmony_ci				rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack,
113062306a36Sopenharmony_ci						 ic->i_ack_dma, DMA_TO_DEVICE);
113162306a36Sopenharmony_ci				ic->i_ack = NULL;
113262306a36Sopenharmony_ci			}
113362306a36Sopenharmony_ci		} else {
113462306a36Sopenharmony_ci			WARN_ON(ic->i_send_hdrs);
113562306a36Sopenharmony_ci			WARN_ON(ic->i_send_hdrs_dma);
113662306a36Sopenharmony_ci			WARN_ON(ic->i_recv_hdrs);
113762306a36Sopenharmony_ci			WARN_ON(ic->i_recv_hdrs_dma);
113862306a36Sopenharmony_ci			WARN_ON(ic->i_ack);
113962306a36Sopenharmony_ci		}
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci		if (ic->i_sends)
114262306a36Sopenharmony_ci			rds_ib_send_clear_ring(ic);
114362306a36Sopenharmony_ci		if (ic->i_recvs)
114462306a36Sopenharmony_ci			rds_ib_recv_clear_ring(ic);
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci		rdma_destroy_id(ic->i_cm_id);
114762306a36Sopenharmony_ci
114862306a36Sopenharmony_ci		/*
114962306a36Sopenharmony_ci		 * Move connection back to the nodev list.
115062306a36Sopenharmony_ci		 */
115162306a36Sopenharmony_ci		if (ic->rds_ibdev)
115262306a36Sopenharmony_ci			rds_ib_remove_conn(ic->rds_ibdev, conn);
115362306a36Sopenharmony_ci
115462306a36Sopenharmony_ci		ic->i_cm_id = NULL;
115562306a36Sopenharmony_ci		ic->i_pd = NULL;
115662306a36Sopenharmony_ci		ic->i_send_cq = NULL;
115762306a36Sopenharmony_ci		ic->i_recv_cq = NULL;
115862306a36Sopenharmony_ci	}
115962306a36Sopenharmony_ci	BUG_ON(ic->rds_ibdev);
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci	/* Clear pending transmit */
116262306a36Sopenharmony_ci	if (ic->i_data_op) {
116362306a36Sopenharmony_ci		struct rds_message *rm;
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_ci		rm = container_of(ic->i_data_op, struct rds_message, data);
116662306a36Sopenharmony_ci		rds_message_put(rm);
116762306a36Sopenharmony_ci		ic->i_data_op = NULL;
116862306a36Sopenharmony_ci	}
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	/* Clear the ACK state */
117162306a36Sopenharmony_ci	clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
117262306a36Sopenharmony_ci#ifdef KERNEL_HAS_ATOMIC64
117362306a36Sopenharmony_ci	atomic64_set(&ic->i_ack_next, 0);
117462306a36Sopenharmony_ci#else
117562306a36Sopenharmony_ci	ic->i_ack_next = 0;
117662306a36Sopenharmony_ci#endif
117762306a36Sopenharmony_ci	ic->i_ack_recv = 0;
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci	/* Clear flow control state */
118062306a36Sopenharmony_ci	ic->i_flowctl = 0;
118162306a36Sopenharmony_ci	atomic_set(&ic->i_credits, 0);
118262306a36Sopenharmony_ci
118362306a36Sopenharmony_ci	/* Re-init rings, but retain sizes. */
118462306a36Sopenharmony_ci	rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
118562306a36Sopenharmony_ci	rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);
118662306a36Sopenharmony_ci
118762306a36Sopenharmony_ci	if (ic->i_ibinc) {
118862306a36Sopenharmony_ci		rds_inc_put(&ic->i_ibinc->ii_inc);
118962306a36Sopenharmony_ci		ic->i_ibinc = NULL;
119062306a36Sopenharmony_ci	}
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	vfree(ic->i_sends);
119362306a36Sopenharmony_ci	ic->i_sends = NULL;
119462306a36Sopenharmony_ci	vfree(ic->i_recvs);
119562306a36Sopenharmony_ci	ic->i_recvs = NULL;
119662306a36Sopenharmony_ci	ic->i_active_side = false;
119762306a36Sopenharmony_ci}
119862306a36Sopenharmony_ci
119962306a36Sopenharmony_ciint rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
120062306a36Sopenharmony_ci{
120162306a36Sopenharmony_ci	struct rds_ib_connection *ic;
120262306a36Sopenharmony_ci	unsigned long flags;
120362306a36Sopenharmony_ci	int ret;
120462306a36Sopenharmony_ci
120562306a36Sopenharmony_ci	/* XXX too lazy? */
120662306a36Sopenharmony_ci	ic = kzalloc(sizeof(struct rds_ib_connection), gfp);
120762306a36Sopenharmony_ci	if (!ic)
120862306a36Sopenharmony_ci		return -ENOMEM;
120962306a36Sopenharmony_ci
121062306a36Sopenharmony_ci	ret = rds_ib_recv_alloc_caches(ic, gfp);
121162306a36Sopenharmony_ci	if (ret) {
121262306a36Sopenharmony_ci		kfree(ic);
121362306a36Sopenharmony_ci		return ret;
121462306a36Sopenharmony_ci	}
121562306a36Sopenharmony_ci
121662306a36Sopenharmony_ci	INIT_LIST_HEAD(&ic->ib_node);
121762306a36Sopenharmony_ci	tasklet_init(&ic->i_send_tasklet, rds_ib_tasklet_fn_send,
121862306a36Sopenharmony_ci		     (unsigned long)ic);
121962306a36Sopenharmony_ci	tasklet_init(&ic->i_recv_tasklet, rds_ib_tasklet_fn_recv,
122062306a36Sopenharmony_ci		     (unsigned long)ic);
122162306a36Sopenharmony_ci	mutex_init(&ic->i_recv_mutex);
122262306a36Sopenharmony_ci#ifndef KERNEL_HAS_ATOMIC64
122362306a36Sopenharmony_ci	spin_lock_init(&ic->i_ack_lock);
122462306a36Sopenharmony_ci#endif
122562306a36Sopenharmony_ci	atomic_set(&ic->i_signaled_sends, 0);
122662306a36Sopenharmony_ci	atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_ci	/*
122962306a36Sopenharmony_ci	 * rds_ib_conn_shutdown() waits for these to be emptied so they
123062306a36Sopenharmony_ci	 * must be initialized before it can be called.
123162306a36Sopenharmony_ci	 */
123262306a36Sopenharmony_ci	rds_ib_ring_init(&ic->i_send_ring, 0);
123362306a36Sopenharmony_ci	rds_ib_ring_init(&ic->i_recv_ring, 0);
123462306a36Sopenharmony_ci
123562306a36Sopenharmony_ci	ic->conn = conn;
123662306a36Sopenharmony_ci	conn->c_transport_data = ic;
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci	spin_lock_irqsave(&ib_nodev_conns_lock, flags);
123962306a36Sopenharmony_ci	list_add_tail(&ic->ib_node, &ib_nodev_conns);
124062306a36Sopenharmony_ci	spin_unlock_irqrestore(&ib_nodev_conns_lock, flags);
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci
124362306a36Sopenharmony_ci	rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
124462306a36Sopenharmony_ci	return 0;
124562306a36Sopenharmony_ci}
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci/*
124862306a36Sopenharmony_ci * Free a connection. Connection must be shut down and not set for reconnect.
124962306a36Sopenharmony_ci */
125062306a36Sopenharmony_civoid rds_ib_conn_free(void *arg)
125162306a36Sopenharmony_ci{
125262306a36Sopenharmony_ci	struct rds_ib_connection *ic = arg;
125362306a36Sopenharmony_ci	spinlock_t	*lock_ptr;
125462306a36Sopenharmony_ci
125562306a36Sopenharmony_ci	rdsdebug("ic %p\n", ic);
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci	/*
125862306a36Sopenharmony_ci	 * Conn is either on a dev's list or on the nodev list.
125962306a36Sopenharmony_ci	 * A race with shutdown() or connect() would cause problems
126062306a36Sopenharmony_ci	 * (since rds_ibdev would change) but that should never happen.
126162306a36Sopenharmony_ci	 */
126262306a36Sopenharmony_ci	lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
126362306a36Sopenharmony_ci
126462306a36Sopenharmony_ci	spin_lock_irq(lock_ptr);
126562306a36Sopenharmony_ci	list_del(&ic->ib_node);
126662306a36Sopenharmony_ci	spin_unlock_irq(lock_ptr);
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci	rds_ib_recv_free_caches(ic);
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_ci	kfree(ic);
127162306a36Sopenharmony_ci}
127262306a36Sopenharmony_ci
127362306a36Sopenharmony_ci
/*
 * An error occurred on the connection: drop it, then print the
 * printf-style message given by @fmt and the arguments that follow.
 */
void __rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...)
{
	va_list args;

	/* The connection is dropped before the message is printed. */
	rds_conn_drop(conn);

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);
}
1288