162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * This software is available to you under a choice of one of two 562306a36Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the 862306a36Sopenharmony_ci * OpenIB.org BSD license below: 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or 1162306a36Sopenharmony_ci * without modification, are permitted provided that the following 1262306a36Sopenharmony_ci * conditions are met: 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * - Redistributions of source code must retain the above 1562306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 1662306a36Sopenharmony_ci * disclaimer. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * - Redistributions in binary form must reproduce the above 1962306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 2062306a36Sopenharmony_ci * disclaimer in the documentation and/or other materials 2162306a36Sopenharmony_ci * provided with the distribution. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3062306a36Sopenharmony_ci * SOFTWARE. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#include <linux/kernel.h> 3462306a36Sopenharmony_ci#include <linux/in.h> 3562306a36Sopenharmony_ci#include <linux/slab.h> 3662306a36Sopenharmony_ci#include <linux/vmalloc.h> 3762306a36Sopenharmony_ci#include <linux/ratelimit.h> 3862306a36Sopenharmony_ci#include <net/addrconf.h> 3962306a36Sopenharmony_ci#include <rdma/ib_cm.h> 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci#include "rds_single_path.h" 4262306a36Sopenharmony_ci#include "rds.h" 4362306a36Sopenharmony_ci#include "ib.h" 4462306a36Sopenharmony_ci#include "ib_mr.h" 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci/* 4762306a36Sopenharmony_ci * Set the selected protocol version 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_cistatic void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version) 5062306a36Sopenharmony_ci{ 5162306a36Sopenharmony_ci conn->c_version = version; 5262306a36Sopenharmony_ci} 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci/* 5562306a36Sopenharmony_ci * Set up flow control 5662306a36Sopenharmony_ci */ 5762306a36Sopenharmony_cistatic void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci if (rds_ib_sysctl_flow_control && credits != 0) { 6262306a36Sopenharmony_ci /* We're doing flow control */ 6362306a36Sopenharmony_ci ic->i_flowctl = 1; 6462306a36Sopenharmony_ci rds_ib_send_add_credits(conn, credits); 6562306a36Sopenharmony_ci } else { 6662306a36Sopenharmony_ci ic->i_flowctl = 0; 6762306a36Sopenharmony_ci } 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci/* 7162306a36Sopenharmony_ci * Connection established. 7262306a36Sopenharmony_ci * We get here for both outgoing and incoming connection. 7362306a36Sopenharmony_ci */ 7462306a36Sopenharmony_civoid rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 7762306a36Sopenharmony_ci const union rds_ib_conn_priv *dp = NULL; 7862306a36Sopenharmony_ci __be64 ack_seq = 0; 7962306a36Sopenharmony_ci __be32 credit = 0; 8062306a36Sopenharmony_ci u8 major = 0; 8162306a36Sopenharmony_ci u8 minor = 0; 8262306a36Sopenharmony_ci int err; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci dp = event->param.conn.private_data; 8562306a36Sopenharmony_ci if (conn->c_isv6) { 8662306a36Sopenharmony_ci if (event->param.conn.private_data_len >= 8762306a36Sopenharmony_ci sizeof(struct rds6_ib_connect_private)) { 8862306a36Sopenharmony_ci major = dp->ricp_v6.dp_protocol_major; 8962306a36Sopenharmony_ci minor = dp->ricp_v6.dp_protocol_minor; 9062306a36Sopenharmony_ci credit = dp->ricp_v6.dp_credit; 9162306a36Sopenharmony_ci /* dp structure start is not guaranteed to be 8 bytes 9262306a36Sopenharmony_ci * aligned. Since dp_ack_seq is 64-bit extended load 9362306a36Sopenharmony_ci * operations can be used so go through get_unaligned 9462306a36Sopenharmony_ci * to avoid unaligned errors. 9562306a36Sopenharmony_ci */ 9662306a36Sopenharmony_ci ack_seq = get_unaligned(&dp->ricp_v6.dp_ack_seq); 9762306a36Sopenharmony_ci } 9862306a36Sopenharmony_ci } else if (event->param.conn.private_data_len >= 9962306a36Sopenharmony_ci sizeof(struct rds_ib_connect_private)) { 10062306a36Sopenharmony_ci major = dp->ricp_v4.dp_protocol_major; 10162306a36Sopenharmony_ci minor = dp->ricp_v4.dp_protocol_minor; 10262306a36Sopenharmony_ci credit = dp->ricp_v4.dp_credit; 10362306a36Sopenharmony_ci ack_seq = get_unaligned(&dp->ricp_v4.dp_ack_seq); 10462306a36Sopenharmony_ci } 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci /* make sure it isn't empty data */ 10762306a36Sopenharmony_ci if (major) { 10862306a36Sopenharmony_ci rds_ib_set_protocol(conn, RDS_PROTOCOL(major, minor)); 10962306a36Sopenharmony_ci rds_ib_set_flow_control(conn, be32_to_cpu(credit)); 11062306a36Sopenharmony_ci } 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci if (conn->c_version < RDS_PROTOCOL_VERSION) { 11362306a36Sopenharmony_ci if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) { 11462306a36Sopenharmony_ci pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", 11562306a36Sopenharmony_ci &conn->c_laddr, &conn->c_faddr, 11662306a36Sopenharmony_ci RDS_PROTOCOL_MAJOR(conn->c_version), 11762306a36Sopenharmony_ci RDS_PROTOCOL_MINOR(conn->c_version)); 11862306a36Sopenharmony_ci rds_conn_destroy(conn); 11962306a36Sopenharmony_ci return; 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci } 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n", 12462306a36Sopenharmony_ci ic->i_active_side ? "Active" : "Passive", 12562306a36Sopenharmony_ci &conn->c_laddr, &conn->c_faddr, conn->c_tos, 12662306a36Sopenharmony_ci RDS_PROTOCOL_MAJOR(conn->c_version), 12762306a36Sopenharmony_ci RDS_PROTOCOL_MINOR(conn->c_version), 12862306a36Sopenharmony_ci ic->i_flowctl ? ", flow control" : ""); 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci /* receive sl from the peer */ 13162306a36Sopenharmony_ci ic->i_sl = ic->i_cm_id->route.path_rec->sl; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci atomic_set(&ic->i_cq_quiesce, 0); 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci /* Init rings and fill recv. this needs to wait until protocol 13662306a36Sopenharmony_ci * negotiation is complete, since ring layout is different 13762306a36Sopenharmony_ci * from 3.1 to 4.1. 13862306a36Sopenharmony_ci */ 13962306a36Sopenharmony_ci rds_ib_send_init_ring(ic); 14062306a36Sopenharmony_ci rds_ib_recv_init_ring(ic); 14162306a36Sopenharmony_ci /* Post receive buffers - as a side effect, this will update 14262306a36Sopenharmony_ci * the posted credit count. */ 14362306a36Sopenharmony_ci rds_ib_recv_refill(conn, 1, GFP_KERNEL); 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci /* update ib_device with this local ipaddr */ 14662306a36Sopenharmony_ci err = rds_ib_update_ipaddr(ic->rds_ibdev, &conn->c_laddr); 14762306a36Sopenharmony_ci if (err) 14862306a36Sopenharmony_ci printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", 14962306a36Sopenharmony_ci err); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci /* If the peer gave us the last packet it saw, process this as if 15262306a36Sopenharmony_ci * we had received a regular ACK. */ 15362306a36Sopenharmony_ci if (dp) { 15462306a36Sopenharmony_ci if (ack_seq) 15562306a36Sopenharmony_ci rds_send_drop_acked(conn, be64_to_cpu(ack_seq), 15662306a36Sopenharmony_ci NULL); 15762306a36Sopenharmony_ci } 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci conn->c_proposed_version = conn->c_version; 16062306a36Sopenharmony_ci rds_connect_complete(conn); 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_cistatic void rds_ib_cm_fill_conn_param(struct rds_connection *conn, 16462306a36Sopenharmony_ci struct rdma_conn_param *conn_param, 16562306a36Sopenharmony_ci union rds_ib_conn_priv *dp, 16662306a36Sopenharmony_ci u32 protocol_version, 16762306a36Sopenharmony_ci u32 max_responder_resources, 16862306a36Sopenharmony_ci u32 max_initiator_depth, 16962306a36Sopenharmony_ci bool isv6) 17062306a36Sopenharmony_ci{ 17162306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 17262306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev = ic->rds_ibdev; 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci memset(conn_param, 0, sizeof(struct rdma_conn_param)); 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci conn_param->responder_resources = 17762306a36Sopenharmony_ci min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources); 17862306a36Sopenharmony_ci conn_param->initiator_depth = 17962306a36Sopenharmony_ci min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth); 18062306a36Sopenharmony_ci conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7); 18162306a36Sopenharmony_ci conn_param->rnr_retry_count = 7; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci if (dp) { 18462306a36Sopenharmony_ci memset(dp, 0, sizeof(*dp)); 18562306a36Sopenharmony_ci if (isv6) { 18662306a36Sopenharmony_ci dp->ricp_v6.dp_saddr = conn->c_laddr; 18762306a36Sopenharmony_ci dp->ricp_v6.dp_daddr = conn->c_faddr; 18862306a36Sopenharmony_ci dp->ricp_v6.dp_protocol_major = 18962306a36Sopenharmony_ci RDS_PROTOCOL_MAJOR(protocol_version); 19062306a36Sopenharmony_ci dp->ricp_v6.dp_protocol_minor = 19162306a36Sopenharmony_ci RDS_PROTOCOL_MINOR(protocol_version); 19262306a36Sopenharmony_ci dp->ricp_v6.dp_protocol_minor_mask = 19362306a36Sopenharmony_ci cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); 19462306a36Sopenharmony_ci dp->ricp_v6.dp_ack_seq = 19562306a36Sopenharmony_ci cpu_to_be64(rds_ib_piggyb_ack(ic)); 19662306a36Sopenharmony_ci dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci conn_param->private_data = &dp->ricp_v6; 19962306a36Sopenharmony_ci conn_param->private_data_len = sizeof(dp->ricp_v6); 20062306a36Sopenharmony_ci } else { 20162306a36Sopenharmony_ci dp->ricp_v4.dp_saddr = conn->c_laddr.s6_addr32[3]; 20262306a36Sopenharmony_ci dp->ricp_v4.dp_daddr = conn->c_faddr.s6_addr32[3]; 20362306a36Sopenharmony_ci dp->ricp_v4.dp_protocol_major = 20462306a36Sopenharmony_ci RDS_PROTOCOL_MAJOR(protocol_version); 20562306a36Sopenharmony_ci dp->ricp_v4.dp_protocol_minor = 20662306a36Sopenharmony_ci RDS_PROTOCOL_MINOR(protocol_version); 20762306a36Sopenharmony_ci dp->ricp_v4.dp_protocol_minor_mask = 20862306a36Sopenharmony_ci cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); 20962306a36Sopenharmony_ci dp->ricp_v4.dp_ack_seq = 21062306a36Sopenharmony_ci cpu_to_be64(rds_ib_piggyb_ack(ic)); 21162306a36Sopenharmony_ci dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci conn_param->private_data = &dp->ricp_v4; 21462306a36Sopenharmony_ci conn_param->private_data_len = sizeof(dp->ricp_v4); 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci /* Advertise flow control */ 21862306a36Sopenharmony_ci if (ic->i_flowctl) { 21962306a36Sopenharmony_ci unsigned int credits; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci credits = IB_GET_POST_CREDITS 22262306a36Sopenharmony_ci (atomic_read(&ic->i_credits)); 22362306a36Sopenharmony_ci if (isv6) 22462306a36Sopenharmony_ci dp->ricp_v6.dp_credit = cpu_to_be32(credits); 22562306a36Sopenharmony_ci else 22662306a36Sopenharmony_ci dp->ricp_v4.dp_credit = cpu_to_be32(credits); 22762306a36Sopenharmony_ci atomic_sub(IB_SET_POST_CREDITS(credits), 22862306a36Sopenharmony_ci &ic->i_credits); 22962306a36Sopenharmony_ci } 23062306a36Sopenharmony_ci } 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_cistatic void rds_ib_cq_event_handler(struct ib_event *event, void *data) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci rdsdebug("event %u (%s) data %p\n", 23662306a36Sopenharmony_ci event->event, ib_event_msg(event->event), data); 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* Plucking the oldest entry from the ring can be done concurrently with 24062306a36Sopenharmony_ci * the thread refilling the ring. Each ring operation is protected by 24162306a36Sopenharmony_ci * spinlocks and the transient state of refilling doesn't change the 24262306a36Sopenharmony_ci * recording of which entry is oldest. 24362306a36Sopenharmony_ci * 24462306a36Sopenharmony_ci * This relies on IB only calling one cq comp_handler for each cq so that 24562306a36Sopenharmony_ci * there will only be one caller of rds_recv_incoming() per RDS connection. 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_cistatic void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci struct rds_connection *conn = context; 25062306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci rdsdebug("conn %p cq %p\n", conn, cq); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_evt_handler_call); 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci tasklet_schedule(&ic->i_recv_tasklet); 25762306a36Sopenharmony_ci} 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_cistatic void poll_scq(struct rds_ib_connection *ic, struct ib_cq *cq, 26062306a36Sopenharmony_ci struct ib_wc *wcs) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci int nr, i; 26362306a36Sopenharmony_ci struct ib_wc *wc; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { 26662306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 26762306a36Sopenharmony_ci wc = wcs + i; 26862306a36Sopenharmony_ci rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 26962306a36Sopenharmony_ci (unsigned long long)wc->wr_id, wc->status, 27062306a36Sopenharmony_ci wc->byte_len, be32_to_cpu(wc->ex.imm_data)); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci if (wc->wr_id <= ic->i_send_ring.w_nr || 27362306a36Sopenharmony_ci wc->wr_id == RDS_IB_ACK_WR_ID) 27462306a36Sopenharmony_ci rds_ib_send_cqe_handler(ic, wc); 27562306a36Sopenharmony_ci else 27662306a36Sopenharmony_ci rds_ib_mr_cqe_handler(ic, wc); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci} 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_cistatic void rds_ib_tasklet_fn_send(unsigned long data) 28362306a36Sopenharmony_ci{ 28462306a36Sopenharmony_ci struct rds_ib_connection *ic = (struct rds_ib_connection *)data; 28562306a36Sopenharmony_ci struct rds_connection *conn = ic->conn; 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_tasklet_call); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci /* if cq has been already reaped, ignore incoming cq event */ 29062306a36Sopenharmony_ci if (atomic_read(&ic->i_cq_quiesce)) 29162306a36Sopenharmony_ci return; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci poll_scq(ic, ic->i_send_cq, ic->i_send_wc); 29462306a36Sopenharmony_ci ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); 29562306a36Sopenharmony_ci poll_scq(ic, ic->i_send_cq, ic->i_send_wc); 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci if (rds_conn_up(conn) && 29862306a36Sopenharmony_ci (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 29962306a36Sopenharmony_ci test_bit(0, &conn->c_map_queued))) 30062306a36Sopenharmony_ci rds_send_xmit(&ic->conn->c_path[0]); 30162306a36Sopenharmony_ci} 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_cistatic void poll_rcq(struct rds_ib_connection *ic, struct ib_cq *cq, 30462306a36Sopenharmony_ci struct ib_wc *wcs, 30562306a36Sopenharmony_ci struct rds_ib_ack_state *ack_state) 30662306a36Sopenharmony_ci{ 30762306a36Sopenharmony_ci int nr, i; 30862306a36Sopenharmony_ci struct ib_wc *wc; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { 31162306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 31262306a36Sopenharmony_ci wc = wcs + i; 31362306a36Sopenharmony_ci rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 31462306a36Sopenharmony_ci (unsigned long long)wc->wr_id, wc->status, 31562306a36Sopenharmony_ci wc->byte_len, be32_to_cpu(wc->ex.imm_data)); 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci rds_ib_recv_cqe_handler(ic, wc, ack_state); 31862306a36Sopenharmony_ci } 31962306a36Sopenharmony_ci } 32062306a36Sopenharmony_ci} 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_cistatic void rds_ib_tasklet_fn_recv(unsigned long data) 32362306a36Sopenharmony_ci{ 32462306a36Sopenharmony_ci struct rds_ib_connection *ic = (struct rds_ib_connection *)data; 32562306a36Sopenharmony_ci struct rds_connection *conn = ic->conn; 32662306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev = ic->rds_ibdev; 32762306a36Sopenharmony_ci struct rds_ib_ack_state state; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci if (!rds_ibdev) 33062306a36Sopenharmony_ci rds_conn_drop(conn); 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_tasklet_call); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci /* if cq has been already reaped, ignore incoming cq event */ 33562306a36Sopenharmony_ci if (atomic_read(&ic->i_cq_quiesce)) 33662306a36Sopenharmony_ci return; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci memset(&state, 0, sizeof(state)); 33962306a36Sopenharmony_ci poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); 34062306a36Sopenharmony_ci ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); 34162306a36Sopenharmony_ci poll_rcq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci if (state.ack_next_valid) 34462306a36Sopenharmony_ci rds_ib_set_ack(ic, state.ack_next, state.ack_required); 34562306a36Sopenharmony_ci if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { 34662306a36Sopenharmony_ci rds_send_drop_acked(conn, state.ack_recv, NULL); 34762306a36Sopenharmony_ci ic->i_ack_recv = state.ack_recv; 34862306a36Sopenharmony_ci } 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci if (rds_conn_up(conn)) 35162306a36Sopenharmony_ci rds_ib_attempt_ack(ic); 35262306a36Sopenharmony_ci} 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_cistatic void rds_ib_qp_event_handler(struct ib_event *event, void *data) 35562306a36Sopenharmony_ci{ 35662306a36Sopenharmony_ci struct rds_connection *conn = data; 35762306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event, 36062306a36Sopenharmony_ci ib_event_msg(event->event)); 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci switch (event->event) { 36362306a36Sopenharmony_ci case IB_EVENT_COMM_EST: 36462306a36Sopenharmony_ci rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 36562306a36Sopenharmony_ci break; 36662306a36Sopenharmony_ci default: 36762306a36Sopenharmony_ci rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n", 36862306a36Sopenharmony_ci event->event, ib_event_msg(event->event), 36962306a36Sopenharmony_ci &conn->c_laddr, &conn->c_faddr); 37062306a36Sopenharmony_ci rds_conn_drop(conn); 37162306a36Sopenharmony_ci break; 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci} 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_cistatic void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context) 37662306a36Sopenharmony_ci{ 37762306a36Sopenharmony_ci struct rds_connection *conn = context; 37862306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci rdsdebug("conn %p cq %p\n", conn, cq); 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_evt_handler_call); 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci tasklet_schedule(&ic->i_send_tasklet); 38562306a36Sopenharmony_ci} 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_cistatic inline int ibdev_get_unused_vector(struct rds_ib_device *rds_ibdev) 38862306a36Sopenharmony_ci{ 38962306a36Sopenharmony_ci int min = rds_ibdev->vector_load[rds_ibdev->dev->num_comp_vectors - 1]; 39062306a36Sopenharmony_ci int index = rds_ibdev->dev->num_comp_vectors - 1; 39162306a36Sopenharmony_ci int i; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci for (i = rds_ibdev->dev->num_comp_vectors - 1; i >= 0; i--) { 39462306a36Sopenharmony_ci if (rds_ibdev->vector_load[i] < min) { 39562306a36Sopenharmony_ci index = i; 39662306a36Sopenharmony_ci min = rds_ibdev->vector_load[i]; 39762306a36Sopenharmony_ci } 39862306a36Sopenharmony_ci } 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci rds_ibdev->vector_load[index]++; 40162306a36Sopenharmony_ci return index; 40262306a36Sopenharmony_ci} 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_cistatic inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci rds_ibdev->vector_load[index]--; 40762306a36Sopenharmony_ci} 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_cistatic void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, 41062306a36Sopenharmony_ci dma_addr_t dma_addr, enum dma_data_direction dir) 41162306a36Sopenharmony_ci{ 41262306a36Sopenharmony_ci ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); 41362306a36Sopenharmony_ci kfree(hdr); 41462306a36Sopenharmony_ci} 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_cistatic struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, 41762306a36Sopenharmony_ci dma_addr_t *dma_addr, enum dma_data_direction dir) 41862306a36Sopenharmony_ci{ 41962306a36Sopenharmony_ci struct rds_header *hdr; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); 42262306a36Sopenharmony_ci if (!hdr) 42362306a36Sopenharmony_ci return NULL; 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), 42662306a36Sopenharmony_ci DMA_BIDIRECTIONAL); 42762306a36Sopenharmony_ci if (ib_dma_mapping_error(dev, *dma_addr)) { 42862306a36Sopenharmony_ci kfree(hdr); 42962306a36Sopenharmony_ci return NULL; 43062306a36Sopenharmony_ci } 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci return hdr; 43362306a36Sopenharmony_ci} 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci/* Free the DMA memory used to store struct rds_header. 43662306a36Sopenharmony_ci * 43762306a36Sopenharmony_ci * @dev: the RDS IB device 43862306a36Sopenharmony_ci * @hdrs: pointer to the array storing DMA memory pointers 43962306a36Sopenharmony_ci * @dma_addrs: pointer to the array storing DMA addresses 44062306a36Sopenharmony_ci * @num_hdars: number of headers to free. 44162306a36Sopenharmony_ci */ 44262306a36Sopenharmony_cistatic void rds_dma_hdrs_free(struct rds_ib_device *dev, 44362306a36Sopenharmony_ci struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, 44462306a36Sopenharmony_ci enum dma_data_direction dir) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci u32 i; 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci for (i = 0; i < num_hdrs; i++) 44962306a36Sopenharmony_ci rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); 45062306a36Sopenharmony_ci kvfree(hdrs); 45162306a36Sopenharmony_ci kvfree(dma_addrs); 45262306a36Sopenharmony_ci} 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci/* Allocate DMA coherent memory to be used to store struct rds_header for 45662306a36Sopenharmony_ci * sending/receiving packets. The pointers to the DMA memory and the 45762306a36Sopenharmony_ci * associated DMA addresses are stored in two arrays. 45862306a36Sopenharmony_ci * 45962306a36Sopenharmony_ci * @dev: the RDS IB device 46062306a36Sopenharmony_ci * @dma_addrs: pointer to the array for storing DMA addresses 46162306a36Sopenharmony_ci * @num_hdrs: number of headers to allocate 46262306a36Sopenharmony_ci * 46362306a36Sopenharmony_ci * It returns the pointer to the array storing the DMA memory pointers. On 46462306a36Sopenharmony_ci * error, NULL pointer is returned. 46562306a36Sopenharmony_ci */ 46662306a36Sopenharmony_cistatic struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, 46762306a36Sopenharmony_ci dma_addr_t **dma_addrs, u32 num_hdrs, 46862306a36Sopenharmony_ci enum dma_data_direction dir) 46962306a36Sopenharmony_ci{ 47062306a36Sopenharmony_ci struct rds_header **hdrs; 47162306a36Sopenharmony_ci dma_addr_t *hdr_daddrs; 47262306a36Sopenharmony_ci u32 i; 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, 47562306a36Sopenharmony_ci ibdev_to_node(dev->dev)); 47662306a36Sopenharmony_ci if (!hdrs) 47762306a36Sopenharmony_ci return NULL; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, 48062306a36Sopenharmony_ci ibdev_to_node(dev->dev)); 48162306a36Sopenharmony_ci if (!hdr_daddrs) { 48262306a36Sopenharmony_ci kvfree(hdrs); 48362306a36Sopenharmony_ci return NULL; 48462306a36Sopenharmony_ci } 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci for (i = 0; i < num_hdrs; i++) { 48762306a36Sopenharmony_ci hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); 48862306a36Sopenharmony_ci if (!hdrs[i]) { 48962306a36Sopenharmony_ci rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); 49062306a36Sopenharmony_ci return NULL; 49162306a36Sopenharmony_ci } 49262306a36Sopenharmony_ci } 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci *dma_addrs = hdr_daddrs; 49562306a36Sopenharmony_ci return hdrs; 49662306a36Sopenharmony_ci} 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci/* 49962306a36Sopenharmony_ci * This needs to be very careful to not leave IS_ERR pointers around for 50062306a36Sopenharmony_ci * cleanup to trip over. 50162306a36Sopenharmony_ci */ 50262306a36Sopenharmony_cistatic int rds_ib_setup_qp(struct rds_connection *conn) 50362306a36Sopenharmony_ci{ 50462306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 50562306a36Sopenharmony_ci struct ib_device *dev = ic->i_cm_id->device; 50662306a36Sopenharmony_ci struct ib_qp_init_attr attr; 50762306a36Sopenharmony_ci struct ib_cq_init_attr cq_attr = {}; 50862306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev; 50962306a36Sopenharmony_ci unsigned long max_wrs; 51062306a36Sopenharmony_ci int ret, fr_queue_space; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci /* 51362306a36Sopenharmony_ci * It's normal to see a null device if an incoming connection races 51462306a36Sopenharmony_ci * with device removal, so we don't print a warning. 51562306a36Sopenharmony_ci */ 51662306a36Sopenharmony_ci rds_ibdev = rds_ib_get_client_data(dev); 51762306a36Sopenharmony_ci if (!rds_ibdev) 51862306a36Sopenharmony_ci return -EOPNOTSUPP; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci /* The fr_queue_space is currently set to 512, to add extra space on 52162306a36Sopenharmony_ci * completion queue and send queue. This extra space is used for FRWR 52262306a36Sopenharmony_ci * registration and invalidation work requests 52362306a36Sopenharmony_ci */ 52462306a36Sopenharmony_ci fr_queue_space = RDS_IB_DEFAULT_FR_WR; 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci /* add the conn now so that connection establishment has the dev */ 52762306a36Sopenharmony_ci rds_ib_add_conn(rds_ibdev, conn); 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ? 53062306a36Sopenharmony_ci rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr; 53162306a36Sopenharmony_ci if (ic->i_send_ring.w_nr != max_wrs) 53262306a36Sopenharmony_ci rds_ib_ring_resize(&ic->i_send_ring, max_wrs); 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ? 53562306a36Sopenharmony_ci rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr; 53662306a36Sopenharmony_ci if (ic->i_recv_ring.w_nr != max_wrs) 53762306a36Sopenharmony_ci rds_ib_ring_resize(&ic->i_recv_ring, max_wrs); 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci /* Protection domain and memory range */ 54062306a36Sopenharmony_ci ic->i_pd = rds_ibdev->pd; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci ic->i_scq_vector = ibdev_get_unused_vector(rds_ibdev); 54362306a36Sopenharmony_ci cq_attr.cqe = ic->i_send_ring.w_nr + fr_queue_space + 1; 54462306a36Sopenharmony_ci cq_attr.comp_vector = ic->i_scq_vector; 54562306a36Sopenharmony_ci ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, 54662306a36Sopenharmony_ci rds_ib_cq_event_handler, conn, 54762306a36Sopenharmony_ci &cq_attr); 54862306a36Sopenharmony_ci if (IS_ERR(ic->i_send_cq)) { 54962306a36Sopenharmony_ci ret = PTR_ERR(ic->i_send_cq); 55062306a36Sopenharmony_ci ic->i_send_cq = NULL; 55162306a36Sopenharmony_ci ibdev_put_vector(rds_ibdev, ic->i_scq_vector); 55262306a36Sopenharmony_ci rdsdebug("ib_create_cq send failed: %d\n", ret); 55362306a36Sopenharmony_ci goto rds_ibdev_out; 55462306a36Sopenharmony_ci } 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev); 55762306a36Sopenharmony_ci cq_attr.cqe = ic->i_recv_ring.w_nr; 55862306a36Sopenharmony_ci cq_attr.comp_vector = ic->i_rcq_vector; 55962306a36Sopenharmony_ci ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv, 56062306a36Sopenharmony_ci rds_ib_cq_event_handler, conn, 56162306a36Sopenharmony_ci &cq_attr); 56262306a36Sopenharmony_ci if (IS_ERR(ic->i_recv_cq)) { 56362306a36Sopenharmony_ci ret = PTR_ERR(ic->i_recv_cq); 56462306a36Sopenharmony_ci ic->i_recv_cq = NULL; 56562306a36Sopenharmony_ci ibdev_put_vector(rds_ibdev, ic->i_rcq_vector); 56662306a36Sopenharmony_ci rdsdebug("ib_create_cq recv failed: %d\n", ret); 56762306a36Sopenharmony_ci goto send_cq_out; 56862306a36Sopenharmony_ci } 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); 57162306a36Sopenharmony_ci if (ret) { 57262306a36Sopenharmony_ci rdsdebug("ib_req_notify_cq send failed: %d\n", ret); 57362306a36Sopenharmony_ci goto recv_cq_out; 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); 57762306a36Sopenharmony_ci if (ret) { 57862306a36Sopenharmony_ci rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); 57962306a36Sopenharmony_ci goto recv_cq_out; 58062306a36Sopenharmony_ci } 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci /* XXX negotiate max send/recv with remote? */ 58362306a36Sopenharmony_ci memset(&attr, 0, sizeof(attr)); 58462306a36Sopenharmony_ci attr.event_handler = rds_ib_qp_event_handler; 58562306a36Sopenharmony_ci attr.qp_context = conn; 58662306a36Sopenharmony_ci /* + 1 to allow for the single ack message */ 58762306a36Sopenharmony_ci attr.cap.max_send_wr = ic->i_send_ring.w_nr + fr_queue_space + 1; 58862306a36Sopenharmony_ci attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; 58962306a36Sopenharmony_ci attr.cap.max_send_sge = rds_ibdev->max_sge; 59062306a36Sopenharmony_ci attr.cap.max_recv_sge = RDS_IB_RECV_SGE; 59162306a36Sopenharmony_ci attr.sq_sig_type = IB_SIGNAL_REQ_WR; 59262306a36Sopenharmony_ci attr.qp_type = IB_QPT_RC; 59362306a36Sopenharmony_ci attr.send_cq = ic->i_send_cq; 59462306a36Sopenharmony_ci attr.recv_cq = ic->i_recv_cq; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci /* 59762306a36Sopenharmony_ci * XXX this can fail if max_*_wr is too large? Are we supposed 59862306a36Sopenharmony_ci * to back off until we get a value that the hardware can support? 59962306a36Sopenharmony_ci */ 60062306a36Sopenharmony_ci ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); 60162306a36Sopenharmony_ci if (ret) { 60262306a36Sopenharmony_ci rdsdebug("rdma_create_qp failed: %d\n", ret); 60362306a36Sopenharmony_ci goto recv_cq_out; 60462306a36Sopenharmony_ci } 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, 60762306a36Sopenharmony_ci ic->i_send_ring.w_nr, 60862306a36Sopenharmony_ci DMA_TO_DEVICE); 60962306a36Sopenharmony_ci if (!ic->i_send_hdrs) { 61062306a36Sopenharmony_ci ret = -ENOMEM; 61162306a36Sopenharmony_ci rdsdebug("DMA send hdrs alloc failed\n"); 61262306a36Sopenharmony_ci goto qp_out; 61362306a36Sopenharmony_ci } 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, 61662306a36Sopenharmony_ci ic->i_recv_ring.w_nr, 61762306a36Sopenharmony_ci DMA_FROM_DEVICE); 61862306a36Sopenharmony_ci if (!ic->i_recv_hdrs) { 61962306a36Sopenharmony_ci ret = -ENOMEM; 62062306a36Sopenharmony_ci rdsdebug("DMA recv hdrs alloc failed\n"); 62162306a36Sopenharmony_ci goto send_hdrs_dma_out; 62262306a36Sopenharmony_ci } 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, 62562306a36Sopenharmony_ci DMA_TO_DEVICE); 62662306a36Sopenharmony_ci if (!ic->i_ack) { 62762306a36Sopenharmony_ci ret = -ENOMEM; 62862306a36Sopenharmony_ci rdsdebug("DMA ack header alloc failed\n"); 62962306a36Sopenharmony_ci goto recv_hdrs_dma_out; 63062306a36Sopenharmony_ci } 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci ic->i_sends = vzalloc_node(array_size(sizeof(struct rds_ib_send_work), 63362306a36Sopenharmony_ci ic->i_send_ring.w_nr), 63462306a36Sopenharmony_ci ibdev_to_node(dev)); 63562306a36Sopenharmony_ci if (!ic->i_sends) { 63662306a36Sopenharmony_ci ret = -ENOMEM; 63762306a36Sopenharmony_ci rdsdebug("send allocation failed\n"); 63862306a36Sopenharmony_ci goto ack_dma_out; 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci ic->i_recvs = vzalloc_node(array_size(sizeof(struct rds_ib_recv_work), 64262306a36Sopenharmony_ci ic->i_recv_ring.w_nr), 64362306a36Sopenharmony_ci ibdev_to_node(dev)); 64462306a36Sopenharmony_ci if (!ic->i_recvs) { 64562306a36Sopenharmony_ci ret = -ENOMEM; 64662306a36Sopenharmony_ci rdsdebug("recv allocation failed\n"); 64762306a36Sopenharmony_ci goto sends_out; 64862306a36Sopenharmony_ci } 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci rds_ib_recv_init_ack(ic); 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, 65362306a36Sopenharmony_ci ic->i_send_cq, ic->i_recv_cq); 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci goto out; 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_cisends_out: 65862306a36Sopenharmony_ci vfree(ic->i_sends); 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_ciack_dma_out: 66162306a36Sopenharmony_ci rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, 66262306a36Sopenharmony_ci DMA_TO_DEVICE); 66362306a36Sopenharmony_ci ic->i_ack = NULL; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_cirecv_hdrs_dma_out: 66662306a36Sopenharmony_ci rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, 66762306a36Sopenharmony_ci ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); 66862306a36Sopenharmony_ci ic->i_recv_hdrs = NULL; 66962306a36Sopenharmony_ci ic->i_recv_hdrs_dma = NULL; 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_cisend_hdrs_dma_out: 67262306a36Sopenharmony_ci rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, 67362306a36Sopenharmony_ci ic->i_send_ring.w_nr, DMA_TO_DEVICE); 67462306a36Sopenharmony_ci ic->i_send_hdrs = NULL; 67562306a36Sopenharmony_ci ic->i_send_hdrs_dma = NULL; 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ciqp_out: 67862306a36Sopenharmony_ci rdma_destroy_qp(ic->i_cm_id); 67962306a36Sopenharmony_cirecv_cq_out: 68062306a36Sopenharmony_ci ib_destroy_cq(ic->i_recv_cq); 68162306a36Sopenharmony_ci ic->i_recv_cq = NULL; 68262306a36Sopenharmony_cisend_cq_out: 68362306a36Sopenharmony_ci ib_destroy_cq(ic->i_send_cq); 68462306a36Sopenharmony_ci ic->i_send_cq = NULL; 68562306a36Sopenharmony_cirds_ibdev_out: 68662306a36Sopenharmony_ci rds_ib_remove_conn(rds_ibdev, conn); 68762306a36Sopenharmony_ciout: 68862306a36Sopenharmony_ci rds_ib_dev_put(rds_ibdev); 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci return ret; 69162306a36Sopenharmony_ci} 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_cistatic u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) 69462306a36Sopenharmony_ci{ 69562306a36Sopenharmony_ci const union rds_ib_conn_priv *dp = event->param.conn.private_data; 69662306a36Sopenharmony_ci u8 data_len, major, minor; 69762306a36Sopenharmony_ci u32 version = 0; 69862306a36Sopenharmony_ci __be16 mask; 69962306a36Sopenharmony_ci u16 common; 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci /* 70262306a36Sopenharmony_ci * rdma_cm private data is odd - when there is any private data in the 70362306a36Sopenharmony_ci * request, we will be given a pretty large buffer without telling us the 70462306a36Sopenharmony_ci * original size. The only way to tell the difference is by looking at 70562306a36Sopenharmony_ci * the contents, which are initialized to zero. 70662306a36Sopenharmony_ci * If the protocol version fields aren't set, this is a connection attempt 70762306a36Sopenharmony_ci * from an older version. This could be 3.0 or 2.0 - we can't tell. 70862306a36Sopenharmony_ci * We really should have changed this for OFED 1.3 :-( 70962306a36Sopenharmony_ci */ 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci /* Be paranoid. RDS always has privdata */ 71262306a36Sopenharmony_ci if (!event->param.conn.private_data_len) { 71362306a36Sopenharmony_ci printk(KERN_NOTICE "RDS incoming connection has no private data, " 71462306a36Sopenharmony_ci "rejecting\n"); 71562306a36Sopenharmony_ci return 0; 71662306a36Sopenharmony_ci } 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci if (isv6) { 71962306a36Sopenharmony_ci data_len = sizeof(struct rds6_ib_connect_private); 72062306a36Sopenharmony_ci major = dp->ricp_v6.dp_protocol_major; 72162306a36Sopenharmony_ci minor = dp->ricp_v6.dp_protocol_minor; 72262306a36Sopenharmony_ci mask = dp->ricp_v6.dp_protocol_minor_mask; 72362306a36Sopenharmony_ci } else { 72462306a36Sopenharmony_ci data_len = sizeof(struct rds_ib_connect_private); 72562306a36Sopenharmony_ci major = dp->ricp_v4.dp_protocol_major; 72662306a36Sopenharmony_ci minor = dp->ricp_v4.dp_protocol_minor; 72762306a36Sopenharmony_ci mask = dp->ricp_v4.dp_protocol_minor_mask; 72862306a36Sopenharmony_ci } 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci /* Even if len is crap *now* I still want to check it. -ASG */ 73162306a36Sopenharmony_ci if (event->param.conn.private_data_len < data_len || major == 0) 73262306a36Sopenharmony_ci return RDS_PROTOCOL_4_0; 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS; 73562306a36Sopenharmony_ci if (major == 4 && common) { 73662306a36Sopenharmony_ci version = RDS_PROTOCOL_4_0; 73762306a36Sopenharmony_ci while ((common >>= 1) != 0) 73862306a36Sopenharmony_ci version++; 73962306a36Sopenharmony_ci } else if (RDS_PROTOCOL_COMPAT_VERSION == 74062306a36Sopenharmony_ci RDS_PROTOCOL(major, minor)) { 74162306a36Sopenharmony_ci version = RDS_PROTOCOL_COMPAT_VERSION; 74262306a36Sopenharmony_ci } else { 74362306a36Sopenharmony_ci if (isv6) 74462306a36Sopenharmony_ci printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n", 74562306a36Sopenharmony_ci &dp->ricp_v6.dp_saddr, major, minor); 74662306a36Sopenharmony_ci else 74762306a36Sopenharmony_ci printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n", 74862306a36Sopenharmony_ci &dp->ricp_v4.dp_saddr, major, minor); 74962306a36Sopenharmony_ci } 75062306a36Sopenharmony_ci return version; 75162306a36Sopenharmony_ci} 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 75462306a36Sopenharmony_ci/* Given an IPv6 address, find the net_device which hosts that address and 75562306a36Sopenharmony_ci * return its index. This is used by the rds_ib_cm_handle_connect() code to 75662306a36Sopenharmony_ci * find the interface index of where an incoming request comes from when 75762306a36Sopenharmony_ci * the request is using a link local address. 75862306a36Sopenharmony_ci * 75962306a36Sopenharmony_ci * Note one problem in this search. It is possible that two interfaces have 76062306a36Sopenharmony_ci * the same link local address. Unfortunately, this cannot be solved unless 76162306a36Sopenharmony_ci * the underlying layer gives us the interface which an incoming RDMA connect 76262306a36Sopenharmony_ci * request comes from. 76362306a36Sopenharmony_ci */ 76462306a36Sopenharmony_cistatic u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr) 76562306a36Sopenharmony_ci{ 76662306a36Sopenharmony_ci struct net_device *dev; 76762306a36Sopenharmony_ci int idx = 0; 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci rcu_read_lock(); 77062306a36Sopenharmony_ci for_each_netdev_rcu(net, dev) { 77162306a36Sopenharmony_ci if (ipv6_chk_addr(net, addr, dev, 1)) { 77262306a36Sopenharmony_ci idx = dev->ifindex; 77362306a36Sopenharmony_ci break; 77462306a36Sopenharmony_ci } 77562306a36Sopenharmony_ci } 77662306a36Sopenharmony_ci rcu_read_unlock(); 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci return idx; 77962306a36Sopenharmony_ci} 78062306a36Sopenharmony_ci#endif 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ciint rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, 78362306a36Sopenharmony_ci struct rdma_cm_event *event, bool isv6) 78462306a36Sopenharmony_ci{ 78562306a36Sopenharmony_ci __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id; 78662306a36Sopenharmony_ci __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id; 78762306a36Sopenharmony_ci const struct rds_ib_conn_priv_cmn *dp_cmn; 78862306a36Sopenharmony_ci struct rds_connection *conn = NULL; 78962306a36Sopenharmony_ci struct rds_ib_connection *ic = NULL; 79062306a36Sopenharmony_ci struct rdma_conn_param conn_param; 79162306a36Sopenharmony_ci const union rds_ib_conn_priv *dp; 79262306a36Sopenharmony_ci union rds_ib_conn_priv dp_rep; 79362306a36Sopenharmony_ci struct in6_addr s_mapped_addr; 79462306a36Sopenharmony_ci struct in6_addr d_mapped_addr; 79562306a36Sopenharmony_ci const struct in6_addr *saddr6; 79662306a36Sopenharmony_ci const struct in6_addr *daddr6; 79762306a36Sopenharmony_ci int destroy = 1; 79862306a36Sopenharmony_ci u32 ifindex = 0; 79962306a36Sopenharmony_ci u32 version; 80062306a36Sopenharmony_ci int err = 1; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci /* Check whether the remote protocol version matches ours. */ 80362306a36Sopenharmony_ci version = rds_ib_protocol_compatible(event, isv6); 80462306a36Sopenharmony_ci if (!version) { 80562306a36Sopenharmony_ci err = RDS_RDMA_REJ_INCOMPAT; 80662306a36Sopenharmony_ci goto out; 80762306a36Sopenharmony_ci } 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci dp = event->param.conn.private_data; 81062306a36Sopenharmony_ci if (isv6) { 81162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 81262306a36Sopenharmony_ci dp_cmn = &dp->ricp_v6.dp_cmn; 81362306a36Sopenharmony_ci saddr6 = &dp->ricp_v6.dp_saddr; 81462306a36Sopenharmony_ci daddr6 = &dp->ricp_v6.dp_daddr; 81562306a36Sopenharmony_ci /* If either address is link local, need to find the 81662306a36Sopenharmony_ci * interface index in order to create a proper RDS 81762306a36Sopenharmony_ci * connection. 81862306a36Sopenharmony_ci */ 81962306a36Sopenharmony_ci if (ipv6_addr_type(daddr6) & IPV6_ADDR_LINKLOCAL) { 82062306a36Sopenharmony_ci /* Using init_net for now .. */ 82162306a36Sopenharmony_ci ifindex = __rds_find_ifindex(&init_net, daddr6); 82262306a36Sopenharmony_ci /* No index found... Need to bail out. */ 82362306a36Sopenharmony_ci if (ifindex == 0) { 82462306a36Sopenharmony_ci err = -EOPNOTSUPP; 82562306a36Sopenharmony_ci goto out; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci } else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) { 82862306a36Sopenharmony_ci /* Use our address to find the correct index. */ 82962306a36Sopenharmony_ci ifindex = __rds_find_ifindex(&init_net, daddr6); 83062306a36Sopenharmony_ci /* No index found... Need to bail out. */ 83162306a36Sopenharmony_ci if (ifindex == 0) { 83262306a36Sopenharmony_ci err = -EOPNOTSUPP; 83362306a36Sopenharmony_ci goto out; 83462306a36Sopenharmony_ci } 83562306a36Sopenharmony_ci } 83662306a36Sopenharmony_ci#else 83762306a36Sopenharmony_ci err = -EOPNOTSUPP; 83862306a36Sopenharmony_ci goto out; 83962306a36Sopenharmony_ci#endif 84062306a36Sopenharmony_ci } else { 84162306a36Sopenharmony_ci dp_cmn = &dp->ricp_v4.dp_cmn; 84262306a36Sopenharmony_ci ipv6_addr_set_v4mapped(dp->ricp_v4.dp_saddr, &s_mapped_addr); 84362306a36Sopenharmony_ci ipv6_addr_set_v4mapped(dp->ricp_v4.dp_daddr, &d_mapped_addr); 84462306a36Sopenharmony_ci saddr6 = &s_mapped_addr; 84562306a36Sopenharmony_ci daddr6 = &d_mapped_addr; 84662306a36Sopenharmony_ci } 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n", 84962306a36Sopenharmony_ci saddr6, daddr6, RDS_PROTOCOL_MAJOR(version), 85062306a36Sopenharmony_ci RDS_PROTOCOL_MINOR(version), 85162306a36Sopenharmony_ci (unsigned long long)be64_to_cpu(lguid), 85262306a36Sopenharmony_ci (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss); 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci /* RDS/IB is not currently netns aware, thus init_net */ 85562306a36Sopenharmony_ci conn = rds_conn_create(&init_net, daddr6, saddr6, 85662306a36Sopenharmony_ci &rds_ib_transport, dp_cmn->ricpc_dp_toss, 85762306a36Sopenharmony_ci GFP_KERNEL, ifindex); 85862306a36Sopenharmony_ci if (IS_ERR(conn)) { 85962306a36Sopenharmony_ci rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); 86062306a36Sopenharmony_ci conn = NULL; 86162306a36Sopenharmony_ci goto out; 86262306a36Sopenharmony_ci } 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci /* 86562306a36Sopenharmony_ci * The connection request may occur while the 86662306a36Sopenharmony_ci * previous connection exist, e.g. in case of failover. 86762306a36Sopenharmony_ci * But as connections may be initiated simultaneously 86862306a36Sopenharmony_ci * by both hosts, we have a random backoff mechanism - 86962306a36Sopenharmony_ci * see the comment above rds_queue_reconnect() 87062306a36Sopenharmony_ci */ 87162306a36Sopenharmony_ci mutex_lock(&conn->c_cm_lock); 87262306a36Sopenharmony_ci if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { 87362306a36Sopenharmony_ci if (rds_conn_state(conn) == RDS_CONN_UP) { 87462306a36Sopenharmony_ci rdsdebug("incoming connect while connecting\n"); 87562306a36Sopenharmony_ci rds_conn_drop(conn); 87662306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_listen_closed_stale); 87762306a36Sopenharmony_ci } else 87862306a36Sopenharmony_ci if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { 87962306a36Sopenharmony_ci /* Wait and see - our connect may still be succeeding */ 88062306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_connect_raced); 88162306a36Sopenharmony_ci } 88262306a36Sopenharmony_ci goto out; 88362306a36Sopenharmony_ci } 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci ic = conn->c_transport_data; 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci rds_ib_set_protocol(conn, version); 88862306a36Sopenharmony_ci rds_ib_set_flow_control(conn, be32_to_cpu(dp_cmn->ricpc_credit)); 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci /* If the peer gave us the last packet it saw, process this as if 89162306a36Sopenharmony_ci * we had received a regular ACK. */ 89262306a36Sopenharmony_ci if (dp_cmn->ricpc_ack_seq) 89362306a36Sopenharmony_ci rds_send_drop_acked(conn, be64_to_cpu(dp_cmn->ricpc_ack_seq), 89462306a36Sopenharmony_ci NULL); 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci BUG_ON(cm_id->context); 89762306a36Sopenharmony_ci BUG_ON(ic->i_cm_id); 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci ic->i_cm_id = cm_id; 90062306a36Sopenharmony_ci cm_id->context = conn; 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci /* We got halfway through setting up the ib_connection, if we 90362306a36Sopenharmony_ci * fail now, we have to take the long route out of this mess. */ 90462306a36Sopenharmony_ci destroy = 0; 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci err = rds_ib_setup_qp(conn); 90762306a36Sopenharmony_ci if (err) { 90862306a36Sopenharmony_ci rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); 90962306a36Sopenharmony_ci goto out; 91062306a36Sopenharmony_ci } 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version, 91362306a36Sopenharmony_ci event->param.conn.responder_resources, 91462306a36Sopenharmony_ci event->param.conn.initiator_depth, isv6); 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32); 91762306a36Sopenharmony_ci /* rdma_accept() calls rdma_reject() internally if it fails */ 91862306a36Sopenharmony_ci if (rdma_accept(cm_id, &conn_param)) 91962306a36Sopenharmony_ci rds_ib_conn_error(conn, "rdma_accept failed\n"); 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ciout: 92262306a36Sopenharmony_ci if (conn) 92362306a36Sopenharmony_ci mutex_unlock(&conn->c_cm_lock); 92462306a36Sopenharmony_ci if (err) 92562306a36Sopenharmony_ci rdma_reject(cm_id, &err, sizeof(int), 92662306a36Sopenharmony_ci IB_CM_REJ_CONSUMER_DEFINED); 92762306a36Sopenharmony_ci return destroy; 92862306a36Sopenharmony_ci} 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ciint rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) 93262306a36Sopenharmony_ci{ 93362306a36Sopenharmony_ci struct rds_connection *conn = cm_id->context; 93462306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 93562306a36Sopenharmony_ci struct rdma_conn_param conn_param; 93662306a36Sopenharmony_ci union rds_ib_conn_priv dp; 93762306a36Sopenharmony_ci int ret; 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci /* If the peer doesn't do protocol negotiation, we must 94062306a36Sopenharmony_ci * default to RDSv3.0 */ 94162306a36Sopenharmony_ci rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1); 94262306a36Sopenharmony_ci ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci ret = rds_ib_setup_qp(conn); 94562306a36Sopenharmony_ci if (ret) { 94662306a36Sopenharmony_ci rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret); 94762306a36Sopenharmony_ci goto out; 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci 95062306a36Sopenharmony_ci rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, 95162306a36Sopenharmony_ci conn->c_proposed_version, 95262306a36Sopenharmony_ci UINT_MAX, UINT_MAX, isv6); 95362306a36Sopenharmony_ci ret = rdma_connect_locked(cm_id, &conn_param); 95462306a36Sopenharmony_ci if (ret) 95562306a36Sopenharmony_ci rds_ib_conn_error(conn, "rdma_connect_locked failed (%d)\n", 95662306a36Sopenharmony_ci ret); 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ciout: 95962306a36Sopenharmony_ci /* Beware - returning non-zero tells the rdma_cm to destroy 96062306a36Sopenharmony_ci * the cm_id. We should certainly not do it as long as we still 96162306a36Sopenharmony_ci * "own" the cm_id. */ 96262306a36Sopenharmony_ci if (ret) { 96362306a36Sopenharmony_ci if (ic->i_cm_id == cm_id) 96462306a36Sopenharmony_ci ret = 0; 96562306a36Sopenharmony_ci } 96662306a36Sopenharmony_ci ic->i_active_side = true; 96762306a36Sopenharmony_ci return ret; 96862306a36Sopenharmony_ci} 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ciint rds_ib_conn_path_connect(struct rds_conn_path *cp) 97162306a36Sopenharmony_ci{ 97262306a36Sopenharmony_ci struct rds_connection *conn = cp->cp_conn; 97362306a36Sopenharmony_ci struct sockaddr_storage src, dest; 97462306a36Sopenharmony_ci rdma_cm_event_handler handler; 97562306a36Sopenharmony_ci struct rds_ib_connection *ic; 97662306a36Sopenharmony_ci int ret; 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci ic = conn->c_transport_data; 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci /* XXX I wonder what affect the port space has */ 98162306a36Sopenharmony_ci /* delegate cm event handler to rdma_transport */ 98262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 98362306a36Sopenharmony_ci if (conn->c_isv6) 98462306a36Sopenharmony_ci handler = rds6_rdma_cm_event_handler; 98562306a36Sopenharmony_ci else 98662306a36Sopenharmony_ci#endif 98762306a36Sopenharmony_ci handler = rds_rdma_cm_event_handler; 98862306a36Sopenharmony_ci ic->i_cm_id = rdma_create_id(&init_net, handler, conn, 98962306a36Sopenharmony_ci RDMA_PS_TCP, IB_QPT_RC); 99062306a36Sopenharmony_ci if (IS_ERR(ic->i_cm_id)) { 99162306a36Sopenharmony_ci ret = PTR_ERR(ic->i_cm_id); 99262306a36Sopenharmony_ci ic->i_cm_id = NULL; 99362306a36Sopenharmony_ci rdsdebug("rdma_create_id() failed: %d\n", ret); 99462306a36Sopenharmony_ci goto out; 99562306a36Sopenharmony_ci } 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci if (ipv6_addr_v4mapped(&conn->c_faddr)) { 100062306a36Sopenharmony_ci struct sockaddr_in *sin; 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_ci sin = (struct sockaddr_in *)&src; 100362306a36Sopenharmony_ci sin->sin_family = AF_INET; 100462306a36Sopenharmony_ci sin->sin_addr.s_addr = conn->c_laddr.s6_addr32[3]; 100562306a36Sopenharmony_ci sin->sin_port = 0; 100662306a36Sopenharmony_ci 100762306a36Sopenharmony_ci sin = (struct sockaddr_in *)&dest; 100862306a36Sopenharmony_ci sin->sin_family = AF_INET; 100962306a36Sopenharmony_ci sin->sin_addr.s_addr = conn->c_faddr.s6_addr32[3]; 101062306a36Sopenharmony_ci sin->sin_port = htons(RDS_PORT); 101162306a36Sopenharmony_ci } else { 101262306a36Sopenharmony_ci struct sockaddr_in6 *sin6; 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci sin6 = (struct sockaddr_in6 *)&src; 101562306a36Sopenharmony_ci sin6->sin6_family = AF_INET6; 101662306a36Sopenharmony_ci sin6->sin6_addr = conn->c_laddr; 101762306a36Sopenharmony_ci sin6->sin6_port = 0; 101862306a36Sopenharmony_ci sin6->sin6_scope_id = conn->c_dev_if; 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_ci sin6 = (struct sockaddr_in6 *)&dest; 102162306a36Sopenharmony_ci sin6->sin6_family = AF_INET6; 102262306a36Sopenharmony_ci sin6->sin6_addr = conn->c_faddr; 102362306a36Sopenharmony_ci sin6->sin6_port = htons(RDS_CM_PORT); 102462306a36Sopenharmony_ci sin6->sin6_scope_id = conn->c_dev_if; 102562306a36Sopenharmony_ci } 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src, 102862306a36Sopenharmony_ci (struct sockaddr *)&dest, 102962306a36Sopenharmony_ci RDS_RDMA_RESOLVE_TIMEOUT_MS); 103062306a36Sopenharmony_ci if (ret) { 103162306a36Sopenharmony_ci rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id, 103262306a36Sopenharmony_ci ret); 103362306a36Sopenharmony_ci rdma_destroy_id(ic->i_cm_id); 103462306a36Sopenharmony_ci ic->i_cm_id = NULL; 103562306a36Sopenharmony_ci } 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ciout: 103862306a36Sopenharmony_ci return ret; 103962306a36Sopenharmony_ci} 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci/* 104262306a36Sopenharmony_ci * This is so careful about only cleaning up resources that were built up 104362306a36Sopenharmony_ci * so that it can be called at any point during startup. In fact it 104462306a36Sopenharmony_ci * can be called multiple times for a given connection. 104562306a36Sopenharmony_ci */ 104662306a36Sopenharmony_civoid rds_ib_conn_path_shutdown(struct rds_conn_path *cp) 104762306a36Sopenharmony_ci{ 104862306a36Sopenharmony_ci struct rds_connection *conn = cp->cp_conn; 104962306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 105062306a36Sopenharmony_ci int err = 0; 105162306a36Sopenharmony_ci 105262306a36Sopenharmony_ci rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id, 105362306a36Sopenharmony_ci ic->i_pd, ic->i_send_cq, ic->i_recv_cq, 105462306a36Sopenharmony_ci ic->i_cm_id ? ic->i_cm_id->qp : NULL); 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci if (ic->i_cm_id) { 105762306a36Sopenharmony_ci rdsdebug("disconnecting cm %p\n", ic->i_cm_id); 105862306a36Sopenharmony_ci err = rdma_disconnect(ic->i_cm_id); 105962306a36Sopenharmony_ci if (err) { 106062306a36Sopenharmony_ci /* Actually this may happen quite frequently, when 106162306a36Sopenharmony_ci * an outgoing connect raced with an incoming connect. 106262306a36Sopenharmony_ci */ 106362306a36Sopenharmony_ci rdsdebug("failed to disconnect, cm: %p err %d\n", 106462306a36Sopenharmony_ci ic->i_cm_id, err); 106562306a36Sopenharmony_ci } 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci /* kick off "flush_worker" for all pools in order to reap 106862306a36Sopenharmony_ci * all FRMR registrations that are still marked "FRMR_IS_INUSE" 106962306a36Sopenharmony_ci */ 107062306a36Sopenharmony_ci rds_ib_flush_mrs(); 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci /* 107362306a36Sopenharmony_ci * We want to wait for tx and rx completion to finish 107462306a36Sopenharmony_ci * before we tear down the connection, but we have to be 107562306a36Sopenharmony_ci * careful not to get stuck waiting on a send ring that 107662306a36Sopenharmony_ci * only has unsignaled sends in it. We've shutdown new 107762306a36Sopenharmony_ci * sends before getting here so by waiting for signaled 107862306a36Sopenharmony_ci * sends to complete we're ensured that there will be no 107962306a36Sopenharmony_ci * more tx processing. 108062306a36Sopenharmony_ci */ 108162306a36Sopenharmony_ci wait_event(rds_ib_ring_empty_wait, 108262306a36Sopenharmony_ci rds_ib_ring_empty(&ic->i_recv_ring) && 108362306a36Sopenharmony_ci (atomic_read(&ic->i_signaled_sends) == 0) && 108462306a36Sopenharmony_ci (atomic_read(&ic->i_fastreg_inuse_count) == 0) && 108562306a36Sopenharmony_ci (atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR)); 108662306a36Sopenharmony_ci tasklet_kill(&ic->i_send_tasklet); 108762306a36Sopenharmony_ci tasklet_kill(&ic->i_recv_tasklet); 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci atomic_set(&ic->i_cq_quiesce, 1); 109062306a36Sopenharmony_ci 109162306a36Sopenharmony_ci /* first destroy the ib state that generates callbacks */ 109262306a36Sopenharmony_ci if (ic->i_cm_id->qp) 109362306a36Sopenharmony_ci rdma_destroy_qp(ic->i_cm_id); 109462306a36Sopenharmony_ci if (ic->i_send_cq) { 109562306a36Sopenharmony_ci if (ic->rds_ibdev) 109662306a36Sopenharmony_ci ibdev_put_vector(ic->rds_ibdev, ic->i_scq_vector); 109762306a36Sopenharmony_ci ib_destroy_cq(ic->i_send_cq); 109862306a36Sopenharmony_ci } 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci if (ic->i_recv_cq) { 110162306a36Sopenharmony_ci if (ic->rds_ibdev) 110262306a36Sopenharmony_ci ibdev_put_vector(ic->rds_ibdev, ic->i_rcq_vector); 110362306a36Sopenharmony_ci ib_destroy_cq(ic->i_recv_cq); 110462306a36Sopenharmony_ci } 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci if (ic->rds_ibdev) { 110762306a36Sopenharmony_ci /* then free the resources that ib callbacks use */ 110862306a36Sopenharmony_ci if (ic->i_send_hdrs) { 110962306a36Sopenharmony_ci rds_dma_hdrs_free(ic->rds_ibdev, 111062306a36Sopenharmony_ci ic->i_send_hdrs, 111162306a36Sopenharmony_ci ic->i_send_hdrs_dma, 111262306a36Sopenharmony_ci ic->i_send_ring.w_nr, 111362306a36Sopenharmony_ci DMA_TO_DEVICE); 111462306a36Sopenharmony_ci ic->i_send_hdrs = NULL; 111562306a36Sopenharmony_ci ic->i_send_hdrs_dma = NULL; 111662306a36Sopenharmony_ci } 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci if (ic->i_recv_hdrs) { 111962306a36Sopenharmony_ci rds_dma_hdrs_free(ic->rds_ibdev, 112062306a36Sopenharmony_ci ic->i_recv_hdrs, 112162306a36Sopenharmony_ci ic->i_recv_hdrs_dma, 112262306a36Sopenharmony_ci ic->i_recv_ring.w_nr, 112362306a36Sopenharmony_ci DMA_FROM_DEVICE); 112462306a36Sopenharmony_ci ic->i_recv_hdrs = NULL; 112562306a36Sopenharmony_ci ic->i_recv_hdrs_dma = NULL; 112662306a36Sopenharmony_ci } 112762306a36Sopenharmony_ci 112862306a36Sopenharmony_ci if (ic->i_ack) { 112962306a36Sopenharmony_ci rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, 113062306a36Sopenharmony_ci ic->i_ack_dma, DMA_TO_DEVICE); 113162306a36Sopenharmony_ci ic->i_ack = NULL; 113262306a36Sopenharmony_ci } 113362306a36Sopenharmony_ci } else { 113462306a36Sopenharmony_ci WARN_ON(ic->i_send_hdrs); 113562306a36Sopenharmony_ci WARN_ON(ic->i_send_hdrs_dma); 113662306a36Sopenharmony_ci WARN_ON(ic->i_recv_hdrs); 113762306a36Sopenharmony_ci WARN_ON(ic->i_recv_hdrs_dma); 113862306a36Sopenharmony_ci WARN_ON(ic->i_ack); 113962306a36Sopenharmony_ci } 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci if (ic->i_sends) 114262306a36Sopenharmony_ci rds_ib_send_clear_ring(ic); 114362306a36Sopenharmony_ci if (ic->i_recvs) 114462306a36Sopenharmony_ci rds_ib_recv_clear_ring(ic); 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci rdma_destroy_id(ic->i_cm_id); 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci /* 114962306a36Sopenharmony_ci * Move connection back to the nodev list. 115062306a36Sopenharmony_ci */ 115162306a36Sopenharmony_ci if (ic->rds_ibdev) 115262306a36Sopenharmony_ci rds_ib_remove_conn(ic->rds_ibdev, conn); 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci ic->i_cm_id = NULL; 115562306a36Sopenharmony_ci ic->i_pd = NULL; 115662306a36Sopenharmony_ci ic->i_send_cq = NULL; 115762306a36Sopenharmony_ci ic->i_recv_cq = NULL; 115862306a36Sopenharmony_ci } 115962306a36Sopenharmony_ci BUG_ON(ic->rds_ibdev); 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci /* Clear pending transmit */ 116262306a36Sopenharmony_ci if (ic->i_data_op) { 116362306a36Sopenharmony_ci struct rds_message *rm; 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_ci rm = container_of(ic->i_data_op, struct rds_message, data); 116662306a36Sopenharmony_ci rds_message_put(rm); 116762306a36Sopenharmony_ci ic->i_data_op = NULL; 116862306a36Sopenharmony_ci } 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci /* Clear the ACK state */ 117162306a36Sopenharmony_ci clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 117262306a36Sopenharmony_ci#ifdef KERNEL_HAS_ATOMIC64 117362306a36Sopenharmony_ci atomic64_set(&ic->i_ack_next, 0); 117462306a36Sopenharmony_ci#else 117562306a36Sopenharmony_ci ic->i_ack_next = 0; 117662306a36Sopenharmony_ci#endif 117762306a36Sopenharmony_ci ic->i_ack_recv = 0; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci /* Clear flow control state */ 118062306a36Sopenharmony_ci ic->i_flowctl = 0; 118162306a36Sopenharmony_ci atomic_set(&ic->i_credits, 0); 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci /* Re-init rings, but retain sizes. */ 118462306a36Sopenharmony_ci rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr); 118562306a36Sopenharmony_ci rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr); 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_ci if (ic->i_ibinc) { 118862306a36Sopenharmony_ci rds_inc_put(&ic->i_ibinc->ii_inc); 118962306a36Sopenharmony_ci ic->i_ibinc = NULL; 119062306a36Sopenharmony_ci } 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci vfree(ic->i_sends); 119362306a36Sopenharmony_ci ic->i_sends = NULL; 119462306a36Sopenharmony_ci vfree(ic->i_recvs); 119562306a36Sopenharmony_ci ic->i_recvs = NULL; 119662306a36Sopenharmony_ci ic->i_active_side = false; 119762306a36Sopenharmony_ci} 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ciint rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) 120062306a36Sopenharmony_ci{ 120162306a36Sopenharmony_ci struct rds_ib_connection *ic; 120262306a36Sopenharmony_ci unsigned long flags; 120362306a36Sopenharmony_ci int ret; 120462306a36Sopenharmony_ci 120562306a36Sopenharmony_ci /* XXX too lazy? */ 120662306a36Sopenharmony_ci ic = kzalloc(sizeof(struct rds_ib_connection), gfp); 120762306a36Sopenharmony_ci if (!ic) 120862306a36Sopenharmony_ci return -ENOMEM; 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_ci ret = rds_ib_recv_alloc_caches(ic, gfp); 121162306a36Sopenharmony_ci if (ret) { 121262306a36Sopenharmony_ci kfree(ic); 121362306a36Sopenharmony_ci return ret; 121462306a36Sopenharmony_ci } 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci INIT_LIST_HEAD(&ic->ib_node); 121762306a36Sopenharmony_ci tasklet_init(&ic->i_send_tasklet, rds_ib_tasklet_fn_send, 121862306a36Sopenharmony_ci (unsigned long)ic); 121962306a36Sopenharmony_ci tasklet_init(&ic->i_recv_tasklet, rds_ib_tasklet_fn_recv, 122062306a36Sopenharmony_ci (unsigned long)ic); 122162306a36Sopenharmony_ci mutex_init(&ic->i_recv_mutex); 122262306a36Sopenharmony_ci#ifndef KERNEL_HAS_ATOMIC64 122362306a36Sopenharmony_ci spin_lock_init(&ic->i_ack_lock); 122462306a36Sopenharmony_ci#endif 122562306a36Sopenharmony_ci atomic_set(&ic->i_signaled_sends, 0); 122662306a36Sopenharmony_ci atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR); 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci /* 122962306a36Sopenharmony_ci * rds_ib_conn_shutdown() waits for these to be emptied so they 123062306a36Sopenharmony_ci * must be initialized before it can be called. 123162306a36Sopenharmony_ci */ 123262306a36Sopenharmony_ci rds_ib_ring_init(&ic->i_send_ring, 0); 123362306a36Sopenharmony_ci rds_ib_ring_init(&ic->i_recv_ring, 0); 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci ic->conn = conn; 123662306a36Sopenharmony_ci conn->c_transport_data = ic; 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci spin_lock_irqsave(&ib_nodev_conns_lock, flags); 123962306a36Sopenharmony_ci list_add_tail(&ic->ib_node, &ib_nodev_conns); 124062306a36Sopenharmony_ci spin_unlock_irqrestore(&ib_nodev_conns_lock, flags); 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci 124362306a36Sopenharmony_ci rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data); 124462306a36Sopenharmony_ci return 0; 124562306a36Sopenharmony_ci} 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_ci/* 124862306a36Sopenharmony_ci * Free a connection. Connection must be shut down and not set for reconnect. 124962306a36Sopenharmony_ci */ 125062306a36Sopenharmony_civoid rds_ib_conn_free(void *arg) 125162306a36Sopenharmony_ci{ 125262306a36Sopenharmony_ci struct rds_ib_connection *ic = arg; 125362306a36Sopenharmony_ci spinlock_t *lock_ptr; 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_ci rdsdebug("ic %p\n", ic); 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci /* 125862306a36Sopenharmony_ci * Conn is either on a dev's list or on the nodev list. 125962306a36Sopenharmony_ci * A race with shutdown() or connect() would cause problems 126062306a36Sopenharmony_ci * (since rds_ibdev would change) but that should never happen. 126162306a36Sopenharmony_ci */ 126262306a36Sopenharmony_ci lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock; 126362306a36Sopenharmony_ci 126462306a36Sopenharmony_ci spin_lock_irq(lock_ptr); 126562306a36Sopenharmony_ci list_del(&ic->ib_node); 126662306a36Sopenharmony_ci spin_unlock_irq(lock_ptr); 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci rds_ib_recv_free_caches(ic); 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci kfree(ic); 127162306a36Sopenharmony_ci} 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci/* 127562306a36Sopenharmony_ci * An error occurred on the connection 127662306a36Sopenharmony_ci */ 127762306a36Sopenharmony_civoid 127862306a36Sopenharmony_ci__rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...) 127962306a36Sopenharmony_ci{ 128062306a36Sopenharmony_ci va_list ap; 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci rds_conn_drop(conn); 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci va_start(ap, fmt); 128562306a36Sopenharmony_ci vprintk(fmt, ap); 128662306a36Sopenharmony_ci va_end(ap); 128762306a36Sopenharmony_ci} 1288