18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (c) 2006 Mellanox Technologies. All rights reserved
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two
58c2ecf20Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the
88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below:
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci *     Redistribution and use in source and binary forms, with or
118c2ecf20Sopenharmony_ci *     without modification, are permitted provided that the following
128c2ecf20Sopenharmony_ci *     conditions are met:
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci *      - Redistributions of source code must retain the above
158c2ecf20Sopenharmony_ci *        copyright notice, this list of conditions and the following
168c2ecf20Sopenharmony_ci *        disclaimer.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci *      - Redistributions in binary form must reproduce the above
198c2ecf20Sopenharmony_ci *        copyright notice, this list of conditions and the following
208c2ecf20Sopenharmony_ci *        disclaimer in the documentation and/or other materials
218c2ecf20Sopenharmony_ci *        provided with the distribution.
228c2ecf20Sopenharmony_ci *
238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
308c2ecf20Sopenharmony_ci * SOFTWARE.
318c2ecf20Sopenharmony_ci */
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci#include <rdma/ib_cm.h>
348c2ecf20Sopenharmony_ci#include <net/dst.h>
358c2ecf20Sopenharmony_ci#include <net/icmp.h>
368c2ecf20Sopenharmony_ci#include <linux/icmpv6.h>
378c2ecf20Sopenharmony_ci#include <linux/delay.h>
388c2ecf20Sopenharmony_ci#include <linux/slab.h>
398c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
408c2ecf20Sopenharmony_ci#include <linux/moduleparam.h>
418c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
428c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci#include "ipoib.h"
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ciint ipoib_max_conn_qp = 128;
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_cimodule_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
498c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_nonsrq_conn_qp,
508c2ecf20Sopenharmony_ci		 "Max number of connected-mode QPs per interface "
518c2ecf20Sopenharmony_ci		 "(applied only if shared receive queue is not available)");
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
548c2ecf20Sopenharmony_cistatic int data_debug_level;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_cimodule_param_named(cm_data_debug_level, data_debug_level, int, 0644);
578c2ecf20Sopenharmony_ciMODULE_PARM_DESC(cm_data_debug_level,
588c2ecf20Sopenharmony_ci		 "Enable data path debug tracing for connected mode if > 0");
598c2ecf20Sopenharmony_ci#endif
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
648c2ecf20Sopenharmony_ci#define IPOIB_CM_RX_TIMEOUT     (2 * 256 * HZ)
658c2ecf20Sopenharmony_ci#define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
668c2ecf20Sopenharmony_ci#define IPOIB_CM_RX_UPDATE_MASK (0x3)
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci#define IPOIB_CM_RX_RESERVE     (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_cistatic struct ib_qp_attr ipoib_cm_err_attr = {
718c2ecf20Sopenharmony_ci	.qp_state = IB_QPS_ERR
728c2ecf20Sopenharmony_ci};
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_cistatic struct ib_send_wr ipoib_cm_rx_drain_wr = {
778c2ecf20Sopenharmony_ci	.opcode = IB_WR_SEND,
788c2ecf20Sopenharmony_ci};
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_cistatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
818c2ecf20Sopenharmony_ci			       const struct ib_cm_event *event);
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_cistatic void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
848c2ecf20Sopenharmony_ci				  u64 mapping[IPOIB_CM_RX_SG])
858c2ecf20Sopenharmony_ci{
868c2ecf20Sopenharmony_ci	int i;
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	for (i = 0; i < frags; ++i)
918c2ecf20Sopenharmony_ci		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_cistatic int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
958c2ecf20Sopenharmony_ci{
968c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
978c2ecf20Sopenharmony_ci	int i, ret;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	for (i = 0; i < priv->cm.num_frags; ++i)
1028c2ecf20Sopenharmony_ci		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL);
1058c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
1068c2ecf20Sopenharmony_ci		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
1078c2ecf20Sopenharmony_ci		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
1088c2ecf20Sopenharmony_ci				      priv->cm.srq_ring[id].mapping);
1098c2ecf20Sopenharmony_ci		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
1108c2ecf20Sopenharmony_ci		priv->cm.srq_ring[id].skb = NULL;
1118c2ecf20Sopenharmony_ci	}
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	return ret;
1148c2ecf20Sopenharmony_ci}
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_cistatic int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
1178c2ecf20Sopenharmony_ci					struct ipoib_cm_rx *rx,
1188c2ecf20Sopenharmony_ci					struct ib_recv_wr *wr,
1198c2ecf20Sopenharmony_ci					struct ib_sge *sge, int id)
1208c2ecf20Sopenharmony_ci{
1218c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1228c2ecf20Sopenharmony_ci	int i, ret;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
1278c2ecf20Sopenharmony_ci		sge[i].addr = rx->rx_ring[id].mapping[i];
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	ret = ib_post_recv(rx->qp, wr, NULL);
1308c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
1318c2ecf20Sopenharmony_ci		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
1328c2ecf20Sopenharmony_ci		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
1338c2ecf20Sopenharmony_ci				      rx->rx_ring[id].mapping);
1348c2ecf20Sopenharmony_ci		dev_kfree_skb_any(rx->rx_ring[id].skb);
1358c2ecf20Sopenharmony_ci		rx->rx_ring[id].skb = NULL;
1368c2ecf20Sopenharmony_ci	}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	return ret;
1398c2ecf20Sopenharmony_ci}
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_cistatic struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
1428c2ecf20Sopenharmony_ci					     struct ipoib_cm_rx_buf *rx_ring,
1438c2ecf20Sopenharmony_ci					     int id, int frags,
1448c2ecf20Sopenharmony_ci					     u64 mapping[IPOIB_CM_RX_SG],
1458c2ecf20Sopenharmony_ci					     gfp_t gfp)
1468c2ecf20Sopenharmony_ci{
1478c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1488c2ecf20Sopenharmony_ci	struct sk_buff *skb;
1498c2ecf20Sopenharmony_ci	int i;
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
1528c2ecf20Sopenharmony_ci	if (unlikely(!skb))
1538c2ecf20Sopenharmony_ci		return NULL;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	/*
1568c2ecf20Sopenharmony_ci	 * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the
1578c2ecf20Sopenharmony_ci	 * IP header to a multiple of 16.
1588c2ecf20Sopenharmony_ci	 */
1598c2ecf20Sopenharmony_ci	skb_reserve(skb, IPOIB_CM_RX_RESERVE);
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
1628c2ecf20Sopenharmony_ci				       DMA_FROM_DEVICE);
1638c2ecf20Sopenharmony_ci	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
1648c2ecf20Sopenharmony_ci		dev_kfree_skb_any(skb);
1658c2ecf20Sopenharmony_ci		return NULL;
1668c2ecf20Sopenharmony_ci	}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	for (i = 0; i < frags; i++) {
1698c2ecf20Sopenharmony_ci		struct page *page = alloc_page(gfp);
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci		if (!page)
1728c2ecf20Sopenharmony_ci			goto partial_error;
1738c2ecf20Sopenharmony_ci		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		mapping[i + 1] = ib_dma_map_page(priv->ca, page,
1768c2ecf20Sopenharmony_ci						 0, PAGE_SIZE, DMA_FROM_DEVICE);
1778c2ecf20Sopenharmony_ci		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
1788c2ecf20Sopenharmony_ci			goto partial_error;
1798c2ecf20Sopenharmony_ci	}
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci	rx_ring[id].skb = skb;
1828c2ecf20Sopenharmony_ci	return skb;
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_cipartial_error:
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	for (; i > 0; --i)
1898c2ecf20Sopenharmony_ci		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	dev_kfree_skb_any(skb);
1928c2ecf20Sopenharmony_ci	return NULL;
1938c2ecf20Sopenharmony_ci}
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_cistatic void ipoib_cm_free_rx_ring(struct net_device *dev,
1968c2ecf20Sopenharmony_ci				  struct ipoib_cm_rx_buf *rx_ring)
1978c2ecf20Sopenharmony_ci{
1988c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
1998c2ecf20Sopenharmony_ci	int i;
2008c2ecf20Sopenharmony_ci
2018c2ecf20Sopenharmony_ci	for (i = 0; i < ipoib_recvq_size; ++i)
2028c2ecf20Sopenharmony_ci		if (rx_ring[i].skb) {
2038c2ecf20Sopenharmony_ci			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
2048c2ecf20Sopenharmony_ci					      rx_ring[i].mapping);
2058c2ecf20Sopenharmony_ci			dev_kfree_skb_any(rx_ring[i].skb);
2068c2ecf20Sopenharmony_ci		}
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	vfree(rx_ring);
2098c2ecf20Sopenharmony_ci}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cistatic void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
2128c2ecf20Sopenharmony_ci{
2138c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *p;
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	/* We only reserved 1 extra slot in CQ for drain WRs, so
2168c2ecf20Sopenharmony_ci	 * make sure we have at most 1 outstanding WR. */
2178c2ecf20Sopenharmony_ci	if (list_empty(&priv->cm.rx_flush_list) ||
2188c2ecf20Sopenharmony_ci	    !list_empty(&priv->cm.rx_drain_list))
2198c2ecf20Sopenharmony_ci		return;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	/*
2228c2ecf20Sopenharmony_ci	 * QPs on flush list are error state.  This way, a "flush
2238c2ecf20Sopenharmony_ci	 * error" WC will be immediately generated for each WR we post.
2248c2ecf20Sopenharmony_ci	 */
2258c2ecf20Sopenharmony_ci	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
2268c2ecf20Sopenharmony_ci	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
2278c2ecf20Sopenharmony_ci	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
2288c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to post drain wr\n");
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
2318c2ecf20Sopenharmony_ci}
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_cistatic void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
2348c2ecf20Sopenharmony_ci{
2358c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *p = ctx;
2368c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
2378c2ecf20Sopenharmony_ci	unsigned long flags;
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
2408c2ecf20Sopenharmony_ci		return;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
2438c2ecf20Sopenharmony_ci	list_move(&p->list, &priv->cm.rx_flush_list);
2448c2ecf20Sopenharmony_ci	p->state = IPOIB_CM_RX_FLUSH;
2458c2ecf20Sopenharmony_ci	ipoib_cm_start_rx_drain(priv);
2468c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
2478c2ecf20Sopenharmony_ci}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_cistatic struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
2508c2ecf20Sopenharmony_ci					   struct ipoib_cm_rx *p)
2518c2ecf20Sopenharmony_ci{
2528c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
2538c2ecf20Sopenharmony_ci	struct ib_qp_init_attr attr = {
2548c2ecf20Sopenharmony_ci		.event_handler = ipoib_cm_rx_event_handler,
2558c2ecf20Sopenharmony_ci		.send_cq = priv->recv_cq, /* For drain WR */
2568c2ecf20Sopenharmony_ci		.recv_cq = priv->recv_cq,
2578c2ecf20Sopenharmony_ci		.srq = priv->cm.srq,
2588c2ecf20Sopenharmony_ci		.cap.max_send_wr = 1, /* For drain WR */
2598c2ecf20Sopenharmony_ci		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
2608c2ecf20Sopenharmony_ci		.sq_sig_type = IB_SIGNAL_ALL_WR,
2618c2ecf20Sopenharmony_ci		.qp_type = IB_QPT_RC,
2628c2ecf20Sopenharmony_ci		.qp_context = p,
2638c2ecf20Sopenharmony_ci	};
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci	if (!ipoib_cm_has_srq(dev)) {
2668c2ecf20Sopenharmony_ci		attr.cap.max_recv_wr  = ipoib_recvq_size;
2678c2ecf20Sopenharmony_ci		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
2688c2ecf20Sopenharmony_ci	}
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci	return ib_create_qp(priv->pd, &attr);
2718c2ecf20Sopenharmony_ci}
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_cistatic int ipoib_cm_modify_rx_qp(struct net_device *dev,
2748c2ecf20Sopenharmony_ci				 struct ib_cm_id *cm_id, struct ib_qp *qp,
2758c2ecf20Sopenharmony_ci				 unsigned int psn)
2768c2ecf20Sopenharmony_ci{
2778c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
2788c2ecf20Sopenharmony_ci	struct ib_qp_attr qp_attr;
2798c2ecf20Sopenharmony_ci	int qp_attr_mask, ret;
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci	qp_attr.qp_state = IB_QPS_INIT;
2828c2ecf20Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
2838c2ecf20Sopenharmony_ci	if (ret) {
2848c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
2858c2ecf20Sopenharmony_ci		return ret;
2868c2ecf20Sopenharmony_ci	}
2878c2ecf20Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
2888c2ecf20Sopenharmony_ci	if (ret) {
2898c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
2908c2ecf20Sopenharmony_ci		return ret;
2918c2ecf20Sopenharmony_ci	}
2928c2ecf20Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTR;
2938c2ecf20Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
2948c2ecf20Sopenharmony_ci	if (ret) {
2958c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
2968c2ecf20Sopenharmony_ci		return ret;
2978c2ecf20Sopenharmony_ci	}
2988c2ecf20Sopenharmony_ci	qp_attr.rq_psn = psn;
2998c2ecf20Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
3008c2ecf20Sopenharmony_ci	if (ret) {
3018c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
3028c2ecf20Sopenharmony_ci		return ret;
3038c2ecf20Sopenharmony_ci	}
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci	/*
3068c2ecf20Sopenharmony_ci	 * Current Mellanox HCA firmware won't generate completions
3078c2ecf20Sopenharmony_ci	 * with error for drain WRs unless the QP has been moved to
3088c2ecf20Sopenharmony_ci	 * RTS first. This work-around leaves a window where a QP has
3098c2ecf20Sopenharmony_ci	 * moved to error asynchronously, but this will eventually get
3108c2ecf20Sopenharmony_ci	 * fixed in firmware, so let's not error out if modify QP
3118c2ecf20Sopenharmony_ci	 * fails.
3128c2ecf20Sopenharmony_ci	 */
3138c2ecf20Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTS;
3148c2ecf20Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
3158c2ecf20Sopenharmony_ci	if (ret) {
3168c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
3178c2ecf20Sopenharmony_ci		return 0;
3188c2ecf20Sopenharmony_ci	}
3198c2ecf20Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
3208c2ecf20Sopenharmony_ci	if (ret) {
3218c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
3228c2ecf20Sopenharmony_ci		return 0;
3238c2ecf20Sopenharmony_ci	}
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci	return 0;
3268c2ecf20Sopenharmony_ci}
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_cistatic void ipoib_cm_init_rx_wr(struct net_device *dev,
3298c2ecf20Sopenharmony_ci				struct ib_recv_wr *wr,
3308c2ecf20Sopenharmony_ci				struct ib_sge *sge)
3318c2ecf20Sopenharmony_ci{
3328c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
3338c2ecf20Sopenharmony_ci	int i;
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci	for (i = 0; i < priv->cm.num_frags; ++i)
3368c2ecf20Sopenharmony_ci		sge[i].lkey = priv->pd->local_dma_lkey;
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci	sge[0].length = IPOIB_CM_HEAD_SIZE;
3398c2ecf20Sopenharmony_ci	for (i = 1; i < priv->cm.num_frags; ++i)
3408c2ecf20Sopenharmony_ci		sge[i].length = PAGE_SIZE;
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci	wr->next    = NULL;
3438c2ecf20Sopenharmony_ci	wr->sg_list = sge;
3448c2ecf20Sopenharmony_ci	wr->num_sge = priv->cm.num_frags;
3458c2ecf20Sopenharmony_ci}
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_cistatic int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
3488c2ecf20Sopenharmony_ci				   struct ipoib_cm_rx *rx)
3498c2ecf20Sopenharmony_ci{
3508c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
3518c2ecf20Sopenharmony_ci	struct {
3528c2ecf20Sopenharmony_ci		struct ib_recv_wr wr;
3538c2ecf20Sopenharmony_ci		struct ib_sge sge[IPOIB_CM_RX_SG];
3548c2ecf20Sopenharmony_ci	} *t;
3558c2ecf20Sopenharmony_ci	int ret;
3568c2ecf20Sopenharmony_ci	int i;
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_ci	rx->rx_ring = vzalloc(array_size(ipoib_recvq_size,
3598c2ecf20Sopenharmony_ci					 sizeof(*rx->rx_ring)));
3608c2ecf20Sopenharmony_ci	if (!rx->rx_ring)
3618c2ecf20Sopenharmony_ci		return -ENOMEM;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	t = kmalloc(sizeof(*t), GFP_KERNEL);
3648c2ecf20Sopenharmony_ci	if (!t) {
3658c2ecf20Sopenharmony_ci		ret = -ENOMEM;
3668c2ecf20Sopenharmony_ci		goto err_free_1;
3678c2ecf20Sopenharmony_ci	}
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci	ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	spin_lock_irq(&priv->lock);
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
3748c2ecf20Sopenharmony_ci		spin_unlock_irq(&priv->lock);
3758c2ecf20Sopenharmony_ci		ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
3768c2ecf20Sopenharmony_ci		ret = -EINVAL;
3778c2ecf20Sopenharmony_ci		goto err_free;
3788c2ecf20Sopenharmony_ci	} else
3798c2ecf20Sopenharmony_ci		++priv->cm.nonsrq_conn_qp;
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci	spin_unlock_irq(&priv->lock);
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci	for (i = 0; i < ipoib_recvq_size; ++i) {
3848c2ecf20Sopenharmony_ci		if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
3858c2ecf20Sopenharmony_ci					   rx->rx_ring[i].mapping,
3868c2ecf20Sopenharmony_ci					   GFP_KERNEL)) {
3878c2ecf20Sopenharmony_ci			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
3888c2ecf20Sopenharmony_ci			ret = -ENOMEM;
3898c2ecf20Sopenharmony_ci			goto err_count;
3908c2ecf20Sopenharmony_ci		}
3918c2ecf20Sopenharmony_ci		ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
3928c2ecf20Sopenharmony_ci		if (ret) {
3938c2ecf20Sopenharmony_ci			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
3948c2ecf20Sopenharmony_ci				   "failed for buf %d\n", i);
3958c2ecf20Sopenharmony_ci			ret = -EIO;
3968c2ecf20Sopenharmony_ci			goto err_count;
3978c2ecf20Sopenharmony_ci		}
3988c2ecf20Sopenharmony_ci	}
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	rx->recv_count = ipoib_recvq_size;
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci	kfree(t);
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	return 0;
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_cierr_count:
4078c2ecf20Sopenharmony_ci	spin_lock_irq(&priv->lock);
4088c2ecf20Sopenharmony_ci	--priv->cm.nonsrq_conn_qp;
4098c2ecf20Sopenharmony_ci	spin_unlock_irq(&priv->lock);
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_cierr_free:
4128c2ecf20Sopenharmony_ci	kfree(t);
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_cierr_free_1:
4158c2ecf20Sopenharmony_ci	ipoib_cm_free_rx_ring(dev, rx->rx_ring);
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	return ret;
4188c2ecf20Sopenharmony_ci}
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_cistatic int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
4218c2ecf20Sopenharmony_ci			     struct ib_qp *qp,
4228c2ecf20Sopenharmony_ci			     const struct ib_cm_req_event_param *req,
4238c2ecf20Sopenharmony_ci			     unsigned int psn)
4248c2ecf20Sopenharmony_ci{
4258c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
4268c2ecf20Sopenharmony_ci	struct ipoib_cm_data data = {};
4278c2ecf20Sopenharmony_ci	struct ib_cm_rep_param rep = {};
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci	data.qpn = cpu_to_be32(priv->qp->qp_num);
4308c2ecf20Sopenharmony_ci	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	rep.private_data = &data;
4338c2ecf20Sopenharmony_ci	rep.private_data_len = sizeof(data);
4348c2ecf20Sopenharmony_ci	rep.flow_control = 0;
4358c2ecf20Sopenharmony_ci	rep.rnr_retry_count = req->rnr_retry_count;
4368c2ecf20Sopenharmony_ci	rep.srq = ipoib_cm_has_srq(dev);
4378c2ecf20Sopenharmony_ci	rep.qp_num = qp->qp_num;
4388c2ecf20Sopenharmony_ci	rep.starting_psn = psn;
4398c2ecf20Sopenharmony_ci	return ib_send_cm_rep(cm_id, &rep);
4408c2ecf20Sopenharmony_ci}
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_cistatic int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
4438c2ecf20Sopenharmony_ci				const struct ib_cm_event *event)
4448c2ecf20Sopenharmony_ci{
4458c2ecf20Sopenharmony_ci	struct net_device *dev = cm_id->context;
4468c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
4478c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *p;
4488c2ecf20Sopenharmony_ci	unsigned int psn;
4498c2ecf20Sopenharmony_ci	int ret;
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	ipoib_dbg(priv, "REQ arrived\n");
4528c2ecf20Sopenharmony_ci	p = kzalloc(sizeof(*p), GFP_KERNEL);
4538c2ecf20Sopenharmony_ci	if (!p)
4548c2ecf20Sopenharmony_ci		return -ENOMEM;
4558c2ecf20Sopenharmony_ci	p->dev = dev;
4568c2ecf20Sopenharmony_ci	p->id = cm_id;
4578c2ecf20Sopenharmony_ci	cm_id->context = p;
4588c2ecf20Sopenharmony_ci	p->state = IPOIB_CM_RX_LIVE;
4598c2ecf20Sopenharmony_ci	p->jiffies = jiffies;
4608c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&p->list);
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	p->qp = ipoib_cm_create_rx_qp(dev, p);
4638c2ecf20Sopenharmony_ci	if (IS_ERR(p->qp)) {
4648c2ecf20Sopenharmony_ci		ret = PTR_ERR(p->qp);
4658c2ecf20Sopenharmony_ci		goto err_qp;
4668c2ecf20Sopenharmony_ci	}
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci	psn = prandom_u32() & 0xffffff;
4698c2ecf20Sopenharmony_ci	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
4708c2ecf20Sopenharmony_ci	if (ret)
4718c2ecf20Sopenharmony_ci		goto err_modify;
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci	if (!ipoib_cm_has_srq(dev)) {
4748c2ecf20Sopenharmony_ci		ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p);
4758c2ecf20Sopenharmony_ci		if (ret)
4768c2ecf20Sopenharmony_ci			goto err_modify;
4778c2ecf20Sopenharmony_ci	}
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci	spin_lock_irq(&priv->lock);
4808c2ecf20Sopenharmony_ci	queue_delayed_work(priv->wq,
4818c2ecf20Sopenharmony_ci			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
4828c2ecf20Sopenharmony_ci	/* Add this entry to passive ids list head, but do not re-add it
4838c2ecf20Sopenharmony_ci	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
4848c2ecf20Sopenharmony_ci	p->jiffies = jiffies;
4858c2ecf20Sopenharmony_ci	if (p->state == IPOIB_CM_RX_LIVE)
4868c2ecf20Sopenharmony_ci		list_move(&p->list, &priv->cm.passive_ids);
4878c2ecf20Sopenharmony_ci	spin_unlock_irq(&priv->lock);
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
4908c2ecf20Sopenharmony_ci	if (ret) {
4918c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to send REP: %d\n", ret);
4928c2ecf20Sopenharmony_ci		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
4938c2ecf20Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state\n");
4948c2ecf20Sopenharmony_ci	}
4958c2ecf20Sopenharmony_ci	return 0;
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_cierr_modify:
4988c2ecf20Sopenharmony_ci	ib_destroy_qp(p->qp);
4998c2ecf20Sopenharmony_cierr_qp:
5008c2ecf20Sopenharmony_ci	kfree(p);
5018c2ecf20Sopenharmony_ci	return ret;
5028c2ecf20Sopenharmony_ci}
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_cistatic int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
5058c2ecf20Sopenharmony_ci			       const struct ib_cm_event *event)
5068c2ecf20Sopenharmony_ci{
5078c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *p;
5088c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv;
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	switch (event->event) {
5118c2ecf20Sopenharmony_ci	case IB_CM_REQ_RECEIVED:
5128c2ecf20Sopenharmony_ci		return ipoib_cm_req_handler(cm_id, event);
5138c2ecf20Sopenharmony_ci	case IB_CM_DREQ_RECEIVED:
5148c2ecf20Sopenharmony_ci		ib_send_cm_drep(cm_id, NULL, 0);
5158c2ecf20Sopenharmony_ci		fallthrough;
5168c2ecf20Sopenharmony_ci	case IB_CM_REJ_RECEIVED:
5178c2ecf20Sopenharmony_ci		p = cm_id->context;
5188c2ecf20Sopenharmony_ci		priv = ipoib_priv(p->dev);
5198c2ecf20Sopenharmony_ci		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
5208c2ecf20Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state\n");
5218c2ecf20Sopenharmony_ci		fallthrough;
5228c2ecf20Sopenharmony_ci	default:
5238c2ecf20Sopenharmony_ci		return 0;
5248c2ecf20Sopenharmony_ci	}
5258c2ecf20Sopenharmony_ci}
5268c2ecf20Sopenharmony_ci/* Adjust length of skb with fragments to match received data */
5278c2ecf20Sopenharmony_cistatic void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
5288c2ecf20Sopenharmony_ci			  unsigned int length, struct sk_buff *toskb)
5298c2ecf20Sopenharmony_ci{
5308c2ecf20Sopenharmony_ci	int i, num_frags;
5318c2ecf20Sopenharmony_ci	unsigned int size;
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_ci	/* put header into skb */
5348c2ecf20Sopenharmony_ci	size = min(length, hdr_space);
5358c2ecf20Sopenharmony_ci	skb->tail += size;
5368c2ecf20Sopenharmony_ci	skb->len += size;
5378c2ecf20Sopenharmony_ci	length -= size;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	num_frags = skb_shinfo(skb)->nr_frags;
5408c2ecf20Sopenharmony_ci	for (i = 0; i < num_frags; i++) {
5418c2ecf20Sopenharmony_ci		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci		if (length == 0) {
5448c2ecf20Sopenharmony_ci			/* don't need this page */
5458c2ecf20Sopenharmony_ci			skb_fill_page_desc(toskb, i, skb_frag_page(frag),
5468c2ecf20Sopenharmony_ci					   0, PAGE_SIZE);
5478c2ecf20Sopenharmony_ci			--skb_shinfo(skb)->nr_frags;
5488c2ecf20Sopenharmony_ci		} else {
5498c2ecf20Sopenharmony_ci			size = min_t(unsigned int, length, PAGE_SIZE);
5508c2ecf20Sopenharmony_ci
5518c2ecf20Sopenharmony_ci			skb_frag_size_set(frag, size);
5528c2ecf20Sopenharmony_ci			skb->data_len += size;
5538c2ecf20Sopenharmony_ci			skb->truesize += size;
5548c2ecf20Sopenharmony_ci			skb->len += size;
5558c2ecf20Sopenharmony_ci			length -= size;
5568c2ecf20Sopenharmony_ci		}
5578c2ecf20Sopenharmony_ci	}
5588c2ecf20Sopenharmony_ci}
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_civoid ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
5618c2ecf20Sopenharmony_ci{
5628c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
5638c2ecf20Sopenharmony_ci	struct ipoib_cm_rx_buf *rx_ring;
5648c2ecf20Sopenharmony_ci	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
5658c2ecf20Sopenharmony_ci	struct sk_buff *skb, *newskb;
5668c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *p;
5678c2ecf20Sopenharmony_ci	unsigned long flags;
5688c2ecf20Sopenharmony_ci	u64 mapping[IPOIB_CM_RX_SG];
5698c2ecf20Sopenharmony_ci	int frags;
5708c2ecf20Sopenharmony_ci	int has_srq;
5718c2ecf20Sopenharmony_ci	struct sk_buff *small_skb;
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
5748c2ecf20Sopenharmony_ci		       wr_id, wc->status);
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci	if (unlikely(wr_id >= ipoib_recvq_size)) {
5778c2ecf20Sopenharmony_ci		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
5788c2ecf20Sopenharmony_ci			spin_lock_irqsave(&priv->lock, flags);
5798c2ecf20Sopenharmony_ci			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
5808c2ecf20Sopenharmony_ci			ipoib_cm_start_rx_drain(priv);
5818c2ecf20Sopenharmony_ci			queue_work(priv->wq, &priv->cm.rx_reap_task);
5828c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&priv->lock, flags);
5838c2ecf20Sopenharmony_ci		} else
5848c2ecf20Sopenharmony_ci			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
5858c2ecf20Sopenharmony_ci				   wr_id, ipoib_recvq_size);
5868c2ecf20Sopenharmony_ci		return;
5878c2ecf20Sopenharmony_ci	}
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	p = wc->qp->qp_context;
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci	has_srq = ipoib_cm_has_srq(dev);
5928c2ecf20Sopenharmony_ci	rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci	skb = rx_ring[wr_id].skb;
5958c2ecf20Sopenharmony_ci
5968c2ecf20Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS)) {
5978c2ecf20Sopenharmony_ci		ipoib_dbg(priv,
5988c2ecf20Sopenharmony_ci			  "cm recv error (status=%d, wrid=%d vend_err %#x)\n",
5998c2ecf20Sopenharmony_ci			  wc->status, wr_id, wc->vendor_err);
6008c2ecf20Sopenharmony_ci		++dev->stats.rx_dropped;
6018c2ecf20Sopenharmony_ci		if (has_srq)
6028c2ecf20Sopenharmony_ci			goto repost;
6038c2ecf20Sopenharmony_ci		else {
6048c2ecf20Sopenharmony_ci			if (!--p->recv_count) {
6058c2ecf20Sopenharmony_ci				spin_lock_irqsave(&priv->lock, flags);
6068c2ecf20Sopenharmony_ci				list_move(&p->list, &priv->cm.rx_reap_list);
6078c2ecf20Sopenharmony_ci				spin_unlock_irqrestore(&priv->lock, flags);
6088c2ecf20Sopenharmony_ci				queue_work(priv->wq, &priv->cm.rx_reap_task);
6098c2ecf20Sopenharmony_ci			}
6108c2ecf20Sopenharmony_ci			return;
6118c2ecf20Sopenharmony_ci		}
6128c2ecf20Sopenharmony_ci	}
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ci	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
6158c2ecf20Sopenharmony_ci		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
6168c2ecf20Sopenharmony_ci			spin_lock_irqsave(&priv->lock, flags);
6178c2ecf20Sopenharmony_ci			p->jiffies = jiffies;
6188c2ecf20Sopenharmony_ci			/* Move this entry to list head, but do not re-add it
6198c2ecf20Sopenharmony_ci			 * if it has been moved out of list. */
6208c2ecf20Sopenharmony_ci			if (p->state == IPOIB_CM_RX_LIVE)
6218c2ecf20Sopenharmony_ci				list_move(&p->list, &priv->cm.passive_ids);
6228c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&priv->lock, flags);
6238c2ecf20Sopenharmony_ci		}
6248c2ecf20Sopenharmony_ci	}
6258c2ecf20Sopenharmony_ci
6268c2ecf20Sopenharmony_ci	if (wc->byte_len < IPOIB_CM_COPYBREAK) {
6278c2ecf20Sopenharmony_ci		int dlen = wc->byte_len;
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci		small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
6308c2ecf20Sopenharmony_ci		if (small_skb) {
6318c2ecf20Sopenharmony_ci			skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
6328c2ecf20Sopenharmony_ci			ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
6338c2ecf20Sopenharmony_ci						   dlen, DMA_FROM_DEVICE);
6348c2ecf20Sopenharmony_ci			skb_copy_from_linear_data(skb, small_skb->data, dlen);
6358c2ecf20Sopenharmony_ci			ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
6368c2ecf20Sopenharmony_ci						      dlen, DMA_FROM_DEVICE);
6378c2ecf20Sopenharmony_ci			skb_put(small_skb, dlen);
6388c2ecf20Sopenharmony_ci			skb = small_skb;
6398c2ecf20Sopenharmony_ci			goto copied;
6408c2ecf20Sopenharmony_ci		}
6418c2ecf20Sopenharmony_ci	}
6428c2ecf20Sopenharmony_ci
6438c2ecf20Sopenharmony_ci	frags = PAGE_ALIGN(wc->byte_len -
6448c2ecf20Sopenharmony_ci			   min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
6458c2ecf20Sopenharmony_ci		PAGE_SIZE;
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
6488c2ecf20Sopenharmony_ci				       mapping, GFP_ATOMIC);
6498c2ecf20Sopenharmony_ci	if (unlikely(!newskb)) {
6508c2ecf20Sopenharmony_ci		/*
6518c2ecf20Sopenharmony_ci		 * If we can't allocate a new RX buffer, dump
6528c2ecf20Sopenharmony_ci		 * this packet and reuse the old buffer.
6538c2ecf20Sopenharmony_ci		 */
6548c2ecf20Sopenharmony_ci		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
6558c2ecf20Sopenharmony_ci		++dev->stats.rx_dropped;
6568c2ecf20Sopenharmony_ci		goto repost;
6578c2ecf20Sopenharmony_ci	}
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci	ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
6608c2ecf20Sopenharmony_ci	memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
6638c2ecf20Sopenharmony_ci		       wc->byte_len, wc->slid);
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_ci	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_cicopied:
6688c2ecf20Sopenharmony_ci	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
6698c2ecf20Sopenharmony_ci	skb_add_pseudo_hdr(skb);
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_ci	++dev->stats.rx_packets;
6728c2ecf20Sopenharmony_ci	dev->stats.rx_bytes += skb->len;
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	skb->dev = dev;
6758c2ecf20Sopenharmony_ci	/* XXX get correct PACKET_ type here */
6768c2ecf20Sopenharmony_ci	skb->pkt_type = PACKET_HOST;
6778c2ecf20Sopenharmony_ci	netif_receive_skb(skb);
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_cirepost:
6808c2ecf20Sopenharmony_ci	if (has_srq) {
6818c2ecf20Sopenharmony_ci		if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
6828c2ecf20Sopenharmony_ci			ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
6838c2ecf20Sopenharmony_ci				   "for buf %d\n", wr_id);
6848c2ecf20Sopenharmony_ci	} else {
6858c2ecf20Sopenharmony_ci		if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
6868c2ecf20Sopenharmony_ci							  &priv->cm.rx_wr,
6878c2ecf20Sopenharmony_ci							  priv->cm.rx_sge,
6888c2ecf20Sopenharmony_ci							  wr_id))) {
6898c2ecf20Sopenharmony_ci			--p->recv_count;
6908c2ecf20Sopenharmony_ci			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
6918c2ecf20Sopenharmony_ci				   "for buf %d\n", wr_id);
6928c2ecf20Sopenharmony_ci		}
6938c2ecf20Sopenharmony_ci	}
6948c2ecf20Sopenharmony_ci}
6958c2ecf20Sopenharmony_ci
6968c2ecf20Sopenharmony_cistatic inline int post_send(struct ipoib_dev_priv *priv,
6978c2ecf20Sopenharmony_ci			    struct ipoib_cm_tx *tx,
6988c2ecf20Sopenharmony_ci			    unsigned int wr_id,
6998c2ecf20Sopenharmony_ci			    struct ipoib_tx_buf *tx_req)
7008c2ecf20Sopenharmony_ci{
7018c2ecf20Sopenharmony_ci	ipoib_build_sge(priv, tx_req);
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci	priv->tx_wr.wr.wr_id	= wr_id | IPOIB_OP_CM;
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_ci	return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL);
7068c2ecf20Sopenharmony_ci}
7078c2ecf20Sopenharmony_ci
7088c2ecf20Sopenharmony_civoid ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
7098c2ecf20Sopenharmony_ci{
7108c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
7118c2ecf20Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
7128c2ecf20Sopenharmony_ci	int rc;
7138c2ecf20Sopenharmony_ci	unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb);
7148c2ecf20Sopenharmony_ci
7158c2ecf20Sopenharmony_ci	if (unlikely(skb->len > tx->mtu)) {
7168c2ecf20Sopenharmony_ci		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
7178c2ecf20Sopenharmony_ci			   skb->len, tx->mtu);
7188c2ecf20Sopenharmony_ci		++dev->stats.tx_dropped;
7198c2ecf20Sopenharmony_ci		++dev->stats.tx_errors;
7208c2ecf20Sopenharmony_ci		ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
7218c2ecf20Sopenharmony_ci		return;
7228c2ecf20Sopenharmony_ci	}
7238c2ecf20Sopenharmony_ci	if (skb_shinfo(skb)->nr_frags > usable_sge) {
7248c2ecf20Sopenharmony_ci		if (skb_linearize(skb) < 0) {
7258c2ecf20Sopenharmony_ci			ipoib_warn(priv, "skb could not be linearized\n");
7268c2ecf20Sopenharmony_ci			++dev->stats.tx_dropped;
7278c2ecf20Sopenharmony_ci			++dev->stats.tx_errors;
7288c2ecf20Sopenharmony_ci			dev_kfree_skb_any(skb);
7298c2ecf20Sopenharmony_ci			return;
7308c2ecf20Sopenharmony_ci		}
7318c2ecf20Sopenharmony_ci		/* Does skb_linearize return ok without reducing nr_frags? */
7328c2ecf20Sopenharmony_ci		if (skb_shinfo(skb)->nr_frags > usable_sge) {
7338c2ecf20Sopenharmony_ci			ipoib_warn(priv, "too many frags after skb linearize\n");
7348c2ecf20Sopenharmony_ci			++dev->stats.tx_dropped;
7358c2ecf20Sopenharmony_ci			++dev->stats.tx_errors;
7368c2ecf20Sopenharmony_ci			dev_kfree_skb_any(skb);
7378c2ecf20Sopenharmony_ci			return;
7388c2ecf20Sopenharmony_ci		}
7398c2ecf20Sopenharmony_ci	}
7408c2ecf20Sopenharmony_ci	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
7418c2ecf20Sopenharmony_ci		       tx->tx_head, skb->len, tx->qp->qp_num);
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_ci	/*
7448c2ecf20Sopenharmony_ci	 * We put the skb into the tx_ring _before_ we call post_send()
7458c2ecf20Sopenharmony_ci	 * because it's entirely possible that the completion handler will
7468c2ecf20Sopenharmony_ci	 * run before we execute anything after the post_send().  That
7478c2ecf20Sopenharmony_ci	 * means we have to make sure everything is properly recorded and
7488c2ecf20Sopenharmony_ci	 * our state is consistent before we call post_send().
7498c2ecf20Sopenharmony_ci	 */
7508c2ecf20Sopenharmony_ci	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
7518c2ecf20Sopenharmony_ci	tx_req->skb = skb;
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ci	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
7548c2ecf20Sopenharmony_ci		++dev->stats.tx_errors;
7558c2ecf20Sopenharmony_ci		dev_kfree_skb_any(skb);
7568c2ecf20Sopenharmony_ci		return;
7578c2ecf20Sopenharmony_ci	}
7588c2ecf20Sopenharmony_ci
7598c2ecf20Sopenharmony_ci	if ((priv->global_tx_head - priv->global_tx_tail) ==
7608c2ecf20Sopenharmony_ci	    ipoib_sendq_size - 1) {
7618c2ecf20Sopenharmony_ci		ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
7628c2ecf20Sopenharmony_ci			  tx->qp->qp_num);
7638c2ecf20Sopenharmony_ci		netif_stop_queue(dev);
7648c2ecf20Sopenharmony_ci	}
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci	skb_orphan(skb);
7678c2ecf20Sopenharmony_ci	skb_dst_drop(skb);
7688c2ecf20Sopenharmony_ci
7698c2ecf20Sopenharmony_ci	if (netif_queue_stopped(dev)) {
7708c2ecf20Sopenharmony_ci		rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
7718c2ecf20Sopenharmony_ci				      IB_CQ_REPORT_MISSED_EVENTS);
7728c2ecf20Sopenharmony_ci		if (unlikely(rc < 0))
7738c2ecf20Sopenharmony_ci			ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
7748c2ecf20Sopenharmony_ci		else if (rc)
7758c2ecf20Sopenharmony_ci			napi_schedule(&priv->send_napi);
7768c2ecf20Sopenharmony_ci	}
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
7798c2ecf20Sopenharmony_ci	if (unlikely(rc)) {
7808c2ecf20Sopenharmony_ci		ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc);
7818c2ecf20Sopenharmony_ci		++dev->stats.tx_errors;
7828c2ecf20Sopenharmony_ci		ipoib_dma_unmap_tx(priv, tx_req);
7838c2ecf20Sopenharmony_ci		dev_kfree_skb_any(skb);
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_ci		if (netif_queue_stopped(dev))
7868c2ecf20Sopenharmony_ci			netif_wake_queue(dev);
7878c2ecf20Sopenharmony_ci	} else {
7888c2ecf20Sopenharmony_ci		netif_trans_update(dev);
7898c2ecf20Sopenharmony_ci		++tx->tx_head;
7908c2ecf20Sopenharmony_ci		++priv->global_tx_head;
7918c2ecf20Sopenharmony_ci	}
7928c2ecf20Sopenharmony_ci}
7938c2ecf20Sopenharmony_ci
7948c2ecf20Sopenharmony_civoid ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
7958c2ecf20Sopenharmony_ci{
7968c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
7978c2ecf20Sopenharmony_ci	struct ipoib_cm_tx *tx = wc->qp->qp_context;
7988c2ecf20Sopenharmony_ci	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
7998c2ecf20Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
8008c2ecf20Sopenharmony_ci	unsigned long flags;
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
8038c2ecf20Sopenharmony_ci		       wr_id, wc->status);
8048c2ecf20Sopenharmony_ci
8058c2ecf20Sopenharmony_ci	if (unlikely(wr_id >= ipoib_sendq_size)) {
8068c2ecf20Sopenharmony_ci		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
8078c2ecf20Sopenharmony_ci			   wr_id, ipoib_sendq_size);
8088c2ecf20Sopenharmony_ci		return;
8098c2ecf20Sopenharmony_ci	}
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci	tx_req = &tx->tx_ring[wr_id];
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci	ipoib_dma_unmap_tx(priv, tx_req);
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	/* FIXME: is this right? Shouldn't we only increment on success? */
8168c2ecf20Sopenharmony_ci	++dev->stats.tx_packets;
8178c2ecf20Sopenharmony_ci	dev->stats.tx_bytes += tx_req->skb->len;
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_ci	dev_kfree_skb_any(tx_req->skb);
8208c2ecf20Sopenharmony_ci
8218c2ecf20Sopenharmony_ci	netif_tx_lock(dev);
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_ci	++tx->tx_tail;
8248c2ecf20Sopenharmony_ci	++priv->global_tx_tail;
8258c2ecf20Sopenharmony_ci
8268c2ecf20Sopenharmony_ci	if (unlikely(netif_queue_stopped(dev) &&
8278c2ecf20Sopenharmony_ci		     ((priv->global_tx_head - priv->global_tx_tail) <=
8288c2ecf20Sopenharmony_ci		      ipoib_sendq_size >> 1) &&
8298c2ecf20Sopenharmony_ci		     test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
8308c2ecf20Sopenharmony_ci		netif_wake_queue(dev);
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS &&
8338c2ecf20Sopenharmony_ci	    wc->status != IB_WC_WR_FLUSH_ERR) {
8348c2ecf20Sopenharmony_ci		struct ipoib_neigh *neigh;
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci		/* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle,
8378c2ecf20Sopenharmony_ci		 * so don't make waves.
8388c2ecf20Sopenharmony_ci		 */
8398c2ecf20Sopenharmony_ci		if (wc->status == IB_WC_RNR_RETRY_EXC_ERR ||
8408c2ecf20Sopenharmony_ci		    wc->status == IB_WC_RETRY_EXC_ERR)
8418c2ecf20Sopenharmony_ci			ipoib_dbg(priv,
8428c2ecf20Sopenharmony_ci				  "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
8438c2ecf20Sopenharmony_ci				   __func__, wc->status, wr_id, wc->vendor_err);
8448c2ecf20Sopenharmony_ci		else
8458c2ecf20Sopenharmony_ci			ipoib_warn(priv,
8468c2ecf20Sopenharmony_ci				    "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
8478c2ecf20Sopenharmony_ci				   __func__, wc->status, wr_id, wc->vendor_err);
8488c2ecf20Sopenharmony_ci
8498c2ecf20Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
8508c2ecf20Sopenharmony_ci		neigh = tx->neigh;
8518c2ecf20Sopenharmony_ci
8528c2ecf20Sopenharmony_ci		if (neigh) {
8538c2ecf20Sopenharmony_ci			neigh->cm = NULL;
8548c2ecf20Sopenharmony_ci			ipoib_neigh_free(neigh);
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_ci			tx->neigh = NULL;
8578c2ecf20Sopenharmony_ci		}
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_ci		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
8608c2ecf20Sopenharmony_ci			list_move(&tx->list, &priv->cm.reap_list);
8618c2ecf20Sopenharmony_ci			queue_work(priv->wq, &priv->cm.reap_task);
8628c2ecf20Sopenharmony_ci		}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
8658c2ecf20Sopenharmony_ci
8668c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
8678c2ecf20Sopenharmony_ci	}
8688c2ecf20Sopenharmony_ci
8698c2ecf20Sopenharmony_ci	netif_tx_unlock(dev);
8708c2ecf20Sopenharmony_ci}
8718c2ecf20Sopenharmony_ci
8728c2ecf20Sopenharmony_ciint ipoib_cm_dev_open(struct net_device *dev)
8738c2ecf20Sopenharmony_ci{
8748c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
8758c2ecf20Sopenharmony_ci	int ret;
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_ci	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
8788c2ecf20Sopenharmony_ci		return 0;
8798c2ecf20Sopenharmony_ci
8808c2ecf20Sopenharmony_ci	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
8818c2ecf20Sopenharmony_ci	if (IS_ERR(priv->cm.id)) {
8828c2ecf20Sopenharmony_ci		pr_warn("%s: failed to create CM ID\n", priv->ca->name);
8838c2ecf20Sopenharmony_ci		ret = PTR_ERR(priv->cm.id);
8848c2ecf20Sopenharmony_ci		goto err_cm;
8858c2ecf20Sopenharmony_ci	}
8868c2ecf20Sopenharmony_ci
8878c2ecf20Sopenharmony_ci	ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
8888c2ecf20Sopenharmony_ci			   0);
8898c2ecf20Sopenharmony_ci	if (ret) {
8908c2ecf20Sopenharmony_ci		pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
8918c2ecf20Sopenharmony_ci			IPOIB_CM_IETF_ID | priv->qp->qp_num);
8928c2ecf20Sopenharmony_ci		goto err_listen;
8938c2ecf20Sopenharmony_ci	}
8948c2ecf20Sopenharmony_ci
8958c2ecf20Sopenharmony_ci	return 0;
8968c2ecf20Sopenharmony_ci
8978c2ecf20Sopenharmony_cierr_listen:
8988c2ecf20Sopenharmony_ci	ib_destroy_cm_id(priv->cm.id);
8998c2ecf20Sopenharmony_cierr_cm:
9008c2ecf20Sopenharmony_ci	priv->cm.id = NULL;
9018c2ecf20Sopenharmony_ci	return ret;
9028c2ecf20Sopenharmony_ci}
9038c2ecf20Sopenharmony_ci
9048c2ecf20Sopenharmony_cistatic void ipoib_cm_free_rx_reap_list(struct net_device *dev)
9058c2ecf20Sopenharmony_ci{
9068c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
9078c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *rx, *n;
9088c2ecf20Sopenharmony_ci	LIST_HEAD(list);
9098c2ecf20Sopenharmony_ci
9108c2ecf20Sopenharmony_ci	spin_lock_irq(&priv->lock);
9118c2ecf20Sopenharmony_ci	list_splice_init(&priv->cm.rx_reap_list, &list);
9128c2ecf20Sopenharmony_ci	spin_unlock_irq(&priv->lock);
9138c2ecf20Sopenharmony_ci
9148c2ecf20Sopenharmony_ci	list_for_each_entry_safe(rx, n, &list, list) {
9158c2ecf20Sopenharmony_ci		ib_destroy_cm_id(rx->id);
9168c2ecf20Sopenharmony_ci		ib_destroy_qp(rx->qp);
9178c2ecf20Sopenharmony_ci		if (!ipoib_cm_has_srq(dev)) {
9188c2ecf20Sopenharmony_ci			ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring);
9198c2ecf20Sopenharmony_ci			spin_lock_irq(&priv->lock);
9208c2ecf20Sopenharmony_ci			--priv->cm.nonsrq_conn_qp;
9218c2ecf20Sopenharmony_ci			spin_unlock_irq(&priv->lock);
9228c2ecf20Sopenharmony_ci		}
9238c2ecf20Sopenharmony_ci		kfree(rx);
9248c2ecf20Sopenharmony_ci	}
9258c2ecf20Sopenharmony_ci}
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_civoid ipoib_cm_dev_stop(struct net_device *dev)
9288c2ecf20Sopenharmony_ci{
9298c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
9308c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *p;
9318c2ecf20Sopenharmony_ci	unsigned long begin;
9328c2ecf20Sopenharmony_ci	int ret;
9338c2ecf20Sopenharmony_ci
9348c2ecf20Sopenharmony_ci	if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
9358c2ecf20Sopenharmony_ci		return;
9368c2ecf20Sopenharmony_ci
9378c2ecf20Sopenharmony_ci	ib_destroy_cm_id(priv->cm.id);
9388c2ecf20Sopenharmony_ci	priv->cm.id = NULL;
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_ci	spin_lock_irq(&priv->lock);
9418c2ecf20Sopenharmony_ci	while (!list_empty(&priv->cm.passive_ids)) {
9428c2ecf20Sopenharmony_ci		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
9438c2ecf20Sopenharmony_ci		list_move(&p->list, &priv->cm.rx_error_list);
9448c2ecf20Sopenharmony_ci		p->state = IPOIB_CM_RX_ERROR;
9458c2ecf20Sopenharmony_ci		spin_unlock_irq(&priv->lock);
9468c2ecf20Sopenharmony_ci		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
9478c2ecf20Sopenharmony_ci		if (ret)
9488c2ecf20Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
9498c2ecf20Sopenharmony_ci		spin_lock_irq(&priv->lock);
9508c2ecf20Sopenharmony_ci	}
9518c2ecf20Sopenharmony_ci
9528c2ecf20Sopenharmony_ci	/* Wait for all RX to be drained */
9538c2ecf20Sopenharmony_ci	begin = jiffies;
9548c2ecf20Sopenharmony_ci
9558c2ecf20Sopenharmony_ci	while (!list_empty(&priv->cm.rx_error_list) ||
9568c2ecf20Sopenharmony_ci	       !list_empty(&priv->cm.rx_flush_list) ||
9578c2ecf20Sopenharmony_ci	       !list_empty(&priv->cm.rx_drain_list)) {
9588c2ecf20Sopenharmony_ci		if (time_after(jiffies, begin + 5 * HZ)) {
9598c2ecf20Sopenharmony_ci			ipoib_warn(priv, "RX drain timing out\n");
9608c2ecf20Sopenharmony_ci
9618c2ecf20Sopenharmony_ci			/*
9628c2ecf20Sopenharmony_ci			 * assume the HW is wedged and just free up everything.
9638c2ecf20Sopenharmony_ci			 */
9648c2ecf20Sopenharmony_ci			list_splice_init(&priv->cm.rx_flush_list,
9658c2ecf20Sopenharmony_ci					 &priv->cm.rx_reap_list);
9668c2ecf20Sopenharmony_ci			list_splice_init(&priv->cm.rx_error_list,
9678c2ecf20Sopenharmony_ci					 &priv->cm.rx_reap_list);
9688c2ecf20Sopenharmony_ci			list_splice_init(&priv->cm.rx_drain_list,
9698c2ecf20Sopenharmony_ci					 &priv->cm.rx_reap_list);
9708c2ecf20Sopenharmony_ci			break;
9718c2ecf20Sopenharmony_ci		}
9728c2ecf20Sopenharmony_ci		spin_unlock_irq(&priv->lock);
9738c2ecf20Sopenharmony_ci		usleep_range(1000, 2000);
9748c2ecf20Sopenharmony_ci		ipoib_drain_cq(dev);
9758c2ecf20Sopenharmony_ci		spin_lock_irq(&priv->lock);
9768c2ecf20Sopenharmony_ci	}
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci	spin_unlock_irq(&priv->lock);
9798c2ecf20Sopenharmony_ci
9808c2ecf20Sopenharmony_ci	ipoib_cm_free_rx_reap_list(dev);
9818c2ecf20Sopenharmony_ci
9828c2ecf20Sopenharmony_ci	cancel_delayed_work(&priv->cm.stale_task);
9838c2ecf20Sopenharmony_ci}
9848c2ecf20Sopenharmony_ci
9858c2ecf20Sopenharmony_cistatic int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
9868c2ecf20Sopenharmony_ci				const struct ib_cm_event *event)
9878c2ecf20Sopenharmony_ci{
9888c2ecf20Sopenharmony_ci	struct ipoib_cm_tx *p = cm_id->context;
9898c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
9908c2ecf20Sopenharmony_ci	struct ipoib_cm_data *data = event->private_data;
9918c2ecf20Sopenharmony_ci	struct sk_buff_head skqueue;
9928c2ecf20Sopenharmony_ci	struct ib_qp_attr qp_attr;
9938c2ecf20Sopenharmony_ci	int qp_attr_mask, ret;
9948c2ecf20Sopenharmony_ci	struct sk_buff *skb;
9958c2ecf20Sopenharmony_ci
9968c2ecf20Sopenharmony_ci	p->mtu = be32_to_cpu(data->mtu);
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_ci	if (p->mtu <= IPOIB_ENCAP_LEN) {
9998c2ecf20Sopenharmony_ci		ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n",
10008c2ecf20Sopenharmony_ci			   p->mtu, IPOIB_ENCAP_LEN);
10018c2ecf20Sopenharmony_ci		return -EINVAL;
10028c2ecf20Sopenharmony_ci	}
10038c2ecf20Sopenharmony_ci
10048c2ecf20Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTR;
10058c2ecf20Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
10068c2ecf20Sopenharmony_ci	if (ret) {
10078c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
10088c2ecf20Sopenharmony_ci		return ret;
10098c2ecf20Sopenharmony_ci	}
10108c2ecf20Sopenharmony_ci
10118c2ecf20Sopenharmony_ci	qp_attr.rq_psn = 0 /* FIXME */;
10128c2ecf20Sopenharmony_ci	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
10138c2ecf20Sopenharmony_ci	if (ret) {
10148c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
10158c2ecf20Sopenharmony_ci		return ret;
10168c2ecf20Sopenharmony_ci	}
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTS;
10198c2ecf20Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
10208c2ecf20Sopenharmony_ci	if (ret) {
10218c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
10228c2ecf20Sopenharmony_ci		return ret;
10238c2ecf20Sopenharmony_ci	}
10248c2ecf20Sopenharmony_ci	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
10258c2ecf20Sopenharmony_ci	if (ret) {
10268c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
10278c2ecf20Sopenharmony_ci		return ret;
10288c2ecf20Sopenharmony_ci	}
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_ci	skb_queue_head_init(&skqueue);
10318c2ecf20Sopenharmony_ci
10328c2ecf20Sopenharmony_ci	netif_tx_lock_bh(p->dev);
10338c2ecf20Sopenharmony_ci	spin_lock_irq(&priv->lock);
10348c2ecf20Sopenharmony_ci	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
10358c2ecf20Sopenharmony_ci	if (p->neigh)
10368c2ecf20Sopenharmony_ci		while ((skb = __skb_dequeue(&p->neigh->queue)))
10378c2ecf20Sopenharmony_ci			__skb_queue_tail(&skqueue, skb);
10388c2ecf20Sopenharmony_ci	spin_unlock_irq(&priv->lock);
10398c2ecf20Sopenharmony_ci	netif_tx_unlock_bh(p->dev);
10408c2ecf20Sopenharmony_ci
10418c2ecf20Sopenharmony_ci	while ((skb = __skb_dequeue(&skqueue))) {
10428c2ecf20Sopenharmony_ci		skb->dev = p->dev;
10438c2ecf20Sopenharmony_ci		ret = dev_queue_xmit(skb);
10448c2ecf20Sopenharmony_ci		if (ret)
10458c2ecf20Sopenharmony_ci			ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
10468c2ecf20Sopenharmony_ci				   __func__, ret);
10478c2ecf20Sopenharmony_ci	}
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_ci	ret = ib_send_cm_rtu(cm_id, NULL, 0);
10508c2ecf20Sopenharmony_ci	if (ret) {
10518c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
10528c2ecf20Sopenharmony_ci		return ret;
10538c2ecf20Sopenharmony_ci	}
10548c2ecf20Sopenharmony_ci	return 0;
10558c2ecf20Sopenharmony_ci}
10568c2ecf20Sopenharmony_ci
10578c2ecf20Sopenharmony_cistatic struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
10588c2ecf20Sopenharmony_ci{
10598c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
10608c2ecf20Sopenharmony_ci	struct ib_qp_init_attr attr = {
10618c2ecf20Sopenharmony_ci		.send_cq		= priv->send_cq,
10628c2ecf20Sopenharmony_ci		.recv_cq		= priv->recv_cq,
10638c2ecf20Sopenharmony_ci		.srq			= priv->cm.srq,
10648c2ecf20Sopenharmony_ci		.cap.max_send_wr	= ipoib_sendq_size,
10658c2ecf20Sopenharmony_ci		.cap.max_send_sge	= 1,
10668c2ecf20Sopenharmony_ci		.sq_sig_type		= IB_SIGNAL_ALL_WR,
10678c2ecf20Sopenharmony_ci		.qp_type		= IB_QPT_RC,
10688c2ecf20Sopenharmony_ci		.qp_context		= tx,
10698c2ecf20Sopenharmony_ci		.create_flags		= 0
10708c2ecf20Sopenharmony_ci	};
10718c2ecf20Sopenharmony_ci	struct ib_qp *tx_qp;
10728c2ecf20Sopenharmony_ci
10738c2ecf20Sopenharmony_ci	if (dev->features & NETIF_F_SG)
10748c2ecf20Sopenharmony_ci		attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
10758c2ecf20Sopenharmony_ci					      MAX_SKB_FRAGS + 1);
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ci	tx_qp = ib_create_qp(priv->pd, &attr);
10788c2ecf20Sopenharmony_ci	tx->max_send_sge = attr.cap.max_send_sge;
10798c2ecf20Sopenharmony_ci	return tx_qp;
10808c2ecf20Sopenharmony_ci}
10818c2ecf20Sopenharmony_ci
10828c2ecf20Sopenharmony_cistatic int ipoib_cm_send_req(struct net_device *dev,
10838c2ecf20Sopenharmony_ci			     struct ib_cm_id *id, struct ib_qp *qp,
10848c2ecf20Sopenharmony_ci			     u32 qpn,
10858c2ecf20Sopenharmony_ci			     struct sa_path_rec *pathrec)
10868c2ecf20Sopenharmony_ci{
10878c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
10888c2ecf20Sopenharmony_ci	struct ipoib_cm_data data = {};
10898c2ecf20Sopenharmony_ci	struct ib_cm_req_param req = {};
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_ci	data.qpn = cpu_to_be32(priv->qp->qp_num);
10928c2ecf20Sopenharmony_ci	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
10938c2ecf20Sopenharmony_ci
10948c2ecf20Sopenharmony_ci	req.primary_path		= pathrec;
10958c2ecf20Sopenharmony_ci	req.alternate_path		= NULL;
10968c2ecf20Sopenharmony_ci	req.service_id			= cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
10978c2ecf20Sopenharmony_ci	req.qp_num			= qp->qp_num;
10988c2ecf20Sopenharmony_ci	req.qp_type			= qp->qp_type;
10998c2ecf20Sopenharmony_ci	req.private_data		= &data;
11008c2ecf20Sopenharmony_ci	req.private_data_len		= sizeof(data);
11018c2ecf20Sopenharmony_ci	req.flow_control		= 0;
11028c2ecf20Sopenharmony_ci
11038c2ecf20Sopenharmony_ci	req.starting_psn		= 0; /* FIXME */
11048c2ecf20Sopenharmony_ci
11058c2ecf20Sopenharmony_ci	/*
11068c2ecf20Sopenharmony_ci	 * Pick some arbitrary defaults here; we could make these
11078c2ecf20Sopenharmony_ci	 * module parameters if anyone cared about setting them.
11088c2ecf20Sopenharmony_ci	 */
11098c2ecf20Sopenharmony_ci	req.responder_resources		= 4;
11108c2ecf20Sopenharmony_ci	req.remote_cm_response_timeout	= 20;
11118c2ecf20Sopenharmony_ci	req.local_cm_response_timeout	= 20;
11128c2ecf20Sopenharmony_ci	req.retry_count			= 0; /* RFC draft warns against retries */
11138c2ecf20Sopenharmony_ci	req.rnr_retry_count		= 0; /* RFC draft warns against retries */
11148c2ecf20Sopenharmony_ci	req.max_cm_retries		= 15;
11158c2ecf20Sopenharmony_ci	req.srq				= ipoib_cm_has_srq(dev);
11168c2ecf20Sopenharmony_ci	return ib_send_cm_req(id, &req);
11178c2ecf20Sopenharmony_ci}
11188c2ecf20Sopenharmony_ci
11198c2ecf20Sopenharmony_cistatic int ipoib_cm_modify_tx_init(struct net_device *dev,
11208c2ecf20Sopenharmony_ci				  struct ib_cm_id *cm_id, struct ib_qp *qp)
11218c2ecf20Sopenharmony_ci{
11228c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
11238c2ecf20Sopenharmony_ci	struct ib_qp_attr qp_attr;
11248c2ecf20Sopenharmony_ci	int qp_attr_mask, ret;
11258c2ecf20Sopenharmony_ci	ret = ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
11268c2ecf20Sopenharmony_ci	if (ret) {
11278c2ecf20Sopenharmony_ci		ipoib_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret);
11288c2ecf20Sopenharmony_ci		return ret;
11298c2ecf20Sopenharmony_ci	}
11308c2ecf20Sopenharmony_ci
11318c2ecf20Sopenharmony_ci	qp_attr.qp_state = IB_QPS_INIT;
11328c2ecf20Sopenharmony_ci	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
11338c2ecf20Sopenharmony_ci	qp_attr.port_num = priv->port;
11348c2ecf20Sopenharmony_ci	qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
11358c2ecf20Sopenharmony_ci
11368c2ecf20Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
11378c2ecf20Sopenharmony_ci	if (ret) {
11388c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
11398c2ecf20Sopenharmony_ci		return ret;
11408c2ecf20Sopenharmony_ci	}
11418c2ecf20Sopenharmony_ci	return 0;
11428c2ecf20Sopenharmony_ci}
11438c2ecf20Sopenharmony_ci
11448c2ecf20Sopenharmony_cistatic int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
11458c2ecf20Sopenharmony_ci			    struct sa_path_rec *pathrec)
11468c2ecf20Sopenharmony_ci{
11478c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
11488c2ecf20Sopenharmony_ci	unsigned int noio_flag;
11498c2ecf20Sopenharmony_ci	int ret;
11508c2ecf20Sopenharmony_ci
11518c2ecf20Sopenharmony_ci	noio_flag = memalloc_noio_save();
11528c2ecf20Sopenharmony_ci	p->tx_ring = vzalloc(array_size(ipoib_sendq_size, sizeof(*p->tx_ring)));
11538c2ecf20Sopenharmony_ci	if (!p->tx_ring) {
11548c2ecf20Sopenharmony_ci		memalloc_noio_restore(noio_flag);
11558c2ecf20Sopenharmony_ci		ret = -ENOMEM;
11568c2ecf20Sopenharmony_ci		goto err_tx;
11578c2ecf20Sopenharmony_ci	}
11588c2ecf20Sopenharmony_ci
11598c2ecf20Sopenharmony_ci	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
11608c2ecf20Sopenharmony_ci	memalloc_noio_restore(noio_flag);
11618c2ecf20Sopenharmony_ci	if (IS_ERR(p->qp)) {
11628c2ecf20Sopenharmony_ci		ret = PTR_ERR(p->qp);
11638c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
11648c2ecf20Sopenharmony_ci		goto err_qp;
11658c2ecf20Sopenharmony_ci	}
11668c2ecf20Sopenharmony_ci
11678c2ecf20Sopenharmony_ci	p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
11688c2ecf20Sopenharmony_ci	if (IS_ERR(p->id)) {
11698c2ecf20Sopenharmony_ci		ret = PTR_ERR(p->id);
11708c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
11718c2ecf20Sopenharmony_ci		goto err_id;
11728c2ecf20Sopenharmony_ci	}
11738c2ecf20Sopenharmony_ci
11748c2ecf20Sopenharmony_ci	ret = ipoib_cm_modify_tx_init(p->dev, p->id,  p->qp);
11758c2ecf20Sopenharmony_ci	if (ret) {
11768c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
11778c2ecf20Sopenharmony_ci		goto err_modify_send;
11788c2ecf20Sopenharmony_ci	}
11798c2ecf20Sopenharmony_ci
11808c2ecf20Sopenharmony_ci	ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
11818c2ecf20Sopenharmony_ci	if (ret) {
11828c2ecf20Sopenharmony_ci		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
11838c2ecf20Sopenharmony_ci		goto err_modify_send;
11848c2ecf20Sopenharmony_ci	}
11858c2ecf20Sopenharmony_ci
11868c2ecf20Sopenharmony_ci	ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
11878c2ecf20Sopenharmony_ci		  p->qp->qp_num, pathrec->dgid.raw, qpn);
11888c2ecf20Sopenharmony_ci
11898c2ecf20Sopenharmony_ci	return 0;
11908c2ecf20Sopenharmony_ci
11918c2ecf20Sopenharmony_cierr_modify_send:
11928c2ecf20Sopenharmony_ci	ib_destroy_cm_id(p->id);
11938c2ecf20Sopenharmony_cierr_id:
11948c2ecf20Sopenharmony_ci	p->id = NULL;
11958c2ecf20Sopenharmony_ci	ib_destroy_qp(p->qp);
11968c2ecf20Sopenharmony_cierr_qp:
11978c2ecf20Sopenharmony_ci	p->qp = NULL;
11988c2ecf20Sopenharmony_ci	vfree(p->tx_ring);
11998c2ecf20Sopenharmony_cierr_tx:
12008c2ecf20Sopenharmony_ci	return ret;
12018c2ecf20Sopenharmony_ci}
12028c2ecf20Sopenharmony_ci
12038c2ecf20Sopenharmony_cistatic void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
12048c2ecf20Sopenharmony_ci{
12058c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
12068c2ecf20Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
12078c2ecf20Sopenharmony_ci	unsigned long begin;
12088c2ecf20Sopenharmony_ci
12098c2ecf20Sopenharmony_ci	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
12108c2ecf20Sopenharmony_ci		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
12118c2ecf20Sopenharmony_ci
12128c2ecf20Sopenharmony_ci	if (p->id)
12138c2ecf20Sopenharmony_ci		ib_destroy_cm_id(p->id);
12148c2ecf20Sopenharmony_ci
12158c2ecf20Sopenharmony_ci	if (p->tx_ring) {
12168c2ecf20Sopenharmony_ci		/* Wait for all sends to complete */
12178c2ecf20Sopenharmony_ci		begin = jiffies;
12188c2ecf20Sopenharmony_ci		while ((int) p->tx_tail - (int) p->tx_head < 0) {
12198c2ecf20Sopenharmony_ci			if (time_after(jiffies, begin + 5 * HZ)) {
12208c2ecf20Sopenharmony_ci				ipoib_warn(priv, "timing out; %d sends not completed\n",
12218c2ecf20Sopenharmony_ci					   p->tx_head - p->tx_tail);
12228c2ecf20Sopenharmony_ci				goto timeout;
12238c2ecf20Sopenharmony_ci			}
12248c2ecf20Sopenharmony_ci
12258c2ecf20Sopenharmony_ci			usleep_range(1000, 2000);
12268c2ecf20Sopenharmony_ci		}
12278c2ecf20Sopenharmony_ci	}
12288c2ecf20Sopenharmony_ci
12298c2ecf20Sopenharmony_citimeout:
12308c2ecf20Sopenharmony_ci
12318c2ecf20Sopenharmony_ci	while ((int) p->tx_tail - (int) p->tx_head < 0) {
12328c2ecf20Sopenharmony_ci		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
12338c2ecf20Sopenharmony_ci		ipoib_dma_unmap_tx(priv, tx_req);
12348c2ecf20Sopenharmony_ci		dev_kfree_skb_any(tx_req->skb);
12358c2ecf20Sopenharmony_ci		netif_tx_lock_bh(p->dev);
12368c2ecf20Sopenharmony_ci		++p->tx_tail;
12378c2ecf20Sopenharmony_ci		++priv->global_tx_tail;
12388c2ecf20Sopenharmony_ci		if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
12398c2ecf20Sopenharmony_ci			     ipoib_sendq_size >> 1) &&
12408c2ecf20Sopenharmony_ci		    netif_queue_stopped(p->dev) &&
12418c2ecf20Sopenharmony_ci		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
12428c2ecf20Sopenharmony_ci			netif_wake_queue(p->dev);
12438c2ecf20Sopenharmony_ci		netif_tx_unlock_bh(p->dev);
12448c2ecf20Sopenharmony_ci	}
12458c2ecf20Sopenharmony_ci
12468c2ecf20Sopenharmony_ci	if (p->qp)
12478c2ecf20Sopenharmony_ci		ib_destroy_qp(p->qp);
12488c2ecf20Sopenharmony_ci
12498c2ecf20Sopenharmony_ci	vfree(p->tx_ring);
12508c2ecf20Sopenharmony_ci	kfree(p);
12518c2ecf20Sopenharmony_ci}
12528c2ecf20Sopenharmony_ci
12538c2ecf20Sopenharmony_cistatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
12548c2ecf20Sopenharmony_ci			       const struct ib_cm_event *event)
12558c2ecf20Sopenharmony_ci{
12568c2ecf20Sopenharmony_ci	struct ipoib_cm_tx *tx = cm_id->context;
12578c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
12588c2ecf20Sopenharmony_ci	struct net_device *dev = priv->dev;
12598c2ecf20Sopenharmony_ci	struct ipoib_neigh *neigh;
12608c2ecf20Sopenharmony_ci	unsigned long flags;
12618c2ecf20Sopenharmony_ci	int ret;
12628c2ecf20Sopenharmony_ci
12638c2ecf20Sopenharmony_ci	switch (event->event) {
12648c2ecf20Sopenharmony_ci	case IB_CM_DREQ_RECEIVED:
12658c2ecf20Sopenharmony_ci		ipoib_dbg(priv, "DREQ received.\n");
12668c2ecf20Sopenharmony_ci		ib_send_cm_drep(cm_id, NULL, 0);
12678c2ecf20Sopenharmony_ci		break;
12688c2ecf20Sopenharmony_ci	case IB_CM_REP_RECEIVED:
12698c2ecf20Sopenharmony_ci		ipoib_dbg(priv, "REP received.\n");
12708c2ecf20Sopenharmony_ci		ret = ipoib_cm_rep_handler(cm_id, event);
12718c2ecf20Sopenharmony_ci		if (ret)
12728c2ecf20Sopenharmony_ci			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
12738c2ecf20Sopenharmony_ci				       NULL, 0, NULL, 0);
12748c2ecf20Sopenharmony_ci		break;
12758c2ecf20Sopenharmony_ci	case IB_CM_REQ_ERROR:
12768c2ecf20Sopenharmony_ci	case IB_CM_REJ_RECEIVED:
12778c2ecf20Sopenharmony_ci	case IB_CM_TIMEWAIT_EXIT:
12788c2ecf20Sopenharmony_ci		ipoib_dbg(priv, "CM error %d.\n", event->event);
12798c2ecf20Sopenharmony_ci		netif_tx_lock_bh(dev);
12808c2ecf20Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
12818c2ecf20Sopenharmony_ci		neigh = tx->neigh;
12828c2ecf20Sopenharmony_ci
12838c2ecf20Sopenharmony_ci		if (neigh) {
12848c2ecf20Sopenharmony_ci			neigh->cm = NULL;
12858c2ecf20Sopenharmony_ci			ipoib_neigh_free(neigh);
12868c2ecf20Sopenharmony_ci
12878c2ecf20Sopenharmony_ci			tx->neigh = NULL;
12888c2ecf20Sopenharmony_ci		}
12898c2ecf20Sopenharmony_ci
12908c2ecf20Sopenharmony_ci		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
12918c2ecf20Sopenharmony_ci			list_move(&tx->list, &priv->cm.reap_list);
12928c2ecf20Sopenharmony_ci			queue_work(priv->wq, &priv->cm.reap_task);
12938c2ecf20Sopenharmony_ci		}
12948c2ecf20Sopenharmony_ci
12958c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
12968c2ecf20Sopenharmony_ci		netif_tx_unlock_bh(dev);
12978c2ecf20Sopenharmony_ci		break;
12988c2ecf20Sopenharmony_ci	default:
12998c2ecf20Sopenharmony_ci		break;
13008c2ecf20Sopenharmony_ci	}
13018c2ecf20Sopenharmony_ci
13028c2ecf20Sopenharmony_ci	return 0;
13038c2ecf20Sopenharmony_ci}
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_cistruct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
13068c2ecf20Sopenharmony_ci				       struct ipoib_neigh *neigh)
13078c2ecf20Sopenharmony_ci{
13088c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
13098c2ecf20Sopenharmony_ci	struct ipoib_cm_tx *tx;
13108c2ecf20Sopenharmony_ci
13118c2ecf20Sopenharmony_ci	tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
13128c2ecf20Sopenharmony_ci	if (!tx)
13138c2ecf20Sopenharmony_ci		return NULL;
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci	neigh->cm = tx;
13168c2ecf20Sopenharmony_ci	tx->neigh = neigh;
13178c2ecf20Sopenharmony_ci	tx->dev = dev;
13188c2ecf20Sopenharmony_ci	list_add(&tx->list, &priv->cm.start_list);
13198c2ecf20Sopenharmony_ci	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
13208c2ecf20Sopenharmony_ci	queue_work(priv->wq, &priv->cm.start_task);
13218c2ecf20Sopenharmony_ci	return tx;
13228c2ecf20Sopenharmony_ci}
13238c2ecf20Sopenharmony_ci
13248c2ecf20Sopenharmony_civoid ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
13258c2ecf20Sopenharmony_ci{
13268c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
13278c2ecf20Sopenharmony_ci	unsigned long flags;
13288c2ecf20Sopenharmony_ci	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
13298c2ecf20Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
13308c2ecf20Sopenharmony_ci		list_move(&tx->list, &priv->cm.reap_list);
13318c2ecf20Sopenharmony_ci		queue_work(priv->wq, &priv->cm.reap_task);
13328c2ecf20Sopenharmony_ci		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
13338c2ecf20Sopenharmony_ci			  tx->neigh->daddr + 4);
13348c2ecf20Sopenharmony_ci		tx->neigh = NULL;
13358c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
13368c2ecf20Sopenharmony_ci	}
13378c2ecf20Sopenharmony_ci}
13388c2ecf20Sopenharmony_ci
13398c2ecf20Sopenharmony_ci#define QPN_AND_OPTIONS_OFFSET	4
13408c2ecf20Sopenharmony_ci
13418c2ecf20Sopenharmony_cistatic void ipoib_cm_tx_start(struct work_struct *work)
13428c2ecf20Sopenharmony_ci{
13438c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
13448c2ecf20Sopenharmony_ci						   cm.start_task);
13458c2ecf20Sopenharmony_ci	struct net_device *dev = priv->dev;
13468c2ecf20Sopenharmony_ci	struct ipoib_neigh *neigh;
13478c2ecf20Sopenharmony_ci	struct ipoib_cm_tx *p;
13488c2ecf20Sopenharmony_ci	unsigned long flags;
13498c2ecf20Sopenharmony_ci	struct ipoib_path *path;
13508c2ecf20Sopenharmony_ci	int ret;
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	struct sa_path_rec pathrec;
13538c2ecf20Sopenharmony_ci	u32 qpn;
13548c2ecf20Sopenharmony_ci
13558c2ecf20Sopenharmony_ci	netif_tx_lock_bh(dev);
13568c2ecf20Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_ci	while (!list_empty(&priv->cm.start_list)) {
13598c2ecf20Sopenharmony_ci		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
13608c2ecf20Sopenharmony_ci		list_del_init(&p->list);
13618c2ecf20Sopenharmony_ci		neigh = p->neigh;
13628c2ecf20Sopenharmony_ci
13638c2ecf20Sopenharmony_ci		qpn = IPOIB_QPN(neigh->daddr);
13648c2ecf20Sopenharmony_ci		/*
13658c2ecf20Sopenharmony_ci		 * As long as the search is with these 2 locks,
13668c2ecf20Sopenharmony_ci		 * path existence indicates its validity.
13678c2ecf20Sopenharmony_ci		 */
13688c2ecf20Sopenharmony_ci		path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
13698c2ecf20Sopenharmony_ci		if (!path) {
13708c2ecf20Sopenharmony_ci			pr_info("%s ignore not valid path %pI6\n",
13718c2ecf20Sopenharmony_ci				__func__,
13728c2ecf20Sopenharmony_ci				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
13738c2ecf20Sopenharmony_ci			goto free_neigh;
13748c2ecf20Sopenharmony_ci		}
13758c2ecf20Sopenharmony_ci		memcpy(&pathrec, &path->pathrec, sizeof(pathrec));
13768c2ecf20Sopenharmony_ci
13778c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
13788c2ecf20Sopenharmony_ci		netif_tx_unlock_bh(dev);
13798c2ecf20Sopenharmony_ci
13808c2ecf20Sopenharmony_ci		ret = ipoib_cm_tx_init(p, qpn, &pathrec);
13818c2ecf20Sopenharmony_ci
13828c2ecf20Sopenharmony_ci		netif_tx_lock_bh(dev);
13838c2ecf20Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
13848c2ecf20Sopenharmony_ci
13858c2ecf20Sopenharmony_ci		if (ret) {
13868c2ecf20Sopenharmony_cifree_neigh:
13878c2ecf20Sopenharmony_ci			neigh = p->neigh;
13888c2ecf20Sopenharmony_ci			if (neigh) {
13898c2ecf20Sopenharmony_ci				neigh->cm = NULL;
13908c2ecf20Sopenharmony_ci				ipoib_neigh_free(neigh);
13918c2ecf20Sopenharmony_ci			}
13928c2ecf20Sopenharmony_ci			list_del(&p->list);
13938c2ecf20Sopenharmony_ci			kfree(p);
13948c2ecf20Sopenharmony_ci		}
13958c2ecf20Sopenharmony_ci	}
13968c2ecf20Sopenharmony_ci
13978c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
13988c2ecf20Sopenharmony_ci	netif_tx_unlock_bh(dev);
13998c2ecf20Sopenharmony_ci}
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_cistatic void ipoib_cm_tx_reap(struct work_struct *work)
14028c2ecf20Sopenharmony_ci{
14038c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
14048c2ecf20Sopenharmony_ci						   cm.reap_task);
14058c2ecf20Sopenharmony_ci	struct net_device *dev = priv->dev;
14068c2ecf20Sopenharmony_ci	struct ipoib_cm_tx *p;
14078c2ecf20Sopenharmony_ci	unsigned long flags;
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_ci	netif_tx_lock_bh(dev);
14108c2ecf20Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_ci	while (!list_empty(&priv->cm.reap_list)) {
14138c2ecf20Sopenharmony_ci		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
14148c2ecf20Sopenharmony_ci		list_del_init(&p->list);
14158c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
14168c2ecf20Sopenharmony_ci		netif_tx_unlock_bh(dev);
14178c2ecf20Sopenharmony_ci		ipoib_cm_tx_destroy(p);
14188c2ecf20Sopenharmony_ci		netif_tx_lock_bh(dev);
14198c2ecf20Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
14208c2ecf20Sopenharmony_ci	}
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
14238c2ecf20Sopenharmony_ci	netif_tx_unlock_bh(dev);
14248c2ecf20Sopenharmony_ci}
14258c2ecf20Sopenharmony_ci
14268c2ecf20Sopenharmony_cistatic void ipoib_cm_skb_reap(struct work_struct *work)
14278c2ecf20Sopenharmony_ci{
14288c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
14298c2ecf20Sopenharmony_ci						   cm.skb_task);
14308c2ecf20Sopenharmony_ci	struct net_device *dev = priv->dev;
14318c2ecf20Sopenharmony_ci	struct sk_buff *skb;
14328c2ecf20Sopenharmony_ci	unsigned long flags;
14338c2ecf20Sopenharmony_ci	unsigned int mtu = priv->mcast_mtu;
14348c2ecf20Sopenharmony_ci
14358c2ecf20Sopenharmony_ci	netif_tx_lock_bh(dev);
14368c2ecf20Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
14378c2ecf20Sopenharmony_ci
14388c2ecf20Sopenharmony_ci	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
14398c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
14408c2ecf20Sopenharmony_ci		netif_tx_unlock_bh(dev);
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_ci		if (skb->protocol == htons(ETH_P_IP)) {
14438c2ecf20Sopenharmony_ci			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
14448c2ecf20Sopenharmony_ci			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
14458c2ecf20Sopenharmony_ci		}
14468c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
14478c2ecf20Sopenharmony_ci		else if (skb->protocol == htons(ETH_P_IPV6)) {
14488c2ecf20Sopenharmony_ci			memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
14498c2ecf20Sopenharmony_ci			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
14508c2ecf20Sopenharmony_ci		}
14518c2ecf20Sopenharmony_ci#endif
14528c2ecf20Sopenharmony_ci		dev_kfree_skb_any(skb);
14538c2ecf20Sopenharmony_ci
14548c2ecf20Sopenharmony_ci		netif_tx_lock_bh(dev);
14558c2ecf20Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
14568c2ecf20Sopenharmony_ci	}
14578c2ecf20Sopenharmony_ci
14588c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
14598c2ecf20Sopenharmony_ci	netif_tx_unlock_bh(dev);
14608c2ecf20Sopenharmony_ci}
14618c2ecf20Sopenharmony_ci
14628c2ecf20Sopenharmony_civoid ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
14638c2ecf20Sopenharmony_ci			   unsigned int mtu)
14648c2ecf20Sopenharmony_ci{
14658c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
14668c2ecf20Sopenharmony_ci	int e = skb_queue_empty(&priv->cm.skb_queue);
14678c2ecf20Sopenharmony_ci
14688c2ecf20Sopenharmony_ci	skb_dst_update_pmtu(skb, mtu);
14698c2ecf20Sopenharmony_ci
14708c2ecf20Sopenharmony_ci	skb_queue_tail(&priv->cm.skb_queue, skb);
14718c2ecf20Sopenharmony_ci	if (e)
14728c2ecf20Sopenharmony_ci		queue_work(priv->wq, &priv->cm.skb_task);
14738c2ecf20Sopenharmony_ci}
14748c2ecf20Sopenharmony_ci
14758c2ecf20Sopenharmony_cistatic void ipoib_cm_rx_reap(struct work_struct *work)
14768c2ecf20Sopenharmony_ci{
14778c2ecf20Sopenharmony_ci	ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv,
14788c2ecf20Sopenharmony_ci						cm.rx_reap_task)->dev);
14798c2ecf20Sopenharmony_ci}
14808c2ecf20Sopenharmony_ci
14818c2ecf20Sopenharmony_cistatic void ipoib_cm_stale_task(struct work_struct *work)
14828c2ecf20Sopenharmony_ci{
14838c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
14848c2ecf20Sopenharmony_ci						   cm.stale_task.work);
14858c2ecf20Sopenharmony_ci	struct ipoib_cm_rx *p;
14868c2ecf20Sopenharmony_ci	int ret;
14878c2ecf20Sopenharmony_ci
14888c2ecf20Sopenharmony_ci	spin_lock_irq(&priv->lock);
14898c2ecf20Sopenharmony_ci	while (!list_empty(&priv->cm.passive_ids)) {
14908c2ecf20Sopenharmony_ci		/* List is sorted by LRU, start from tail,
14918c2ecf20Sopenharmony_ci		 * stop when we see a recently used entry */
14928c2ecf20Sopenharmony_ci		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
14938c2ecf20Sopenharmony_ci		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
14948c2ecf20Sopenharmony_ci			break;
14958c2ecf20Sopenharmony_ci		list_move(&p->list, &priv->cm.rx_error_list);
14968c2ecf20Sopenharmony_ci		p->state = IPOIB_CM_RX_ERROR;
14978c2ecf20Sopenharmony_ci		spin_unlock_irq(&priv->lock);
14988c2ecf20Sopenharmony_ci		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
14998c2ecf20Sopenharmony_ci		if (ret)
15008c2ecf20Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
15018c2ecf20Sopenharmony_ci		spin_lock_irq(&priv->lock);
15028c2ecf20Sopenharmony_ci	}
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_ci	if (!list_empty(&priv->cm.passive_ids))
15058c2ecf20Sopenharmony_ci		queue_delayed_work(priv->wq,
15068c2ecf20Sopenharmony_ci				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
15078c2ecf20Sopenharmony_ci	spin_unlock_irq(&priv->lock);
15088c2ecf20Sopenharmony_ci}
15098c2ecf20Sopenharmony_ci
15108c2ecf20Sopenharmony_cistatic ssize_t show_mode(struct device *d, struct device_attribute *attr,
15118c2ecf20Sopenharmony_ci			 char *buf)
15128c2ecf20Sopenharmony_ci{
15138c2ecf20Sopenharmony_ci	struct net_device *dev = to_net_dev(d);
15148c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
15158c2ecf20Sopenharmony_ci
15168c2ecf20Sopenharmony_ci	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
15178c2ecf20Sopenharmony_ci		return sprintf(buf, "connected\n");
15188c2ecf20Sopenharmony_ci	else
15198c2ecf20Sopenharmony_ci		return sprintf(buf, "datagram\n");
15208c2ecf20Sopenharmony_ci}
15218c2ecf20Sopenharmony_ci
15228c2ecf20Sopenharmony_cistatic ssize_t set_mode(struct device *d, struct device_attribute *attr,
15238c2ecf20Sopenharmony_ci			const char *buf, size_t count)
15248c2ecf20Sopenharmony_ci{
15258c2ecf20Sopenharmony_ci	struct net_device *dev = to_net_dev(d);
15268c2ecf20Sopenharmony_ci	int ret;
15278c2ecf20Sopenharmony_ci
15288c2ecf20Sopenharmony_ci	if (!rtnl_trylock()) {
15298c2ecf20Sopenharmony_ci		return restart_syscall();
15308c2ecf20Sopenharmony_ci	}
15318c2ecf20Sopenharmony_ci
15328c2ecf20Sopenharmony_ci	if (dev->reg_state != NETREG_REGISTERED) {
15338c2ecf20Sopenharmony_ci		rtnl_unlock();
15348c2ecf20Sopenharmony_ci		return -EPERM;
15358c2ecf20Sopenharmony_ci	}
15368c2ecf20Sopenharmony_ci
15378c2ecf20Sopenharmony_ci	ret = ipoib_set_mode(dev, buf);
15388c2ecf20Sopenharmony_ci
15398c2ecf20Sopenharmony_ci	/* The assumption is that the function ipoib_set_mode returned
15408c2ecf20Sopenharmony_ci	 * with the rtnl held by it, if not the value -EBUSY returned,
15418c2ecf20Sopenharmony_ci	 * then no need to rtnl_unlock
15428c2ecf20Sopenharmony_ci	 */
15438c2ecf20Sopenharmony_ci	if (ret != -EBUSY)
15448c2ecf20Sopenharmony_ci		rtnl_unlock();
15458c2ecf20Sopenharmony_ci
15468c2ecf20Sopenharmony_ci	return (!ret || ret == -EBUSY) ? count : ret;
15478c2ecf20Sopenharmony_ci}
15488c2ecf20Sopenharmony_ci
15498c2ecf20Sopenharmony_cistatic DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
15508c2ecf20Sopenharmony_ci
15518c2ecf20Sopenharmony_ciint ipoib_cm_add_mode_attr(struct net_device *dev)
15528c2ecf20Sopenharmony_ci{
15538c2ecf20Sopenharmony_ci	return device_create_file(&dev->dev, &dev_attr_mode);
15548c2ecf20Sopenharmony_ci}
15558c2ecf20Sopenharmony_ci
15568c2ecf20Sopenharmony_cistatic void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
15578c2ecf20Sopenharmony_ci{
15588c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
15598c2ecf20Sopenharmony_ci	struct ib_srq_init_attr srq_init_attr = {
15608c2ecf20Sopenharmony_ci		.srq_type = IB_SRQT_BASIC,
15618c2ecf20Sopenharmony_ci		.attr = {
15628c2ecf20Sopenharmony_ci			.max_wr  = ipoib_recvq_size,
15638c2ecf20Sopenharmony_ci			.max_sge = max_sge
15648c2ecf20Sopenharmony_ci		}
15658c2ecf20Sopenharmony_ci	};
15668c2ecf20Sopenharmony_ci
15678c2ecf20Sopenharmony_ci	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
15688c2ecf20Sopenharmony_ci	if (IS_ERR(priv->cm.srq)) {
15698c2ecf20Sopenharmony_ci		if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP)
15708c2ecf20Sopenharmony_ci			pr_warn("%s: failed to allocate SRQ, error %ld\n",
15718c2ecf20Sopenharmony_ci			       priv->ca->name, PTR_ERR(priv->cm.srq));
15728c2ecf20Sopenharmony_ci		priv->cm.srq = NULL;
15738c2ecf20Sopenharmony_ci		return;
15748c2ecf20Sopenharmony_ci	}
15758c2ecf20Sopenharmony_ci
15768c2ecf20Sopenharmony_ci	priv->cm.srq_ring = vzalloc(array_size(ipoib_recvq_size,
15778c2ecf20Sopenharmony_ci					       sizeof(*priv->cm.srq_ring)));
15788c2ecf20Sopenharmony_ci	if (!priv->cm.srq_ring) {
15798c2ecf20Sopenharmony_ci		ib_destroy_srq(priv->cm.srq);
15808c2ecf20Sopenharmony_ci		priv->cm.srq = NULL;
15818c2ecf20Sopenharmony_ci		return;
15828c2ecf20Sopenharmony_ci	}
15838c2ecf20Sopenharmony_ci
15848c2ecf20Sopenharmony_ci}
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_ciint ipoib_cm_dev_init(struct net_device *dev)
15878c2ecf20Sopenharmony_ci{
15888c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
15898c2ecf20Sopenharmony_ci	int max_srq_sge, i;
15908c2ecf20Sopenharmony_ci
15918c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.passive_ids);
15928c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.reap_list);
15938c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.start_list);
15948c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_error_list);
15958c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
15968c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
15978c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
15988c2ecf20Sopenharmony_ci	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
15998c2ecf20Sopenharmony_ci	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
16008c2ecf20Sopenharmony_ci	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
16018c2ecf20Sopenharmony_ci	INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
16028c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
16038c2ecf20Sopenharmony_ci
16048c2ecf20Sopenharmony_ci	skb_queue_head_init(&priv->cm.skb_queue);
16058c2ecf20Sopenharmony_ci
16068c2ecf20Sopenharmony_ci	ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
16078c2ecf20Sopenharmony_ci
16088c2ecf20Sopenharmony_ci	max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
16098c2ecf20Sopenharmony_ci	ipoib_cm_create_srq(dev, max_srq_sge);
16108c2ecf20Sopenharmony_ci	if (ipoib_cm_has_srq(dev)) {
16118c2ecf20Sopenharmony_ci		priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
16128c2ecf20Sopenharmony_ci		priv->cm.num_frags  = max_srq_sge;
16138c2ecf20Sopenharmony_ci		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
16148c2ecf20Sopenharmony_ci			  priv->cm.max_cm_mtu, priv->cm.num_frags);
16158c2ecf20Sopenharmony_ci	} else {
16168c2ecf20Sopenharmony_ci		priv->cm.max_cm_mtu = IPOIB_CM_MTU;
16178c2ecf20Sopenharmony_ci		priv->cm.num_frags  = IPOIB_CM_RX_SG;
16188c2ecf20Sopenharmony_ci	}
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
16218c2ecf20Sopenharmony_ci
16228c2ecf20Sopenharmony_ci	if (ipoib_cm_has_srq(dev)) {
16238c2ecf20Sopenharmony_ci		for (i = 0; i < ipoib_recvq_size; ++i) {
16248c2ecf20Sopenharmony_ci			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
16258c2ecf20Sopenharmony_ci						   priv->cm.num_frags - 1,
16268c2ecf20Sopenharmony_ci						   priv->cm.srq_ring[i].mapping,
16278c2ecf20Sopenharmony_ci						   GFP_KERNEL)) {
16288c2ecf20Sopenharmony_ci				ipoib_warn(priv, "failed to allocate "
16298c2ecf20Sopenharmony_ci					   "receive buffer %d\n", i);
16308c2ecf20Sopenharmony_ci				ipoib_cm_dev_cleanup(dev);
16318c2ecf20Sopenharmony_ci				return -ENOMEM;
16328c2ecf20Sopenharmony_ci			}
16338c2ecf20Sopenharmony_ci
16348c2ecf20Sopenharmony_ci			if (ipoib_cm_post_receive_srq(dev, i)) {
16358c2ecf20Sopenharmony_ci				ipoib_warn(priv, "ipoib_cm_post_receive_srq "
16368c2ecf20Sopenharmony_ci					   "failed for buf %d\n", i);
16378c2ecf20Sopenharmony_ci				ipoib_cm_dev_cleanup(dev);
16388c2ecf20Sopenharmony_ci				return -EIO;
16398c2ecf20Sopenharmony_ci			}
16408c2ecf20Sopenharmony_ci		}
16418c2ecf20Sopenharmony_ci	}
16428c2ecf20Sopenharmony_ci
16438c2ecf20Sopenharmony_ci	priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
16448c2ecf20Sopenharmony_ci	return 0;
16458c2ecf20Sopenharmony_ci}
16468c2ecf20Sopenharmony_ci
16478c2ecf20Sopenharmony_civoid ipoib_cm_dev_cleanup(struct net_device *dev)
16488c2ecf20Sopenharmony_ci{
16498c2ecf20Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
16508c2ecf20Sopenharmony_ci
16518c2ecf20Sopenharmony_ci	if (!priv->cm.srq)
16528c2ecf20Sopenharmony_ci		return;
16538c2ecf20Sopenharmony_ci
16548c2ecf20Sopenharmony_ci	ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
16558c2ecf20Sopenharmony_ci
16568c2ecf20Sopenharmony_ci	ib_destroy_srq(priv->cm.srq);
16578c2ecf20Sopenharmony_ci	priv->cm.srq = NULL;
16588c2ecf20Sopenharmony_ci	if (!priv->cm.srq_ring)
16598c2ecf20Sopenharmony_ci		return;
16608c2ecf20Sopenharmony_ci
16618c2ecf20Sopenharmony_ci	ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring);
16628c2ecf20Sopenharmony_ci	priv->cm.srq_ring = NULL;
16638c2ecf20Sopenharmony_ci}
1664