/*
 * Copyright (c) 2006 Mellanox Technologies. All rights reserved
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#include <rdma/ib_cm.h>
3462306a36Sopenharmony_ci#include <net/dst.h>
3562306a36Sopenharmony_ci#include <net/icmp.h>
3662306a36Sopenharmony_ci#include <linux/icmpv6.h>
3762306a36Sopenharmony_ci#include <linux/delay.h>
3862306a36Sopenharmony_ci#include <linux/slab.h>
3962306a36Sopenharmony_ci#include <linux/vmalloc.h>
4062306a36Sopenharmony_ci#include <linux/moduleparam.h>
4162306a36Sopenharmony_ci#include <linux/sched/signal.h>
4262306a36Sopenharmony_ci#include <linux/sched/mm.h>
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci#include "ipoib.h"
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ciint ipoib_max_conn_qp = 128;
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_cimodule_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
4962306a36Sopenharmony_ciMODULE_PARM_DESC(max_nonsrq_conn_qp,
5062306a36Sopenharmony_ci		 "Max number of connected-mode QPs per interface "
5162306a36Sopenharmony_ci		 "(applied only if shared receive queue is not available)");
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
5462306a36Sopenharmony_cistatic int data_debug_level;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_cimodule_param_named(cm_data_debug_level, data_debug_level, int, 0644);
5762306a36Sopenharmony_ciMODULE_PARM_DESC(cm_data_debug_level,
5862306a36Sopenharmony_ci		 "Enable data path debug tracing for connected mode if > 0");
5962306a36Sopenharmony_ci#endif
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
6462306a36Sopenharmony_ci#define IPOIB_CM_RX_TIMEOUT     (2 * 256 * HZ)
6562306a36Sopenharmony_ci#define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
6662306a36Sopenharmony_ci#define IPOIB_CM_RX_UPDATE_MASK (0x3)
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci#define IPOIB_CM_RX_RESERVE     (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_cistatic struct ib_qp_attr ipoib_cm_err_attr = {
7162306a36Sopenharmony_ci	.qp_state = IB_QPS_ERR
7262306a36Sopenharmony_ci};
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_cistatic struct ib_send_wr ipoib_cm_rx_drain_wr = {
7762306a36Sopenharmony_ci	.opcode = IB_WR_SEND,
7862306a36Sopenharmony_ci};
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_cistatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
8162306a36Sopenharmony_ci			       const struct ib_cm_event *event);
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_cistatic void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
8462306a36Sopenharmony_ci				  u64 mapping[IPOIB_CM_RX_SG])
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	int i;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	for (i = 0; i < frags; ++i)
9162306a36Sopenharmony_ci		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
9262306a36Sopenharmony_ci}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_cistatic int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
9762306a36Sopenharmony_ci	int i, ret;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	for (i = 0; i < priv->cm.num_frags; ++i)
10262306a36Sopenharmony_ci		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL);
10562306a36Sopenharmony_ci	if (unlikely(ret)) {
10662306a36Sopenharmony_ci		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
10762306a36Sopenharmony_ci		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
10862306a36Sopenharmony_ci				      priv->cm.srq_ring[id].mapping);
10962306a36Sopenharmony_ci		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
11062306a36Sopenharmony_ci		priv->cm.srq_ring[id].skb = NULL;
11162306a36Sopenharmony_ci	}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	return ret;
11462306a36Sopenharmony_ci}
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_cistatic int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
11762306a36Sopenharmony_ci					struct ipoib_cm_rx *rx,
11862306a36Sopenharmony_ci					struct ib_recv_wr *wr,
11962306a36Sopenharmony_ci					struct ib_sge *sge, int id)
12062306a36Sopenharmony_ci{
12162306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
12262306a36Sopenharmony_ci	int i, ret;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
12762306a36Sopenharmony_ci		sge[i].addr = rx->rx_ring[id].mapping[i];
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	ret = ib_post_recv(rx->qp, wr, NULL);
13062306a36Sopenharmony_ci	if (unlikely(ret)) {
13162306a36Sopenharmony_ci		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
13262306a36Sopenharmony_ci		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
13362306a36Sopenharmony_ci				      rx->rx_ring[id].mapping);
13462306a36Sopenharmony_ci		dev_kfree_skb_any(rx->rx_ring[id].skb);
13562306a36Sopenharmony_ci		rx->rx_ring[id].skb = NULL;
13662306a36Sopenharmony_ci	}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	return ret;
13962306a36Sopenharmony_ci}
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_cistatic struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
14262306a36Sopenharmony_ci					     struct ipoib_cm_rx_buf *rx_ring,
14362306a36Sopenharmony_ci					     int id, int frags,
14462306a36Sopenharmony_ci					     u64 mapping[IPOIB_CM_RX_SG],
14562306a36Sopenharmony_ci					     gfp_t gfp)
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
14862306a36Sopenharmony_ci	struct sk_buff *skb;
14962306a36Sopenharmony_ci	int i;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
15262306a36Sopenharmony_ci	if (unlikely(!skb))
15362306a36Sopenharmony_ci		return NULL;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	/*
15662306a36Sopenharmony_ci	 * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the
15762306a36Sopenharmony_ci	 * IP header to a multiple of 16.
15862306a36Sopenharmony_ci	 */
15962306a36Sopenharmony_ci	skb_reserve(skb, IPOIB_CM_RX_RESERVE);
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
16262306a36Sopenharmony_ci				       DMA_FROM_DEVICE);
16362306a36Sopenharmony_ci	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
16462306a36Sopenharmony_ci		dev_kfree_skb_any(skb);
16562306a36Sopenharmony_ci		return NULL;
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	for (i = 0; i < frags; i++) {
16962306a36Sopenharmony_ci		struct page *page = alloc_page(gfp);
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci		if (!page)
17262306a36Sopenharmony_ci			goto partial_error;
17362306a36Sopenharmony_ci		skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci		mapping[i + 1] = ib_dma_map_page(priv->ca, page,
17662306a36Sopenharmony_ci						 0, PAGE_SIZE, DMA_FROM_DEVICE);
17762306a36Sopenharmony_ci		if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
17862306a36Sopenharmony_ci			goto partial_error;
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	rx_ring[id].skb = skb;
18262306a36Sopenharmony_ci	return skb;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cipartial_error:
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	for (; i > 0; --i)
18962306a36Sopenharmony_ci		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	dev_kfree_skb_any(skb);
19262306a36Sopenharmony_ci	return NULL;
19362306a36Sopenharmony_ci}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_cistatic void ipoib_cm_free_rx_ring(struct net_device *dev,
19662306a36Sopenharmony_ci				  struct ipoib_cm_rx_buf *rx_ring)
19762306a36Sopenharmony_ci{
19862306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
19962306a36Sopenharmony_ci	int i;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	for (i = 0; i < ipoib_recvq_size; ++i)
20262306a36Sopenharmony_ci		if (rx_ring[i].skb) {
20362306a36Sopenharmony_ci			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
20462306a36Sopenharmony_ci					      rx_ring[i].mapping);
20562306a36Sopenharmony_ci			dev_kfree_skb_any(rx_ring[i].skb);
20662306a36Sopenharmony_ci		}
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	vfree(rx_ring);
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_cistatic void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	struct ipoib_cm_rx *p;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	/* We only reserved 1 extra slot in CQ for drain WRs, so
21662306a36Sopenharmony_ci	 * make sure we have at most 1 outstanding WR. */
21762306a36Sopenharmony_ci	if (list_empty(&priv->cm.rx_flush_list) ||
21862306a36Sopenharmony_ci	    !list_empty(&priv->cm.rx_drain_list))
21962306a36Sopenharmony_ci		return;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	/*
22262306a36Sopenharmony_ci	 * QPs on flush list are error state.  This way, a "flush
22362306a36Sopenharmony_ci	 * error" WC will be immediately generated for each WR we post.
22462306a36Sopenharmony_ci	 */
22562306a36Sopenharmony_ci	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
22662306a36Sopenharmony_ci	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
22762306a36Sopenharmony_ci	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
22862306a36Sopenharmony_ci		ipoib_warn(priv, "failed to post drain wr\n");
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct ipoib_cm_rx *p = ctx;
23662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
23762306a36Sopenharmony_ci	unsigned long flags;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
24062306a36Sopenharmony_ci		return;
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
24362306a36Sopenharmony_ci	list_move(&p->list, &priv->cm.rx_flush_list);
24462306a36Sopenharmony_ci	p->state = IPOIB_CM_RX_FLUSH;
24562306a36Sopenharmony_ci	ipoib_cm_start_rx_drain(priv);
24662306a36Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
24762306a36Sopenharmony_ci}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_cistatic struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
25062306a36Sopenharmony_ci					   struct ipoib_cm_rx *p)
25162306a36Sopenharmony_ci{
25262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
25362306a36Sopenharmony_ci	struct ib_qp_init_attr attr = {
25462306a36Sopenharmony_ci		.event_handler = ipoib_cm_rx_event_handler,
25562306a36Sopenharmony_ci		.send_cq = priv->recv_cq, /* For drain WR */
25662306a36Sopenharmony_ci		.recv_cq = priv->recv_cq,
25762306a36Sopenharmony_ci		.srq = priv->cm.srq,
25862306a36Sopenharmony_ci		.cap.max_send_wr = 1, /* For drain WR */
25962306a36Sopenharmony_ci		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
26062306a36Sopenharmony_ci		.sq_sig_type = IB_SIGNAL_ALL_WR,
26162306a36Sopenharmony_ci		.qp_type = IB_QPT_RC,
26262306a36Sopenharmony_ci		.qp_context = p,
26362306a36Sopenharmony_ci	};
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	if (!ipoib_cm_has_srq(dev)) {
26662306a36Sopenharmony_ci		attr.cap.max_recv_wr  = ipoib_recvq_size;
26762306a36Sopenharmony_ci		attr.cap.max_recv_sge = IPOIB_CM_RX_SG;
26862306a36Sopenharmony_ci	}
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci	return ib_create_qp(priv->pd, &attr);
27162306a36Sopenharmony_ci}
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_cistatic int ipoib_cm_modify_rx_qp(struct net_device *dev,
27462306a36Sopenharmony_ci				 struct ib_cm_id *cm_id, struct ib_qp *qp,
27562306a36Sopenharmony_ci				 unsigned int psn)
27662306a36Sopenharmony_ci{
27762306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
27862306a36Sopenharmony_ci	struct ib_qp_attr qp_attr;
27962306a36Sopenharmony_ci	int qp_attr_mask, ret;
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_INIT;
28262306a36Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
28362306a36Sopenharmony_ci	if (ret) {
28462306a36Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
28562306a36Sopenharmony_ci		return ret;
28662306a36Sopenharmony_ci	}
28762306a36Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
28862306a36Sopenharmony_ci	if (ret) {
28962306a36Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
29062306a36Sopenharmony_ci		return ret;
29162306a36Sopenharmony_ci	}
29262306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTR;
29362306a36Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
29462306a36Sopenharmony_ci	if (ret) {
29562306a36Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
29662306a36Sopenharmony_ci		return ret;
29762306a36Sopenharmony_ci	}
29862306a36Sopenharmony_ci	qp_attr.rq_psn = psn;
29962306a36Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
30062306a36Sopenharmony_ci	if (ret) {
30162306a36Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
30262306a36Sopenharmony_ci		return ret;
30362306a36Sopenharmony_ci	}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	/*
30662306a36Sopenharmony_ci	 * Current Mellanox HCA firmware won't generate completions
30762306a36Sopenharmony_ci	 * with error for drain WRs unless the QP has been moved to
30862306a36Sopenharmony_ci	 * RTS first. This work-around leaves a window where a QP has
30962306a36Sopenharmony_ci	 * moved to error asynchronously, but this will eventually get
31062306a36Sopenharmony_ci	 * fixed in firmware, so let's not error out if modify QP
31162306a36Sopenharmony_ci	 * fails.
31262306a36Sopenharmony_ci	 */
31362306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTS;
31462306a36Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
31562306a36Sopenharmony_ci	if (ret) {
31662306a36Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
31762306a36Sopenharmony_ci		return 0;
31862306a36Sopenharmony_ci	}
31962306a36Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
32062306a36Sopenharmony_ci	if (ret) {
32162306a36Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
32262306a36Sopenharmony_ci		return 0;
32362306a36Sopenharmony_ci	}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	return 0;
32662306a36Sopenharmony_ci}
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_cistatic void ipoib_cm_init_rx_wr(struct net_device *dev,
32962306a36Sopenharmony_ci				struct ib_recv_wr *wr,
33062306a36Sopenharmony_ci				struct ib_sge *sge)
33162306a36Sopenharmony_ci{
33262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
33362306a36Sopenharmony_ci	int i;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	for (i = 0; i < priv->cm.num_frags; ++i)
33662306a36Sopenharmony_ci		sge[i].lkey = priv->pd->local_dma_lkey;
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	sge[0].length = IPOIB_CM_HEAD_SIZE;
33962306a36Sopenharmony_ci	for (i = 1; i < priv->cm.num_frags; ++i)
34062306a36Sopenharmony_ci		sge[i].length = PAGE_SIZE;
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci	wr->next    = NULL;
34362306a36Sopenharmony_ci	wr->sg_list = sge;
34462306a36Sopenharmony_ci	wr->num_sge = priv->cm.num_frags;
34562306a36Sopenharmony_ci}
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_cistatic int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
34862306a36Sopenharmony_ci				   struct ipoib_cm_rx *rx)
34962306a36Sopenharmony_ci{
35062306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
35162306a36Sopenharmony_ci	struct {
35262306a36Sopenharmony_ci		struct ib_recv_wr wr;
35362306a36Sopenharmony_ci		struct ib_sge sge[IPOIB_CM_RX_SG];
35462306a36Sopenharmony_ci	} *t;
35562306a36Sopenharmony_ci	int ret;
35662306a36Sopenharmony_ci	int i;
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_ci	rx->rx_ring = vzalloc(array_size(ipoib_recvq_size,
35962306a36Sopenharmony_ci					 sizeof(*rx->rx_ring)));
36062306a36Sopenharmony_ci	if (!rx->rx_ring)
36162306a36Sopenharmony_ci		return -ENOMEM;
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci	t = kmalloc(sizeof(*t), GFP_KERNEL);
36462306a36Sopenharmony_ci	if (!t) {
36562306a36Sopenharmony_ci		ret = -ENOMEM;
36662306a36Sopenharmony_ci		goto err_free_1;
36762306a36Sopenharmony_ci	}
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_ci	ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	spin_lock_irq(&priv->lock);
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
37462306a36Sopenharmony_ci		spin_unlock_irq(&priv->lock);
37562306a36Sopenharmony_ci		ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0);
37662306a36Sopenharmony_ci		ret = -EINVAL;
37762306a36Sopenharmony_ci		goto err_free;
37862306a36Sopenharmony_ci	} else
37962306a36Sopenharmony_ci		++priv->cm.nonsrq_conn_qp;
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci	spin_unlock_irq(&priv->lock);
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	for (i = 0; i < ipoib_recvq_size; ++i) {
38462306a36Sopenharmony_ci		if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
38562306a36Sopenharmony_ci					   rx->rx_ring[i].mapping,
38662306a36Sopenharmony_ci					   GFP_KERNEL)) {
38762306a36Sopenharmony_ci			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
38862306a36Sopenharmony_ci			ret = -ENOMEM;
38962306a36Sopenharmony_ci			goto err_count;
39062306a36Sopenharmony_ci		}
39162306a36Sopenharmony_ci		ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
39262306a36Sopenharmony_ci		if (ret) {
39362306a36Sopenharmony_ci			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
39462306a36Sopenharmony_ci				   "failed for buf %d\n", i);
39562306a36Sopenharmony_ci			ret = -EIO;
39662306a36Sopenharmony_ci			goto err_count;
39762306a36Sopenharmony_ci		}
39862306a36Sopenharmony_ci	}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	rx->recv_count = ipoib_recvq_size;
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	kfree(t);
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	return 0;
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_cierr_count:
40762306a36Sopenharmony_ci	spin_lock_irq(&priv->lock);
40862306a36Sopenharmony_ci	--priv->cm.nonsrq_conn_qp;
40962306a36Sopenharmony_ci	spin_unlock_irq(&priv->lock);
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_cierr_free:
41262306a36Sopenharmony_ci	kfree(t);
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_cierr_free_1:
41562306a36Sopenharmony_ci	ipoib_cm_free_rx_ring(dev, rx->rx_ring);
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	return ret;
41862306a36Sopenharmony_ci}
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_cistatic int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
42162306a36Sopenharmony_ci			     struct ib_qp *qp,
42262306a36Sopenharmony_ci			     const struct ib_cm_req_event_param *req,
42362306a36Sopenharmony_ci			     unsigned int psn)
42462306a36Sopenharmony_ci{
42562306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
42662306a36Sopenharmony_ci	struct ipoib_cm_data data = {};
42762306a36Sopenharmony_ci	struct ib_cm_rep_param rep = {};
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	data.qpn = cpu_to_be32(priv->qp->qp_num);
43062306a36Sopenharmony_ci	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	rep.private_data = &data;
43362306a36Sopenharmony_ci	rep.private_data_len = sizeof(data);
43462306a36Sopenharmony_ci	rep.flow_control = 0;
43562306a36Sopenharmony_ci	rep.rnr_retry_count = req->rnr_retry_count;
43662306a36Sopenharmony_ci	rep.srq = ipoib_cm_has_srq(dev);
43762306a36Sopenharmony_ci	rep.qp_num = qp->qp_num;
43862306a36Sopenharmony_ci	rep.starting_psn = psn;
43962306a36Sopenharmony_ci	return ib_send_cm_rep(cm_id, &rep);
44062306a36Sopenharmony_ci}
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_cistatic int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
44362306a36Sopenharmony_ci				const struct ib_cm_event *event)
44462306a36Sopenharmony_ci{
44562306a36Sopenharmony_ci	struct net_device *dev = cm_id->context;
44662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
44762306a36Sopenharmony_ci	struct ipoib_cm_rx *p;
44862306a36Sopenharmony_ci	unsigned int psn;
44962306a36Sopenharmony_ci	int ret;
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	ipoib_dbg(priv, "REQ arrived\n");
45262306a36Sopenharmony_ci	p = kzalloc(sizeof(*p), GFP_KERNEL);
45362306a36Sopenharmony_ci	if (!p)
45462306a36Sopenharmony_ci		return -ENOMEM;
45562306a36Sopenharmony_ci	p->dev = dev;
45662306a36Sopenharmony_ci	p->id = cm_id;
45762306a36Sopenharmony_ci	cm_id->context = p;
45862306a36Sopenharmony_ci	p->state = IPOIB_CM_RX_LIVE;
45962306a36Sopenharmony_ci	p->jiffies = jiffies;
46062306a36Sopenharmony_ci	INIT_LIST_HEAD(&p->list);
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	p->qp = ipoib_cm_create_rx_qp(dev, p);
46362306a36Sopenharmony_ci	if (IS_ERR(p->qp)) {
46462306a36Sopenharmony_ci		ret = PTR_ERR(p->qp);
46562306a36Sopenharmony_ci		goto err_qp;
46662306a36Sopenharmony_ci	}
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	psn = get_random_u32() & 0xffffff;
46962306a36Sopenharmony_ci	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
47062306a36Sopenharmony_ci	if (ret)
47162306a36Sopenharmony_ci		goto err_modify;
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	if (!ipoib_cm_has_srq(dev)) {
47462306a36Sopenharmony_ci		ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p);
47562306a36Sopenharmony_ci		if (ret)
47662306a36Sopenharmony_ci			goto err_modify;
47762306a36Sopenharmony_ci	}
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	spin_lock_irq(&priv->lock);
48062306a36Sopenharmony_ci	queue_delayed_work(priv->wq,
48162306a36Sopenharmony_ci			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
48262306a36Sopenharmony_ci	/* Add this entry to passive ids list head, but do not re-add it
48362306a36Sopenharmony_ci	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
48462306a36Sopenharmony_ci	p->jiffies = jiffies;
48562306a36Sopenharmony_ci	if (p->state == IPOIB_CM_RX_LIVE)
48662306a36Sopenharmony_ci		list_move(&p->list, &priv->cm.passive_ids);
48762306a36Sopenharmony_ci	spin_unlock_irq(&priv->lock);
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
49062306a36Sopenharmony_ci	if (ret) {
49162306a36Sopenharmony_ci		ipoib_warn(priv, "failed to send REP: %d\n", ret);
49262306a36Sopenharmony_ci		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
49362306a36Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state\n");
49462306a36Sopenharmony_ci	}
49562306a36Sopenharmony_ci	return 0;
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_cierr_modify:
49862306a36Sopenharmony_ci	ib_destroy_qp(p->qp);
49962306a36Sopenharmony_cierr_qp:
50062306a36Sopenharmony_ci	kfree(p);
50162306a36Sopenharmony_ci	return ret;
50262306a36Sopenharmony_ci}
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_cistatic int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
50562306a36Sopenharmony_ci			       const struct ib_cm_event *event)
50662306a36Sopenharmony_ci{
50762306a36Sopenharmony_ci	struct ipoib_cm_rx *p;
50862306a36Sopenharmony_ci	struct ipoib_dev_priv *priv;
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_ci	switch (event->event) {
51162306a36Sopenharmony_ci	case IB_CM_REQ_RECEIVED:
51262306a36Sopenharmony_ci		return ipoib_cm_req_handler(cm_id, event);
51362306a36Sopenharmony_ci	case IB_CM_DREQ_RECEIVED:
51462306a36Sopenharmony_ci		ib_send_cm_drep(cm_id, NULL, 0);
51562306a36Sopenharmony_ci		fallthrough;
51662306a36Sopenharmony_ci	case IB_CM_REJ_RECEIVED:
51762306a36Sopenharmony_ci		p = cm_id->context;
51862306a36Sopenharmony_ci		priv = ipoib_priv(p->dev);
51962306a36Sopenharmony_ci		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
52062306a36Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state\n");
52162306a36Sopenharmony_ci		fallthrough;
52262306a36Sopenharmony_ci	default:
52362306a36Sopenharmony_ci		return 0;
52462306a36Sopenharmony_ci	}
52562306a36Sopenharmony_ci}
52662306a36Sopenharmony_ci/* Adjust length of skb with fragments to match received data */
52762306a36Sopenharmony_cistatic void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
52862306a36Sopenharmony_ci			  unsigned int length, struct sk_buff *toskb)
52962306a36Sopenharmony_ci{
53062306a36Sopenharmony_ci	int i, num_frags;
53162306a36Sopenharmony_ci	unsigned int size;
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	/* put header into skb */
53462306a36Sopenharmony_ci	size = min(length, hdr_space);
53562306a36Sopenharmony_ci	skb->tail += size;
53662306a36Sopenharmony_ci	skb->len += size;
53762306a36Sopenharmony_ci	length -= size;
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	num_frags = skb_shinfo(skb)->nr_frags;
54062306a36Sopenharmony_ci	for (i = 0; i < num_frags; i++) {
54162306a36Sopenharmony_ci		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci		if (length == 0) {
54462306a36Sopenharmony_ci			/* don't need this page */
54562306a36Sopenharmony_ci			skb_fill_page_desc(toskb, i, skb_frag_page(frag),
54662306a36Sopenharmony_ci					   0, PAGE_SIZE);
54762306a36Sopenharmony_ci			--skb_shinfo(skb)->nr_frags;
54862306a36Sopenharmony_ci		} else {
54962306a36Sopenharmony_ci			size = min_t(unsigned int, length, PAGE_SIZE);
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci			skb_frag_size_set(frag, size);
55262306a36Sopenharmony_ci			skb->data_len += size;
55362306a36Sopenharmony_ci			skb->truesize += size;
55462306a36Sopenharmony_ci			skb->len += size;
55562306a36Sopenharmony_ci			length -= size;
55662306a36Sopenharmony_ci		}
55762306a36Sopenharmony_ci	}
55862306a36Sopenharmony_ci}
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_civoid ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
56162306a36Sopenharmony_ci{
56262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
56362306a36Sopenharmony_ci	struct ipoib_cm_rx_buf *rx_ring;
56462306a36Sopenharmony_ci	unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
56562306a36Sopenharmony_ci	struct sk_buff *skb, *newskb;
56662306a36Sopenharmony_ci	struct ipoib_cm_rx *p;
56762306a36Sopenharmony_ci	unsigned long flags;
56862306a36Sopenharmony_ci	u64 mapping[IPOIB_CM_RX_SG];
56962306a36Sopenharmony_ci	int frags;
57062306a36Sopenharmony_ci	int has_srq;
57162306a36Sopenharmony_ci	struct sk_buff *small_skb;
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci	ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
57462306a36Sopenharmony_ci		       wr_id, wc->status);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	if (unlikely(wr_id >= ipoib_recvq_size)) {
57762306a36Sopenharmony_ci		if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) {
57862306a36Sopenharmony_ci			spin_lock_irqsave(&priv->lock, flags);
57962306a36Sopenharmony_ci			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
58062306a36Sopenharmony_ci			ipoib_cm_start_rx_drain(priv);
58162306a36Sopenharmony_ci			queue_work(priv->wq, &priv->cm.rx_reap_task);
58262306a36Sopenharmony_ci			spin_unlock_irqrestore(&priv->lock, flags);
58362306a36Sopenharmony_ci		} else
58462306a36Sopenharmony_ci			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
58562306a36Sopenharmony_ci				   wr_id, ipoib_recvq_size);
58662306a36Sopenharmony_ci		return;
58762306a36Sopenharmony_ci	}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	p = wc->qp->qp_context;
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	has_srq = ipoib_cm_has_srq(dev);
59262306a36Sopenharmony_ci	rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring;
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	skb = rx_ring[wr_id].skb;
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS)) {
59762306a36Sopenharmony_ci		ipoib_dbg(priv,
59862306a36Sopenharmony_ci			  "cm recv error (status=%d, wrid=%d vend_err %#x)\n",
59962306a36Sopenharmony_ci			  wc->status, wr_id, wc->vendor_err);
60062306a36Sopenharmony_ci		++dev->stats.rx_dropped;
60162306a36Sopenharmony_ci		if (has_srq)
60262306a36Sopenharmony_ci			goto repost;
60362306a36Sopenharmony_ci		else {
60462306a36Sopenharmony_ci			if (!--p->recv_count) {
60562306a36Sopenharmony_ci				spin_lock_irqsave(&priv->lock, flags);
60662306a36Sopenharmony_ci				list_move(&p->list, &priv->cm.rx_reap_list);
60762306a36Sopenharmony_ci				spin_unlock_irqrestore(&priv->lock, flags);
60862306a36Sopenharmony_ci				queue_work(priv->wq, &priv->cm.rx_reap_task);
60962306a36Sopenharmony_ci			}
61062306a36Sopenharmony_ci			return;
61162306a36Sopenharmony_ci		}
61262306a36Sopenharmony_ci	}
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
61562306a36Sopenharmony_ci		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
61662306a36Sopenharmony_ci			spin_lock_irqsave(&priv->lock, flags);
61762306a36Sopenharmony_ci			p->jiffies = jiffies;
61862306a36Sopenharmony_ci			/* Move this entry to list head, but do not re-add it
61962306a36Sopenharmony_ci			 * if it has been moved out of list. */
62062306a36Sopenharmony_ci			if (p->state == IPOIB_CM_RX_LIVE)
62162306a36Sopenharmony_ci				list_move(&p->list, &priv->cm.passive_ids);
62262306a36Sopenharmony_ci			spin_unlock_irqrestore(&priv->lock, flags);
62362306a36Sopenharmony_ci		}
62462306a36Sopenharmony_ci	}
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	if (wc->byte_len < IPOIB_CM_COPYBREAK) {
62762306a36Sopenharmony_ci		int dlen = wc->byte_len;
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci		small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
63062306a36Sopenharmony_ci		if (small_skb) {
63162306a36Sopenharmony_ci			skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
63262306a36Sopenharmony_ci			ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
63362306a36Sopenharmony_ci						   dlen, DMA_FROM_DEVICE);
63462306a36Sopenharmony_ci			skb_copy_from_linear_data(skb, small_skb->data, dlen);
63562306a36Sopenharmony_ci			ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0],
63662306a36Sopenharmony_ci						      dlen, DMA_FROM_DEVICE);
63762306a36Sopenharmony_ci			skb_put(small_skb, dlen);
63862306a36Sopenharmony_ci			skb = small_skb;
63962306a36Sopenharmony_ci			goto copied;
64062306a36Sopenharmony_ci		}
64162306a36Sopenharmony_ci	}
64262306a36Sopenharmony_ci
64362306a36Sopenharmony_ci	frags = PAGE_ALIGN(wc->byte_len -
64462306a36Sopenharmony_ci			   min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
64562306a36Sopenharmony_ci		PAGE_SIZE;
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
64862306a36Sopenharmony_ci				       mapping, GFP_ATOMIC);
64962306a36Sopenharmony_ci	if (unlikely(!newskb)) {
65062306a36Sopenharmony_ci		/*
65162306a36Sopenharmony_ci		 * If we can't allocate a new RX buffer, dump
65262306a36Sopenharmony_ci		 * this packet and reuse the old buffer.
65362306a36Sopenharmony_ci		 */
65462306a36Sopenharmony_ci		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
65562306a36Sopenharmony_ci		++dev->stats.rx_dropped;
65662306a36Sopenharmony_ci		goto repost;
65762306a36Sopenharmony_ci	}
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
66062306a36Sopenharmony_ci	memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
66362306a36Sopenharmony_ci		       wc->byte_len, wc->slid);
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_ci	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_cicopied:
66862306a36Sopenharmony_ci	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
66962306a36Sopenharmony_ci	skb_add_pseudo_hdr(skb);
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	++dev->stats.rx_packets;
67262306a36Sopenharmony_ci	dev->stats.rx_bytes += skb->len;
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci	skb->dev = dev;
67562306a36Sopenharmony_ci	/* XXX get correct PACKET_ type here */
67662306a36Sopenharmony_ci	skb->pkt_type = PACKET_HOST;
67762306a36Sopenharmony_ci	netif_receive_skb(skb);
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_cirepost:
68062306a36Sopenharmony_ci	if (has_srq) {
68162306a36Sopenharmony_ci		if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id)))
68262306a36Sopenharmony_ci			ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
68362306a36Sopenharmony_ci				   "for buf %d\n", wr_id);
68462306a36Sopenharmony_ci	} else {
68562306a36Sopenharmony_ci		if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
68662306a36Sopenharmony_ci							  &priv->cm.rx_wr,
68762306a36Sopenharmony_ci							  priv->cm.rx_sge,
68862306a36Sopenharmony_ci							  wr_id))) {
68962306a36Sopenharmony_ci			--p->recv_count;
69062306a36Sopenharmony_ci			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
69162306a36Sopenharmony_ci				   "for buf %d\n", wr_id);
69262306a36Sopenharmony_ci		}
69362306a36Sopenharmony_ci	}
69462306a36Sopenharmony_ci}
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_cistatic inline int post_send(struct ipoib_dev_priv *priv,
69762306a36Sopenharmony_ci			    struct ipoib_cm_tx *tx,
69862306a36Sopenharmony_ci			    unsigned int wr_id,
69962306a36Sopenharmony_ci			    struct ipoib_tx_buf *tx_req)
70062306a36Sopenharmony_ci{
70162306a36Sopenharmony_ci	ipoib_build_sge(priv, tx_req);
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	priv->tx_wr.wr.wr_id	= wr_id | IPOIB_OP_CM;
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL);
70662306a36Sopenharmony_ci}
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_civoid ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
70962306a36Sopenharmony_ci{
71062306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
71162306a36Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
71262306a36Sopenharmony_ci	int rc;
71362306a36Sopenharmony_ci	unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb);
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	if (unlikely(skb->len > tx->mtu)) {
71662306a36Sopenharmony_ci		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
71762306a36Sopenharmony_ci			   skb->len, tx->mtu);
71862306a36Sopenharmony_ci		++dev->stats.tx_dropped;
71962306a36Sopenharmony_ci		++dev->stats.tx_errors;
72062306a36Sopenharmony_ci		ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
72162306a36Sopenharmony_ci		return;
72262306a36Sopenharmony_ci	}
72362306a36Sopenharmony_ci	if (skb_shinfo(skb)->nr_frags > usable_sge) {
72462306a36Sopenharmony_ci		if (skb_linearize(skb) < 0) {
72562306a36Sopenharmony_ci			ipoib_warn(priv, "skb could not be linearized\n");
72662306a36Sopenharmony_ci			++dev->stats.tx_dropped;
72762306a36Sopenharmony_ci			++dev->stats.tx_errors;
72862306a36Sopenharmony_ci			dev_kfree_skb_any(skb);
72962306a36Sopenharmony_ci			return;
73062306a36Sopenharmony_ci		}
73162306a36Sopenharmony_ci		/* Does skb_linearize return ok without reducing nr_frags? */
73262306a36Sopenharmony_ci		if (skb_shinfo(skb)->nr_frags > usable_sge) {
73362306a36Sopenharmony_ci			ipoib_warn(priv, "too many frags after skb linearize\n");
73462306a36Sopenharmony_ci			++dev->stats.tx_dropped;
73562306a36Sopenharmony_ci			++dev->stats.tx_errors;
73662306a36Sopenharmony_ci			dev_kfree_skb_any(skb);
73762306a36Sopenharmony_ci			return;
73862306a36Sopenharmony_ci		}
73962306a36Sopenharmony_ci	}
74062306a36Sopenharmony_ci	ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
74162306a36Sopenharmony_ci		       tx->tx_head, skb->len, tx->qp->qp_num);
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	/*
74462306a36Sopenharmony_ci	 * We put the skb into the tx_ring _before_ we call post_send()
74562306a36Sopenharmony_ci	 * because it's entirely possible that the completion handler will
74662306a36Sopenharmony_ci	 * run before we execute anything after the post_send().  That
74762306a36Sopenharmony_ci	 * means we have to make sure everything is properly recorded and
74862306a36Sopenharmony_ci	 * our state is consistent before we call post_send().
74962306a36Sopenharmony_ci	 */
75062306a36Sopenharmony_ci	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
75162306a36Sopenharmony_ci	tx_req->skb = skb;
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
75462306a36Sopenharmony_ci		++dev->stats.tx_errors;
75562306a36Sopenharmony_ci		dev_kfree_skb_any(skb);
75662306a36Sopenharmony_ci		return;
75762306a36Sopenharmony_ci	}
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	if ((priv->global_tx_head - priv->global_tx_tail) ==
76062306a36Sopenharmony_ci	    ipoib_sendq_size - 1) {
76162306a36Sopenharmony_ci		ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
76262306a36Sopenharmony_ci			  tx->qp->qp_num);
76362306a36Sopenharmony_ci		netif_stop_queue(dev);
76462306a36Sopenharmony_ci	}
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci	skb_orphan(skb);
76762306a36Sopenharmony_ci	skb_dst_drop(skb);
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	if (netif_queue_stopped(dev)) {
77062306a36Sopenharmony_ci		rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
77162306a36Sopenharmony_ci				      IB_CQ_REPORT_MISSED_EVENTS);
77262306a36Sopenharmony_ci		if (unlikely(rc < 0))
77362306a36Sopenharmony_ci			ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
77462306a36Sopenharmony_ci		else if (rc)
77562306a36Sopenharmony_ci			napi_schedule(&priv->send_napi);
77662306a36Sopenharmony_ci	}
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req);
77962306a36Sopenharmony_ci	if (unlikely(rc)) {
78062306a36Sopenharmony_ci		ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc);
78162306a36Sopenharmony_ci		++dev->stats.tx_errors;
78262306a36Sopenharmony_ci		ipoib_dma_unmap_tx(priv, tx_req);
78362306a36Sopenharmony_ci		dev_kfree_skb_any(skb);
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci		if (netif_queue_stopped(dev))
78662306a36Sopenharmony_ci			netif_wake_queue(dev);
78762306a36Sopenharmony_ci	} else {
78862306a36Sopenharmony_ci		netif_trans_update(dev);
78962306a36Sopenharmony_ci		++tx->tx_head;
79062306a36Sopenharmony_ci		++priv->global_tx_head;
79162306a36Sopenharmony_ci	}
79262306a36Sopenharmony_ci}
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_civoid ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
79562306a36Sopenharmony_ci{
79662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
79762306a36Sopenharmony_ci	struct ipoib_cm_tx *tx = wc->qp->qp_context;
79862306a36Sopenharmony_ci	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM;
79962306a36Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
80062306a36Sopenharmony_ci	unsigned long flags;
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n",
80362306a36Sopenharmony_ci		       wr_id, wc->status);
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	if (unlikely(wr_id >= ipoib_sendq_size)) {
80662306a36Sopenharmony_ci		ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
80762306a36Sopenharmony_ci			   wr_id, ipoib_sendq_size);
80862306a36Sopenharmony_ci		return;
80962306a36Sopenharmony_ci	}
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	tx_req = &tx->tx_ring[wr_id];
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	ipoib_dma_unmap_tx(priv, tx_req);
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	/* FIXME: is this right? Shouldn't we only increment on success? */
81662306a36Sopenharmony_ci	++dev->stats.tx_packets;
81762306a36Sopenharmony_ci	dev->stats.tx_bytes += tx_req->skb->len;
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	dev_kfree_skb_any(tx_req->skb);
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci	netif_tx_lock(dev);
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci	++tx->tx_tail;
82462306a36Sopenharmony_ci	++priv->global_tx_tail;
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	if (unlikely(netif_queue_stopped(dev) &&
82762306a36Sopenharmony_ci		     ((priv->global_tx_head - priv->global_tx_tail) <=
82862306a36Sopenharmony_ci		      ipoib_sendq_size >> 1) &&
82962306a36Sopenharmony_ci		     test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
83062306a36Sopenharmony_ci		netif_wake_queue(dev);
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS &&
83362306a36Sopenharmony_ci	    wc->status != IB_WC_WR_FLUSH_ERR) {
83462306a36Sopenharmony_ci		struct ipoib_neigh *neigh;
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci		/* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle,
83762306a36Sopenharmony_ci		 * so don't make waves.
83862306a36Sopenharmony_ci		 */
83962306a36Sopenharmony_ci		if (wc->status == IB_WC_RNR_RETRY_EXC_ERR ||
84062306a36Sopenharmony_ci		    wc->status == IB_WC_RETRY_EXC_ERR)
84162306a36Sopenharmony_ci			ipoib_dbg(priv,
84262306a36Sopenharmony_ci				  "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
84362306a36Sopenharmony_ci				   __func__, wc->status, wr_id, wc->vendor_err);
84462306a36Sopenharmony_ci		else
84562306a36Sopenharmony_ci			ipoib_warn(priv,
84662306a36Sopenharmony_ci				    "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n",
84762306a36Sopenharmony_ci				   __func__, wc->status, wr_id, wc->vendor_err);
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
85062306a36Sopenharmony_ci		neigh = tx->neigh;
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci		if (neigh) {
85362306a36Sopenharmony_ci			neigh->cm = NULL;
85462306a36Sopenharmony_ci			ipoib_neigh_free(neigh);
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_ci			tx->neigh = NULL;
85762306a36Sopenharmony_ci		}
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
86062306a36Sopenharmony_ci			list_move(&tx->list, &priv->cm.reap_list);
86162306a36Sopenharmony_ci			queue_work(priv->wq, &priv->cm.reap_task);
86262306a36Sopenharmony_ci		}
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
86762306a36Sopenharmony_ci	}
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci	netif_tx_unlock(dev);
87062306a36Sopenharmony_ci}
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ciint ipoib_cm_dev_open(struct net_device *dev)
87362306a36Sopenharmony_ci{
87462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
87562306a36Sopenharmony_ci	int ret;
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
87862306a36Sopenharmony_ci		return 0;
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
88162306a36Sopenharmony_ci	if (IS_ERR(priv->cm.id)) {
88262306a36Sopenharmony_ci		pr_warn("%s: failed to create CM ID\n", priv->ca->name);
88362306a36Sopenharmony_ci		ret = PTR_ERR(priv->cm.id);
88462306a36Sopenharmony_ci		goto err_cm;
88562306a36Sopenharmony_ci	}
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci	ret = ib_cm_listen(priv->cm.id,
88862306a36Sopenharmony_ci			   cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num));
88962306a36Sopenharmony_ci	if (ret) {
89062306a36Sopenharmony_ci		pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
89162306a36Sopenharmony_ci			IPOIB_CM_IETF_ID | priv->qp->qp_num);
89262306a36Sopenharmony_ci		goto err_listen;
89362306a36Sopenharmony_ci	}
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci	return 0;
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_cierr_listen:
89862306a36Sopenharmony_ci	ib_destroy_cm_id(priv->cm.id);
89962306a36Sopenharmony_cierr_cm:
90062306a36Sopenharmony_ci	priv->cm.id = NULL;
90162306a36Sopenharmony_ci	return ret;
90262306a36Sopenharmony_ci}
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_cistatic void ipoib_cm_free_rx_reap_list(struct net_device *dev)
90562306a36Sopenharmony_ci{
90662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
90762306a36Sopenharmony_ci	struct ipoib_cm_rx *rx, *n;
90862306a36Sopenharmony_ci	LIST_HEAD(list);
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci	spin_lock_irq(&priv->lock);
91162306a36Sopenharmony_ci	list_splice_init(&priv->cm.rx_reap_list, &list);
91262306a36Sopenharmony_ci	spin_unlock_irq(&priv->lock);
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	list_for_each_entry_safe(rx, n, &list, list) {
91562306a36Sopenharmony_ci		ib_destroy_cm_id(rx->id);
91662306a36Sopenharmony_ci		ib_destroy_qp(rx->qp);
91762306a36Sopenharmony_ci		if (!ipoib_cm_has_srq(dev)) {
91862306a36Sopenharmony_ci			ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring);
91962306a36Sopenharmony_ci			spin_lock_irq(&priv->lock);
92062306a36Sopenharmony_ci			--priv->cm.nonsrq_conn_qp;
92162306a36Sopenharmony_ci			spin_unlock_irq(&priv->lock);
92262306a36Sopenharmony_ci		}
92362306a36Sopenharmony_ci		kfree(rx);
92462306a36Sopenharmony_ci	}
92562306a36Sopenharmony_ci}
92662306a36Sopenharmony_ci
92762306a36Sopenharmony_civoid ipoib_cm_dev_stop(struct net_device *dev)
92862306a36Sopenharmony_ci{
92962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
93062306a36Sopenharmony_ci	struct ipoib_cm_rx *p;
93162306a36Sopenharmony_ci	unsigned long begin;
93262306a36Sopenharmony_ci	int ret;
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
93562306a36Sopenharmony_ci		return;
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	ib_destroy_cm_id(priv->cm.id);
93862306a36Sopenharmony_ci	priv->cm.id = NULL;
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci	spin_lock_irq(&priv->lock);
94162306a36Sopenharmony_ci	while (!list_empty(&priv->cm.passive_ids)) {
94262306a36Sopenharmony_ci		p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
94362306a36Sopenharmony_ci		list_move(&p->list, &priv->cm.rx_error_list);
94462306a36Sopenharmony_ci		p->state = IPOIB_CM_RX_ERROR;
94562306a36Sopenharmony_ci		spin_unlock_irq(&priv->lock);
94662306a36Sopenharmony_ci		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
94762306a36Sopenharmony_ci		if (ret)
94862306a36Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
94962306a36Sopenharmony_ci		spin_lock_irq(&priv->lock);
95062306a36Sopenharmony_ci	}
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	/* Wait for all RX to be drained */
95362306a36Sopenharmony_ci	begin = jiffies;
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci	while (!list_empty(&priv->cm.rx_error_list) ||
95662306a36Sopenharmony_ci	       !list_empty(&priv->cm.rx_flush_list) ||
95762306a36Sopenharmony_ci	       !list_empty(&priv->cm.rx_drain_list)) {
95862306a36Sopenharmony_ci		if (time_after(jiffies, begin + 5 * HZ)) {
95962306a36Sopenharmony_ci			ipoib_warn(priv, "RX drain timing out\n");
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci			/*
96262306a36Sopenharmony_ci			 * assume the HW is wedged and just free up everything.
96362306a36Sopenharmony_ci			 */
96462306a36Sopenharmony_ci			list_splice_init(&priv->cm.rx_flush_list,
96562306a36Sopenharmony_ci					 &priv->cm.rx_reap_list);
96662306a36Sopenharmony_ci			list_splice_init(&priv->cm.rx_error_list,
96762306a36Sopenharmony_ci					 &priv->cm.rx_reap_list);
96862306a36Sopenharmony_ci			list_splice_init(&priv->cm.rx_drain_list,
96962306a36Sopenharmony_ci					 &priv->cm.rx_reap_list);
97062306a36Sopenharmony_ci			break;
97162306a36Sopenharmony_ci		}
97262306a36Sopenharmony_ci		spin_unlock_irq(&priv->lock);
97362306a36Sopenharmony_ci		usleep_range(1000, 2000);
97462306a36Sopenharmony_ci		ipoib_drain_cq(dev);
97562306a36Sopenharmony_ci		spin_lock_irq(&priv->lock);
97662306a36Sopenharmony_ci	}
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_ci	spin_unlock_irq(&priv->lock);
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	ipoib_cm_free_rx_reap_list(dev);
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_ci	cancel_delayed_work(&priv->cm.stale_task);
98362306a36Sopenharmony_ci}
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_cistatic int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
98662306a36Sopenharmony_ci				const struct ib_cm_event *event)
98762306a36Sopenharmony_ci{
98862306a36Sopenharmony_ci	struct ipoib_cm_tx *p = cm_id->context;
98962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
99062306a36Sopenharmony_ci	struct ipoib_cm_data *data = event->private_data;
99162306a36Sopenharmony_ci	struct sk_buff_head skqueue;
99262306a36Sopenharmony_ci	struct ib_qp_attr qp_attr;
99362306a36Sopenharmony_ci	int qp_attr_mask, ret;
99462306a36Sopenharmony_ci	struct sk_buff *skb;
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	p->mtu = be32_to_cpu(data->mtu);
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	if (p->mtu <= IPOIB_ENCAP_LEN) {
99962306a36Sopenharmony_ci		ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n",
100062306a36Sopenharmony_ci			   p->mtu, IPOIB_ENCAP_LEN);
100162306a36Sopenharmony_ci		return -EINVAL;
100262306a36Sopenharmony_ci	}
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTR;
100562306a36Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
100662306a36Sopenharmony_ci	if (ret) {
100762306a36Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
100862306a36Sopenharmony_ci		return ret;
100962306a36Sopenharmony_ci	}
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	qp_attr.rq_psn = 0 /* FIXME */;
101262306a36Sopenharmony_ci	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
101362306a36Sopenharmony_ci	if (ret) {
101462306a36Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
101562306a36Sopenharmony_ci		return ret;
101662306a36Sopenharmony_ci	}
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RTS;
101962306a36Sopenharmony_ci	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
102062306a36Sopenharmony_ci	if (ret) {
102162306a36Sopenharmony_ci		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
102262306a36Sopenharmony_ci		return ret;
102362306a36Sopenharmony_ci	}
102462306a36Sopenharmony_ci	ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
102562306a36Sopenharmony_ci	if (ret) {
102662306a36Sopenharmony_ci		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
102762306a36Sopenharmony_ci		return ret;
102862306a36Sopenharmony_ci	}
102962306a36Sopenharmony_ci
103062306a36Sopenharmony_ci	skb_queue_head_init(&skqueue);
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	netif_tx_lock_bh(p->dev);
103362306a36Sopenharmony_ci	spin_lock_irq(&priv->lock);
103462306a36Sopenharmony_ci	set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
103562306a36Sopenharmony_ci	if (p->neigh)
103662306a36Sopenharmony_ci		while ((skb = __skb_dequeue(&p->neigh->queue)))
103762306a36Sopenharmony_ci			__skb_queue_tail(&skqueue, skb);
103862306a36Sopenharmony_ci	spin_unlock_irq(&priv->lock);
103962306a36Sopenharmony_ci	netif_tx_unlock_bh(p->dev);
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci	while ((skb = __skb_dequeue(&skqueue))) {
104262306a36Sopenharmony_ci		skb->dev = p->dev;
104362306a36Sopenharmony_ci		ret = dev_queue_xmit(skb);
104462306a36Sopenharmony_ci		if (ret)
104562306a36Sopenharmony_ci			ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
104662306a36Sopenharmony_ci				   __func__, ret);
104762306a36Sopenharmony_ci	}
104862306a36Sopenharmony_ci
104962306a36Sopenharmony_ci	ret = ib_send_cm_rtu(cm_id, NULL, 0);
105062306a36Sopenharmony_ci	if (ret) {
105162306a36Sopenharmony_ci		ipoib_warn(priv, "failed to send RTU: %d\n", ret);
105262306a36Sopenharmony_ci		return ret;
105362306a36Sopenharmony_ci	}
105462306a36Sopenharmony_ci	return 0;
105562306a36Sopenharmony_ci}
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_cistatic struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx)
105862306a36Sopenharmony_ci{
105962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
106062306a36Sopenharmony_ci	struct ib_qp_init_attr attr = {
106162306a36Sopenharmony_ci		.send_cq		= priv->send_cq,
106262306a36Sopenharmony_ci		.recv_cq		= priv->recv_cq,
106362306a36Sopenharmony_ci		.srq			= priv->cm.srq,
106462306a36Sopenharmony_ci		.cap.max_send_wr	= ipoib_sendq_size,
106562306a36Sopenharmony_ci		.cap.max_send_sge	= 1,
106662306a36Sopenharmony_ci		.sq_sig_type		= IB_SIGNAL_ALL_WR,
106762306a36Sopenharmony_ci		.qp_type		= IB_QPT_RC,
106862306a36Sopenharmony_ci		.qp_context		= tx,
106962306a36Sopenharmony_ci		.create_flags		= 0
107062306a36Sopenharmony_ci	};
107162306a36Sopenharmony_ci	struct ib_qp *tx_qp;
107262306a36Sopenharmony_ci
107362306a36Sopenharmony_ci	if (dev->features & NETIF_F_SG)
107462306a36Sopenharmony_ci		attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
107562306a36Sopenharmony_ci					      MAX_SKB_FRAGS + 1);
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci	tx_qp = ib_create_qp(priv->pd, &attr);
107862306a36Sopenharmony_ci	tx->max_send_sge = attr.cap.max_send_sge;
107962306a36Sopenharmony_ci	return tx_qp;
108062306a36Sopenharmony_ci}
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_cistatic int ipoib_cm_send_req(struct net_device *dev,
108362306a36Sopenharmony_ci			     struct ib_cm_id *id, struct ib_qp *qp,
108462306a36Sopenharmony_ci			     u32 qpn,
108562306a36Sopenharmony_ci			     struct sa_path_rec *pathrec)
108662306a36Sopenharmony_ci{
108762306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
108862306a36Sopenharmony_ci	struct ipoib_cm_data data = {};
108962306a36Sopenharmony_ci	struct ib_cm_req_param req = {};
109062306a36Sopenharmony_ci
109162306a36Sopenharmony_ci	data.qpn = cpu_to_be32(priv->qp->qp_num);
109262306a36Sopenharmony_ci	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci	req.primary_path		= pathrec;
109562306a36Sopenharmony_ci	req.alternate_path		= NULL;
109662306a36Sopenharmony_ci	req.service_id			= cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
109762306a36Sopenharmony_ci	req.qp_num			= qp->qp_num;
109862306a36Sopenharmony_ci	req.qp_type			= qp->qp_type;
109962306a36Sopenharmony_ci	req.private_data		= &data;
110062306a36Sopenharmony_ci	req.private_data_len		= sizeof(data);
110162306a36Sopenharmony_ci	req.flow_control		= 0;
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	req.starting_psn		= 0; /* FIXME */
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	/*
110662306a36Sopenharmony_ci	 * Pick some arbitrary defaults here; we could make these
110762306a36Sopenharmony_ci	 * module parameters if anyone cared about setting them.
110862306a36Sopenharmony_ci	 */
110962306a36Sopenharmony_ci	req.responder_resources		= 4;
111062306a36Sopenharmony_ci	req.remote_cm_response_timeout	= 20;
111162306a36Sopenharmony_ci	req.local_cm_response_timeout	= 20;
111262306a36Sopenharmony_ci	req.retry_count			= 0; /* RFC draft warns against retries */
111362306a36Sopenharmony_ci	req.rnr_retry_count		= 0; /* RFC draft warns against retries */
111462306a36Sopenharmony_ci	req.max_cm_retries		= 15;
111562306a36Sopenharmony_ci	req.srq				= ipoib_cm_has_srq(dev);
111662306a36Sopenharmony_ci	return ib_send_cm_req(id, &req);
111762306a36Sopenharmony_ci}
111862306a36Sopenharmony_ci
111962306a36Sopenharmony_cistatic int ipoib_cm_modify_tx_init(struct net_device *dev,
112062306a36Sopenharmony_ci				  struct ib_cm_id *cm_id, struct ib_qp *qp)
112162306a36Sopenharmony_ci{
112262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
112362306a36Sopenharmony_ci	struct ib_qp_attr qp_attr;
112462306a36Sopenharmony_ci	int qp_attr_mask, ret;
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	qp_attr.pkey_index = priv->pkey_index;
112762306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_INIT;
112862306a36Sopenharmony_ci	qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
112962306a36Sopenharmony_ci	qp_attr.port_num = priv->port;
113062306a36Sopenharmony_ci	qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
113362306a36Sopenharmony_ci	if (ret) {
113462306a36Sopenharmony_ci		ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
113562306a36Sopenharmony_ci		return ret;
113662306a36Sopenharmony_ci	}
113762306a36Sopenharmony_ci	return 0;
113862306a36Sopenharmony_ci}
113962306a36Sopenharmony_ci
114062306a36Sopenharmony_cistatic int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
114162306a36Sopenharmony_ci			    struct sa_path_rec *pathrec)
114262306a36Sopenharmony_ci{
114362306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
114462306a36Sopenharmony_ci	unsigned int noio_flag;
114562306a36Sopenharmony_ci	int ret;
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	noio_flag = memalloc_noio_save();
114862306a36Sopenharmony_ci	p->tx_ring = vzalloc(array_size(ipoib_sendq_size, sizeof(*p->tx_ring)));
114962306a36Sopenharmony_ci	if (!p->tx_ring) {
115062306a36Sopenharmony_ci		memalloc_noio_restore(noio_flag);
115162306a36Sopenharmony_ci		ret = -ENOMEM;
115262306a36Sopenharmony_ci		goto err_tx;
115362306a36Sopenharmony_ci	}
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
115662306a36Sopenharmony_ci	memalloc_noio_restore(noio_flag);
115762306a36Sopenharmony_ci	if (IS_ERR(p->qp)) {
115862306a36Sopenharmony_ci		ret = PTR_ERR(p->qp);
115962306a36Sopenharmony_ci		ipoib_warn(priv, "failed to create tx qp: %d\n", ret);
116062306a36Sopenharmony_ci		goto err_qp;
116162306a36Sopenharmony_ci	}
116262306a36Sopenharmony_ci
116362306a36Sopenharmony_ci	p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
116462306a36Sopenharmony_ci	if (IS_ERR(p->id)) {
116562306a36Sopenharmony_ci		ret = PTR_ERR(p->id);
116662306a36Sopenharmony_ci		ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
116762306a36Sopenharmony_ci		goto err_id;
116862306a36Sopenharmony_ci	}
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	ret = ipoib_cm_modify_tx_init(p->dev, p->id,  p->qp);
117162306a36Sopenharmony_ci	if (ret) {
117262306a36Sopenharmony_ci		ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
117362306a36Sopenharmony_ci		goto err_modify_send;
117462306a36Sopenharmony_ci	}
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci	ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
117762306a36Sopenharmony_ci	if (ret) {
117862306a36Sopenharmony_ci		ipoib_warn(priv, "failed to send cm req: %d\n", ret);
117962306a36Sopenharmony_ci		goto err_modify_send;
118062306a36Sopenharmony_ci	}
118162306a36Sopenharmony_ci
118262306a36Sopenharmony_ci	ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
118362306a36Sopenharmony_ci		  p->qp->qp_num, pathrec->dgid.raw, qpn);
118462306a36Sopenharmony_ci
118562306a36Sopenharmony_ci	return 0;
118662306a36Sopenharmony_ci
118762306a36Sopenharmony_cierr_modify_send:
118862306a36Sopenharmony_ci	ib_destroy_cm_id(p->id);
118962306a36Sopenharmony_cierr_id:
119062306a36Sopenharmony_ci	p->id = NULL;
119162306a36Sopenharmony_ci	ib_destroy_qp(p->qp);
119262306a36Sopenharmony_cierr_qp:
119362306a36Sopenharmony_ci	p->qp = NULL;
119462306a36Sopenharmony_ci	vfree(p->tx_ring);
119562306a36Sopenharmony_cierr_tx:
119662306a36Sopenharmony_ci	return ret;
119762306a36Sopenharmony_ci}
119862306a36Sopenharmony_ci
119962306a36Sopenharmony_cistatic void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
120062306a36Sopenharmony_ci{
120162306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
120262306a36Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
120362306a36Sopenharmony_ci	unsigned long begin;
120462306a36Sopenharmony_ci
120562306a36Sopenharmony_ci	ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
120662306a36Sopenharmony_ci		  p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci	if (p->id)
120962306a36Sopenharmony_ci		ib_destroy_cm_id(p->id);
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_ci	if (p->tx_ring) {
121262306a36Sopenharmony_ci		/* Wait for all sends to complete */
121362306a36Sopenharmony_ci		begin = jiffies;
121462306a36Sopenharmony_ci		while ((int) p->tx_tail - (int) p->tx_head < 0) {
121562306a36Sopenharmony_ci			if (time_after(jiffies, begin + 5 * HZ)) {
121662306a36Sopenharmony_ci				ipoib_warn(priv, "timing out; %d sends not completed\n",
121762306a36Sopenharmony_ci					   p->tx_head - p->tx_tail);
121862306a36Sopenharmony_ci				goto timeout;
121962306a36Sopenharmony_ci			}
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci			usleep_range(1000, 2000);
122262306a36Sopenharmony_ci		}
122362306a36Sopenharmony_ci	}
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_citimeout:
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci	while ((int) p->tx_tail - (int) p->tx_head < 0) {
122862306a36Sopenharmony_ci		tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
122962306a36Sopenharmony_ci		ipoib_dma_unmap_tx(priv, tx_req);
123062306a36Sopenharmony_ci		dev_kfree_skb_any(tx_req->skb);
123162306a36Sopenharmony_ci		netif_tx_lock_bh(p->dev);
123262306a36Sopenharmony_ci		++p->tx_tail;
123362306a36Sopenharmony_ci		++priv->global_tx_tail;
123462306a36Sopenharmony_ci		if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
123562306a36Sopenharmony_ci			     ipoib_sendq_size >> 1) &&
123662306a36Sopenharmony_ci		    netif_queue_stopped(p->dev) &&
123762306a36Sopenharmony_ci		    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
123862306a36Sopenharmony_ci			netif_wake_queue(p->dev);
123962306a36Sopenharmony_ci		netif_tx_unlock_bh(p->dev);
124062306a36Sopenharmony_ci	}
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci	if (p->qp)
124362306a36Sopenharmony_ci		ib_destroy_qp(p->qp);
124462306a36Sopenharmony_ci
124562306a36Sopenharmony_ci	vfree(p->tx_ring);
124662306a36Sopenharmony_ci	kfree(p);
124762306a36Sopenharmony_ci}
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_cistatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
125062306a36Sopenharmony_ci			       const struct ib_cm_event *event)
125162306a36Sopenharmony_ci{
125262306a36Sopenharmony_ci	struct ipoib_cm_tx *tx = cm_id->context;
125362306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
125462306a36Sopenharmony_ci	struct net_device *dev = priv->dev;
125562306a36Sopenharmony_ci	struct ipoib_neigh *neigh;
125662306a36Sopenharmony_ci	unsigned long flags;
125762306a36Sopenharmony_ci	int ret;
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_ci	switch (event->event) {
126062306a36Sopenharmony_ci	case IB_CM_DREQ_RECEIVED:
126162306a36Sopenharmony_ci		ipoib_dbg(priv, "DREQ received.\n");
126262306a36Sopenharmony_ci		ib_send_cm_drep(cm_id, NULL, 0);
126362306a36Sopenharmony_ci		break;
126462306a36Sopenharmony_ci	case IB_CM_REP_RECEIVED:
126562306a36Sopenharmony_ci		ipoib_dbg(priv, "REP received.\n");
126662306a36Sopenharmony_ci		ret = ipoib_cm_rep_handler(cm_id, event);
126762306a36Sopenharmony_ci		if (ret)
126862306a36Sopenharmony_ci			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
126962306a36Sopenharmony_ci				       NULL, 0, NULL, 0);
127062306a36Sopenharmony_ci		break;
127162306a36Sopenharmony_ci	case IB_CM_REQ_ERROR:
127262306a36Sopenharmony_ci	case IB_CM_REJ_RECEIVED:
127362306a36Sopenharmony_ci	case IB_CM_TIMEWAIT_EXIT:
127462306a36Sopenharmony_ci		ipoib_dbg(priv, "CM error %d.\n", event->event);
127562306a36Sopenharmony_ci		netif_tx_lock_bh(dev);
127662306a36Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
127762306a36Sopenharmony_ci		neigh = tx->neigh;
127862306a36Sopenharmony_ci
127962306a36Sopenharmony_ci		if (neigh) {
128062306a36Sopenharmony_ci			neigh->cm = NULL;
128162306a36Sopenharmony_ci			ipoib_neigh_free(neigh);
128262306a36Sopenharmony_ci
128362306a36Sopenharmony_ci			tx->neigh = NULL;
128462306a36Sopenharmony_ci		}
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
128762306a36Sopenharmony_ci			list_move(&tx->list, &priv->cm.reap_list);
128862306a36Sopenharmony_ci			queue_work(priv->wq, &priv->cm.reap_task);
128962306a36Sopenharmony_ci		}
129062306a36Sopenharmony_ci
129162306a36Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
129262306a36Sopenharmony_ci		netif_tx_unlock_bh(dev);
129362306a36Sopenharmony_ci		break;
129462306a36Sopenharmony_ci	default:
129562306a36Sopenharmony_ci		break;
129662306a36Sopenharmony_ci	}
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci	return 0;
129962306a36Sopenharmony_ci}
130062306a36Sopenharmony_ci
130162306a36Sopenharmony_cistruct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
130262306a36Sopenharmony_ci				       struct ipoib_neigh *neigh)
130362306a36Sopenharmony_ci{
130462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
130562306a36Sopenharmony_ci	struct ipoib_cm_tx *tx;
130662306a36Sopenharmony_ci
130762306a36Sopenharmony_ci	tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
130862306a36Sopenharmony_ci	if (!tx)
130962306a36Sopenharmony_ci		return NULL;
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_ci	neigh->cm = tx;
131262306a36Sopenharmony_ci	tx->neigh = neigh;
131362306a36Sopenharmony_ci	tx->dev = dev;
131462306a36Sopenharmony_ci	list_add(&tx->list, &priv->cm.start_list);
131562306a36Sopenharmony_ci	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
131662306a36Sopenharmony_ci	queue_work(priv->wq, &priv->cm.start_task);
131762306a36Sopenharmony_ci	return tx;
131862306a36Sopenharmony_ci}
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_civoid ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
132162306a36Sopenharmony_ci{
132262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
132362306a36Sopenharmony_ci	unsigned long flags;
132462306a36Sopenharmony_ci	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
132562306a36Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
132662306a36Sopenharmony_ci		list_move(&tx->list, &priv->cm.reap_list);
132762306a36Sopenharmony_ci		queue_work(priv->wq, &priv->cm.reap_task);
132862306a36Sopenharmony_ci		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
132962306a36Sopenharmony_ci			  tx->neigh->daddr + 4);
133062306a36Sopenharmony_ci		tx->neigh = NULL;
133162306a36Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
133262306a36Sopenharmony_ci	}
133362306a36Sopenharmony_ci}
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci#define QPN_AND_OPTIONS_OFFSET	4
133662306a36Sopenharmony_ci
133762306a36Sopenharmony_cistatic void ipoib_cm_tx_start(struct work_struct *work)
133862306a36Sopenharmony_ci{
133962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
134062306a36Sopenharmony_ci						   cm.start_task);
134162306a36Sopenharmony_ci	struct net_device *dev = priv->dev;
134262306a36Sopenharmony_ci	struct ipoib_neigh *neigh;
134362306a36Sopenharmony_ci	struct ipoib_cm_tx *p;
134462306a36Sopenharmony_ci	unsigned long flags;
134562306a36Sopenharmony_ci	struct ipoib_path *path;
134662306a36Sopenharmony_ci	int ret;
134762306a36Sopenharmony_ci
134862306a36Sopenharmony_ci	struct sa_path_rec pathrec;
134962306a36Sopenharmony_ci	u32 qpn;
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci	netif_tx_lock_bh(dev);
135262306a36Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
135362306a36Sopenharmony_ci
135462306a36Sopenharmony_ci	while (!list_empty(&priv->cm.start_list)) {
135562306a36Sopenharmony_ci		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
135662306a36Sopenharmony_ci		list_del_init(&p->list);
135762306a36Sopenharmony_ci		neigh = p->neigh;
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci		qpn = IPOIB_QPN(neigh->daddr);
136062306a36Sopenharmony_ci		/*
136162306a36Sopenharmony_ci		 * As long as the search is with these 2 locks,
136262306a36Sopenharmony_ci		 * path existence indicates its validity.
136362306a36Sopenharmony_ci		 */
136462306a36Sopenharmony_ci		path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
136562306a36Sopenharmony_ci		if (!path) {
136662306a36Sopenharmony_ci			pr_info("%s ignore not valid path %pI6\n",
136762306a36Sopenharmony_ci				__func__,
136862306a36Sopenharmony_ci				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
136962306a36Sopenharmony_ci			goto free_neigh;
137062306a36Sopenharmony_ci		}
137162306a36Sopenharmony_ci		memcpy(&pathrec, &path->pathrec, sizeof(pathrec));
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
137462306a36Sopenharmony_ci		netif_tx_unlock_bh(dev);
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci		ret = ipoib_cm_tx_init(p, qpn, &pathrec);
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_ci		netif_tx_lock_bh(dev);
137962306a36Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_ci		if (ret) {
138262306a36Sopenharmony_cifree_neigh:
138362306a36Sopenharmony_ci			neigh = p->neigh;
138462306a36Sopenharmony_ci			if (neigh) {
138562306a36Sopenharmony_ci				neigh->cm = NULL;
138662306a36Sopenharmony_ci				ipoib_neigh_free(neigh);
138762306a36Sopenharmony_ci			}
138862306a36Sopenharmony_ci			list_del(&p->list);
138962306a36Sopenharmony_ci			kfree(p);
139062306a36Sopenharmony_ci		}
139162306a36Sopenharmony_ci	}
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
139462306a36Sopenharmony_ci	netif_tx_unlock_bh(dev);
139562306a36Sopenharmony_ci}
139662306a36Sopenharmony_ci
139762306a36Sopenharmony_cistatic void ipoib_cm_tx_reap(struct work_struct *work)
139862306a36Sopenharmony_ci{
139962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
140062306a36Sopenharmony_ci						   cm.reap_task);
140162306a36Sopenharmony_ci	struct net_device *dev = priv->dev;
140262306a36Sopenharmony_ci	struct ipoib_cm_tx *p;
140362306a36Sopenharmony_ci	unsigned long flags;
140462306a36Sopenharmony_ci
140562306a36Sopenharmony_ci	netif_tx_lock_bh(dev);
140662306a36Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
140762306a36Sopenharmony_ci
140862306a36Sopenharmony_ci	while (!list_empty(&priv->cm.reap_list)) {
140962306a36Sopenharmony_ci		p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
141062306a36Sopenharmony_ci		list_del_init(&p->list);
141162306a36Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
141262306a36Sopenharmony_ci		netif_tx_unlock_bh(dev);
141362306a36Sopenharmony_ci		ipoib_cm_tx_destroy(p);
141462306a36Sopenharmony_ci		netif_tx_lock_bh(dev);
141562306a36Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
141662306a36Sopenharmony_ci	}
141762306a36Sopenharmony_ci
141862306a36Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
141962306a36Sopenharmony_ci	netif_tx_unlock_bh(dev);
142062306a36Sopenharmony_ci}
142162306a36Sopenharmony_ci
142262306a36Sopenharmony_cistatic void ipoib_cm_skb_reap(struct work_struct *work)
142362306a36Sopenharmony_ci{
142462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
142562306a36Sopenharmony_ci						   cm.skb_task);
142662306a36Sopenharmony_ci	struct net_device *dev = priv->dev;
142762306a36Sopenharmony_ci	struct sk_buff *skb;
142862306a36Sopenharmony_ci	unsigned long flags;
142962306a36Sopenharmony_ci	unsigned int mtu = priv->mcast_mtu;
143062306a36Sopenharmony_ci
143162306a36Sopenharmony_ci	netif_tx_lock_bh(dev);
143262306a36Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
143362306a36Sopenharmony_ci
143462306a36Sopenharmony_ci	while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
143562306a36Sopenharmony_ci		spin_unlock_irqrestore(&priv->lock, flags);
143662306a36Sopenharmony_ci		netif_tx_unlock_bh(dev);
143762306a36Sopenharmony_ci
143862306a36Sopenharmony_ci		if (skb->protocol == htons(ETH_P_IP)) {
143962306a36Sopenharmony_ci			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
144062306a36Sopenharmony_ci			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
144162306a36Sopenharmony_ci		}
144262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
144362306a36Sopenharmony_ci		else if (skb->protocol == htons(ETH_P_IPV6)) {
144462306a36Sopenharmony_ci			memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
144562306a36Sopenharmony_ci			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
144662306a36Sopenharmony_ci		}
144762306a36Sopenharmony_ci#endif
144862306a36Sopenharmony_ci		dev_kfree_skb_any(skb);
144962306a36Sopenharmony_ci
145062306a36Sopenharmony_ci		netif_tx_lock_bh(dev);
145162306a36Sopenharmony_ci		spin_lock_irqsave(&priv->lock, flags);
145262306a36Sopenharmony_ci	}
145362306a36Sopenharmony_ci
145462306a36Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
145562306a36Sopenharmony_ci	netif_tx_unlock_bh(dev);
145662306a36Sopenharmony_ci}
145762306a36Sopenharmony_ci
145862306a36Sopenharmony_civoid ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
145962306a36Sopenharmony_ci			   unsigned int mtu)
146062306a36Sopenharmony_ci{
146162306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
146262306a36Sopenharmony_ci	int e = skb_queue_empty(&priv->cm.skb_queue);
146362306a36Sopenharmony_ci
146462306a36Sopenharmony_ci	skb_dst_update_pmtu(skb, mtu);
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_ci	skb_queue_tail(&priv->cm.skb_queue, skb);
146762306a36Sopenharmony_ci	if (e)
146862306a36Sopenharmony_ci		queue_work(priv->wq, &priv->cm.skb_task);
146962306a36Sopenharmony_ci}
147062306a36Sopenharmony_ci
147162306a36Sopenharmony_cistatic void ipoib_cm_rx_reap(struct work_struct *work)
147262306a36Sopenharmony_ci{
147362306a36Sopenharmony_ci	ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv,
147462306a36Sopenharmony_ci						cm.rx_reap_task)->dev);
147562306a36Sopenharmony_ci}
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_cistatic void ipoib_cm_stale_task(struct work_struct *work)
147862306a36Sopenharmony_ci{
147962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
148062306a36Sopenharmony_ci						   cm.stale_task.work);
148162306a36Sopenharmony_ci	struct ipoib_cm_rx *p;
148262306a36Sopenharmony_ci	int ret;
148362306a36Sopenharmony_ci
148462306a36Sopenharmony_ci	spin_lock_irq(&priv->lock);
148562306a36Sopenharmony_ci	while (!list_empty(&priv->cm.passive_ids)) {
148662306a36Sopenharmony_ci		/* List is sorted by LRU, start from tail,
148762306a36Sopenharmony_ci		 * stop when we see a recently used entry */
148862306a36Sopenharmony_ci		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
148962306a36Sopenharmony_ci		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
149062306a36Sopenharmony_ci			break;
149162306a36Sopenharmony_ci		list_move(&p->list, &priv->cm.rx_error_list);
149262306a36Sopenharmony_ci		p->state = IPOIB_CM_RX_ERROR;
149362306a36Sopenharmony_ci		spin_unlock_irq(&priv->lock);
149462306a36Sopenharmony_ci		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
149562306a36Sopenharmony_ci		if (ret)
149662306a36Sopenharmony_ci			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
149762306a36Sopenharmony_ci		spin_lock_irq(&priv->lock);
149862306a36Sopenharmony_ci	}
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci	if (!list_empty(&priv->cm.passive_ids))
150162306a36Sopenharmony_ci		queue_delayed_work(priv->wq,
150262306a36Sopenharmony_ci				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
150362306a36Sopenharmony_ci	spin_unlock_irq(&priv->lock);
150462306a36Sopenharmony_ci}
150562306a36Sopenharmony_ci
150662306a36Sopenharmony_cistatic ssize_t mode_show(struct device *d, struct device_attribute *attr,
150762306a36Sopenharmony_ci			 char *buf)
150862306a36Sopenharmony_ci{
150962306a36Sopenharmony_ci	struct net_device *dev = to_net_dev(d);
151062306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
151162306a36Sopenharmony_ci
151262306a36Sopenharmony_ci	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
151362306a36Sopenharmony_ci		return sysfs_emit(buf, "connected\n");
151462306a36Sopenharmony_ci	else
151562306a36Sopenharmony_ci		return sysfs_emit(buf, "datagram\n");
151662306a36Sopenharmony_ci}
151762306a36Sopenharmony_ci
151862306a36Sopenharmony_cistatic ssize_t mode_store(struct device *d, struct device_attribute *attr,
151962306a36Sopenharmony_ci			  const char *buf, size_t count)
152062306a36Sopenharmony_ci{
152162306a36Sopenharmony_ci	struct net_device *dev = to_net_dev(d);
152262306a36Sopenharmony_ci	int ret;
152362306a36Sopenharmony_ci
152462306a36Sopenharmony_ci	if (!rtnl_trylock()) {
152562306a36Sopenharmony_ci		return restart_syscall();
152662306a36Sopenharmony_ci	}
152762306a36Sopenharmony_ci
152862306a36Sopenharmony_ci	if (dev->reg_state != NETREG_REGISTERED) {
152962306a36Sopenharmony_ci		rtnl_unlock();
153062306a36Sopenharmony_ci		return -EPERM;
153162306a36Sopenharmony_ci	}
153262306a36Sopenharmony_ci
153362306a36Sopenharmony_ci	ret = ipoib_set_mode(dev, buf);
153462306a36Sopenharmony_ci
153562306a36Sopenharmony_ci	/* The assumption is that the function ipoib_set_mode returned
153662306a36Sopenharmony_ci	 * with the rtnl held by it, if not the value -EBUSY returned,
153762306a36Sopenharmony_ci	 * then no need to rtnl_unlock
153862306a36Sopenharmony_ci	 */
153962306a36Sopenharmony_ci	if (ret != -EBUSY)
154062306a36Sopenharmony_ci		rtnl_unlock();
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	return (!ret || ret == -EBUSY) ? count : ret;
154362306a36Sopenharmony_ci}
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_cistatic DEVICE_ATTR_RW(mode);
154662306a36Sopenharmony_ci
154762306a36Sopenharmony_ciint ipoib_cm_add_mode_attr(struct net_device *dev)
154862306a36Sopenharmony_ci{
154962306a36Sopenharmony_ci	return device_create_file(&dev->dev, &dev_attr_mode);
155062306a36Sopenharmony_ci}
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_cistatic void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
155362306a36Sopenharmony_ci{
155462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
155562306a36Sopenharmony_ci	struct ib_srq_init_attr srq_init_attr = {
155662306a36Sopenharmony_ci		.srq_type = IB_SRQT_BASIC,
155762306a36Sopenharmony_ci		.attr = {
155862306a36Sopenharmony_ci			.max_wr  = ipoib_recvq_size,
155962306a36Sopenharmony_ci			.max_sge = max_sge
156062306a36Sopenharmony_ci		}
156162306a36Sopenharmony_ci	};
156262306a36Sopenharmony_ci
156362306a36Sopenharmony_ci	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
156462306a36Sopenharmony_ci	if (IS_ERR(priv->cm.srq)) {
156562306a36Sopenharmony_ci		if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP)
156662306a36Sopenharmony_ci			pr_warn("%s: failed to allocate SRQ, error %ld\n",
156762306a36Sopenharmony_ci			       priv->ca->name, PTR_ERR(priv->cm.srq));
156862306a36Sopenharmony_ci		priv->cm.srq = NULL;
156962306a36Sopenharmony_ci		return;
157062306a36Sopenharmony_ci	}
157162306a36Sopenharmony_ci
157262306a36Sopenharmony_ci	priv->cm.srq_ring = vzalloc(array_size(ipoib_recvq_size,
157362306a36Sopenharmony_ci					       sizeof(*priv->cm.srq_ring)));
157462306a36Sopenharmony_ci	if (!priv->cm.srq_ring) {
157562306a36Sopenharmony_ci		ib_destroy_srq(priv->cm.srq);
157662306a36Sopenharmony_ci		priv->cm.srq = NULL;
157762306a36Sopenharmony_ci		return;
157862306a36Sopenharmony_ci	}
157962306a36Sopenharmony_ci
158062306a36Sopenharmony_ci}
158162306a36Sopenharmony_ci
158262306a36Sopenharmony_ciint ipoib_cm_dev_init(struct net_device *dev)
158362306a36Sopenharmony_ci{
158462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
158562306a36Sopenharmony_ci	int max_srq_sge, i;
158662306a36Sopenharmony_ci	u8 addr;
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.passive_ids);
158962306a36Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.reap_list);
159062306a36Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.start_list);
159162306a36Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_error_list);
159262306a36Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_flush_list);
159362306a36Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_drain_list);
159462306a36Sopenharmony_ci	INIT_LIST_HEAD(&priv->cm.rx_reap_list);
159562306a36Sopenharmony_ci	INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
159662306a36Sopenharmony_ci	INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
159762306a36Sopenharmony_ci	INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
159862306a36Sopenharmony_ci	INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
159962306a36Sopenharmony_ci	INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
160062306a36Sopenharmony_ci
160162306a36Sopenharmony_ci	skb_queue_head_init(&priv->cm.skb_queue);
160262306a36Sopenharmony_ci
160362306a36Sopenharmony_ci	ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
160462306a36Sopenharmony_ci
160562306a36Sopenharmony_ci	max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
160662306a36Sopenharmony_ci	ipoib_cm_create_srq(dev, max_srq_sge);
160762306a36Sopenharmony_ci	if (ipoib_cm_has_srq(dev)) {
160862306a36Sopenharmony_ci		priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
160962306a36Sopenharmony_ci		priv->cm.num_frags  = max_srq_sge;
161062306a36Sopenharmony_ci		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
161162306a36Sopenharmony_ci			  priv->cm.max_cm_mtu, priv->cm.num_frags);
161262306a36Sopenharmony_ci	} else {
161362306a36Sopenharmony_ci		priv->cm.max_cm_mtu = IPOIB_CM_MTU;
161462306a36Sopenharmony_ci		priv->cm.num_frags  = IPOIB_CM_RX_SG;
161562306a36Sopenharmony_ci	}
161662306a36Sopenharmony_ci
161762306a36Sopenharmony_ci	ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci	if (ipoib_cm_has_srq(dev)) {
162062306a36Sopenharmony_ci		for (i = 0; i < ipoib_recvq_size; ++i) {
162162306a36Sopenharmony_ci			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
162262306a36Sopenharmony_ci						   priv->cm.num_frags - 1,
162362306a36Sopenharmony_ci						   priv->cm.srq_ring[i].mapping,
162462306a36Sopenharmony_ci						   GFP_KERNEL)) {
162562306a36Sopenharmony_ci				ipoib_warn(priv, "failed to allocate "
162662306a36Sopenharmony_ci					   "receive buffer %d\n", i);
162762306a36Sopenharmony_ci				ipoib_cm_dev_cleanup(dev);
162862306a36Sopenharmony_ci				return -ENOMEM;
162962306a36Sopenharmony_ci			}
163062306a36Sopenharmony_ci
163162306a36Sopenharmony_ci			if (ipoib_cm_post_receive_srq(dev, i)) {
163262306a36Sopenharmony_ci				ipoib_warn(priv, "ipoib_cm_post_receive_srq "
163362306a36Sopenharmony_ci					   "failed for buf %d\n", i);
163462306a36Sopenharmony_ci				ipoib_cm_dev_cleanup(dev);
163562306a36Sopenharmony_ci				return -EIO;
163662306a36Sopenharmony_ci			}
163762306a36Sopenharmony_ci		}
163862306a36Sopenharmony_ci	}
163962306a36Sopenharmony_ci
164062306a36Sopenharmony_ci	addr = IPOIB_FLAGS_RC;
164162306a36Sopenharmony_ci	dev_addr_mod(dev, 0, &addr, 1);
164262306a36Sopenharmony_ci	return 0;
164362306a36Sopenharmony_ci}
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_civoid ipoib_cm_dev_cleanup(struct net_device *dev)
164662306a36Sopenharmony_ci{
164762306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci	if (!priv->cm.srq)
165062306a36Sopenharmony_ci		return;
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci	ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
165362306a36Sopenharmony_ci
165462306a36Sopenharmony_ci	ib_destroy_srq(priv->cm.srq);
165562306a36Sopenharmony_ci	priv->cm.srq = NULL;
165662306a36Sopenharmony_ci	if (!priv->cm.srq_ring)
165762306a36Sopenharmony_ci		return;
165862306a36Sopenharmony_ci
165962306a36Sopenharmony_ci	ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring);
166062306a36Sopenharmony_ci	priv->cm.srq_ring = NULL;
166162306a36Sopenharmony_ci}
1662