162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
362306a36Sopenharmony_ci * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
462306a36Sopenharmony_ci * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
562306a36Sopenharmony_ci * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * This software is available to you under a choice of one of two
862306a36Sopenharmony_ci * licenses.  You may choose to be licensed under the terms of the GNU
962306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file
1062306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the
1162306a36Sopenharmony_ci * OpenIB.org BSD license below:
1262306a36Sopenharmony_ci *
1362306a36Sopenharmony_ci *     Redistribution and use in source and binary forms, with or
1462306a36Sopenharmony_ci *     without modification, are permitted provided that the following
1562306a36Sopenharmony_ci *     conditions are met:
1662306a36Sopenharmony_ci *
1762306a36Sopenharmony_ci *      - Redistributions of source code must retain the above
1862306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
1962306a36Sopenharmony_ci *        disclaimer.
2062306a36Sopenharmony_ci *
2162306a36Sopenharmony_ci *      - Redistributions in binary form must reproduce the above
2262306a36Sopenharmony_ci *        copyright notice, this list of conditions and the following
2362306a36Sopenharmony_ci *        disclaimer in the documentation and/or other materials
2462306a36Sopenharmony_ci *        provided with the distribution.
2562306a36Sopenharmony_ci *
2662306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
2762306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
2862306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
2962306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
3062306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
3162306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
3262306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3362306a36Sopenharmony_ci * SOFTWARE.
3462306a36Sopenharmony_ci */
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci#include <linux/delay.h>
3762306a36Sopenharmony_ci#include <linux/moduleparam.h>
3862306a36Sopenharmony_ci#include <linux/dma-mapping.h>
3962306a36Sopenharmony_ci#include <linux/slab.h>
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci#include <linux/ip.h>
4262306a36Sopenharmony_ci#include <linux/tcp.h>
4362306a36Sopenharmony_ci#include <rdma/ib_cache.h>
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci#include "ipoib.h"
4662306a36Sopenharmony_ci
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
/*
 * Data path debug knob, only built when CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
 * is set.  Exposed as a module parameter with mode 0644.
 */
static int data_debug_level;

module_param(data_debug_level, int, 0644);
MODULE_PARM_DESC(data_debug_level, "Enable data path debug tracing if > 0");
#endif
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistruct ipoib_ah *ipoib_create_ah(struct net_device *dev,
5662306a36Sopenharmony_ci				 struct ib_pd *pd, struct rdma_ah_attr *attr)
5762306a36Sopenharmony_ci{
5862306a36Sopenharmony_ci	struct ipoib_ah *ah;
5962306a36Sopenharmony_ci	struct ib_ah *vah;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	ah = kmalloc(sizeof(*ah), GFP_KERNEL);
6262306a36Sopenharmony_ci	if (!ah)
6362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	ah->dev       = dev;
6662306a36Sopenharmony_ci	ah->last_send = 0;
6762306a36Sopenharmony_ci	kref_init(&ah->ref);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	vah = rdma_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE);
7062306a36Sopenharmony_ci	if (IS_ERR(vah)) {
7162306a36Sopenharmony_ci		kfree(ah);
7262306a36Sopenharmony_ci		ah = (struct ipoib_ah *)vah;
7362306a36Sopenharmony_ci	} else {
7462306a36Sopenharmony_ci		ah->ah = vah;
7562306a36Sopenharmony_ci		ipoib_dbg(ipoib_priv(dev), "Created ah %p\n", ah->ah);
7662306a36Sopenharmony_ci	}
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	return ah;
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_civoid ipoib_free_ah(struct kref *kref)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	struct ipoib_ah *ah = container_of(kref, struct ipoib_ah, ref);
8462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(ah->dev);
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	unsigned long flags;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
8962306a36Sopenharmony_ci	list_add_tail(&ah->list, &priv->dead_ahs);
9062306a36Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
9162306a36Sopenharmony_ci}
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cistatic void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
9462306a36Sopenharmony_ci				  u64 mapping[IPOIB_UD_RX_SG])
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	ib_dma_unmap_single(priv->ca, mapping[0],
9762306a36Sopenharmony_ci			    IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
9862306a36Sopenharmony_ci			    DMA_FROM_DEVICE);
9962306a36Sopenharmony_ci}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_cistatic int ipoib_ib_post_receive(struct net_device *dev, int id)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
10462306a36Sopenharmony_ci	int ret;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	priv->rx_wr.wr_id   = id | IPOIB_OP_RECV;
10762306a36Sopenharmony_ci	priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
10862306a36Sopenharmony_ci	priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL);
11262306a36Sopenharmony_ci	if (unlikely(ret)) {
11362306a36Sopenharmony_ci		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
11462306a36Sopenharmony_ci		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
11562306a36Sopenharmony_ci		dev_kfree_skb_any(priv->rx_ring[id].skb);
11662306a36Sopenharmony_ci		priv->rx_ring[id].skb = NULL;
11762306a36Sopenharmony_ci	}
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	return ret;
12062306a36Sopenharmony_ci}
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_cistatic struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
12362306a36Sopenharmony_ci{
12462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
12562306a36Sopenharmony_ci	struct sk_buff *skb;
12662306a36Sopenharmony_ci	int buf_size;
12762306a36Sopenharmony_ci	u64 *mapping;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN);
13262306a36Sopenharmony_ci	if (unlikely(!skb))
13362306a36Sopenharmony_ci		return NULL;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	/*
13662306a36Sopenharmony_ci	 * the IP header will be at IPOIP_HARD_LEN + IB_GRH_BYTES, that is
13762306a36Sopenharmony_ci	 * 64 bytes aligned
13862306a36Sopenharmony_ci	 */
13962306a36Sopenharmony_ci	skb_reserve(skb, sizeof(struct ipoib_pseudo_header));
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci	mapping = priv->rx_ring[id].mapping;
14262306a36Sopenharmony_ci	mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
14362306a36Sopenharmony_ci				       DMA_FROM_DEVICE);
14462306a36Sopenharmony_ci	if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
14562306a36Sopenharmony_ci		goto error;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	priv->rx_ring[id].skb = skb;
14862306a36Sopenharmony_ci	return skb;
14962306a36Sopenharmony_cierror:
15062306a36Sopenharmony_ci	dev_kfree_skb_any(skb);
15162306a36Sopenharmony_ci	return NULL;
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_cistatic int ipoib_ib_post_receives(struct net_device *dev)
15562306a36Sopenharmony_ci{
15662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
15762306a36Sopenharmony_ci	int i;
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	for (i = 0; i < ipoib_recvq_size; ++i) {
16062306a36Sopenharmony_ci		if (!ipoib_alloc_rx_skb(dev, i)) {
16162306a36Sopenharmony_ci			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
16262306a36Sopenharmony_ci			return -ENOMEM;
16362306a36Sopenharmony_ci		}
16462306a36Sopenharmony_ci		if (ipoib_ib_post_receive(dev, i)) {
16562306a36Sopenharmony_ci			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
16662306a36Sopenharmony_ci			return -EIO;
16762306a36Sopenharmony_ci		}
16862306a36Sopenharmony_ci	}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	return 0;
17162306a36Sopenharmony_ci}
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_cistatic void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
17462306a36Sopenharmony_ci{
17562306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
17662306a36Sopenharmony_ci	unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
17762306a36Sopenharmony_ci	struct sk_buff *skb;
17862306a36Sopenharmony_ci	u64 mapping[IPOIB_UD_RX_SG];
17962306a36Sopenharmony_ci	union ib_gid *dgid;
18062306a36Sopenharmony_ci	union ib_gid *sgid;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
18362306a36Sopenharmony_ci		       wr_id, wc->status);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	if (unlikely(wr_id >= ipoib_recvq_size)) {
18662306a36Sopenharmony_ci		ipoib_warn(priv, "recv completion event with wrid %d (> %d)\n",
18762306a36Sopenharmony_ci			   wr_id, ipoib_recvq_size);
18862306a36Sopenharmony_ci		return;
18962306a36Sopenharmony_ci	}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	skb  = priv->rx_ring[wr_id].skb;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	if (unlikely(wc->status != IB_WC_SUCCESS)) {
19462306a36Sopenharmony_ci		if (wc->status != IB_WC_WR_FLUSH_ERR)
19562306a36Sopenharmony_ci			ipoib_warn(priv,
19662306a36Sopenharmony_ci				   "failed recv event (status=%d, wrid=%d vend_err %#x)\n",
19762306a36Sopenharmony_ci				   wc->status, wr_id, wc->vendor_err);
19862306a36Sopenharmony_ci		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
19962306a36Sopenharmony_ci		dev_kfree_skb_any(skb);
20062306a36Sopenharmony_ci		priv->rx_ring[wr_id].skb = NULL;
20162306a36Sopenharmony_ci		return;
20262306a36Sopenharmony_ci	}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	memcpy(mapping, priv->rx_ring[wr_id].mapping,
20562306a36Sopenharmony_ci	       IPOIB_UD_RX_SG * sizeof(*mapping));
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	/*
20862306a36Sopenharmony_ci	 * If we can't allocate a new RX buffer, dump
20962306a36Sopenharmony_ci	 * this packet and reuse the old buffer.
21062306a36Sopenharmony_ci	 */
21162306a36Sopenharmony_ci	if (unlikely(!ipoib_alloc_rx_skb(dev, wr_id))) {
21262306a36Sopenharmony_ci		++dev->stats.rx_dropped;
21362306a36Sopenharmony_ci		goto repost;
21462306a36Sopenharmony_ci	}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
21762306a36Sopenharmony_ci		       wc->byte_len, wc->slid);
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	ipoib_ud_dma_unmap_rx(priv, mapping);
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	skb_put(skb, wc->byte_len);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	/* First byte of dgid signals multicast when 0xff */
22462306a36Sopenharmony_ci	dgid = &((struct ib_grh *)skb->data)->dgid;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	if (!(wc->wc_flags & IB_WC_GRH) || dgid->raw[0] != 0xff)
22762306a36Sopenharmony_ci		skb->pkt_type = PACKET_HOST;
22862306a36Sopenharmony_ci	else if (memcmp(dgid, dev->broadcast + 4, sizeof(union ib_gid)) == 0)
22962306a36Sopenharmony_ci		skb->pkt_type = PACKET_BROADCAST;
23062306a36Sopenharmony_ci	else
23162306a36Sopenharmony_ci		skb->pkt_type = PACKET_MULTICAST;
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	sgid = &((struct ib_grh *)skb->data)->sgid;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	/*
23662306a36Sopenharmony_ci	 * Drop packets that this interface sent, ie multicast packets
23762306a36Sopenharmony_ci	 * that the HCA has replicated.
23862306a36Sopenharmony_ci	 */
23962306a36Sopenharmony_ci	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num) {
24062306a36Sopenharmony_ci		int need_repost = 1;
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci		if ((wc->wc_flags & IB_WC_GRH) &&
24362306a36Sopenharmony_ci		    sgid->global.interface_id != priv->local_gid.global.interface_id)
24462306a36Sopenharmony_ci			need_repost = 0;
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci		if (need_repost) {
24762306a36Sopenharmony_ci			dev_kfree_skb_any(skb);
24862306a36Sopenharmony_ci			goto repost;
24962306a36Sopenharmony_ci		}
25062306a36Sopenharmony_ci	}
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	skb_pull(skb, IB_GRH_BYTES);
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
25562306a36Sopenharmony_ci	skb_add_pseudo_hdr(skb);
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci	++dev->stats.rx_packets;
25862306a36Sopenharmony_ci	dev->stats.rx_bytes += skb->len;
25962306a36Sopenharmony_ci	if (skb->pkt_type == PACKET_MULTICAST)
26062306a36Sopenharmony_ci		dev->stats.multicast++;
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	skb->dev = dev;
26362306a36Sopenharmony_ci	if ((dev->features & NETIF_F_RXCSUM) &&
26462306a36Sopenharmony_ci			likely(wc->wc_flags & IB_WC_IP_CSUM_OK))
26562306a36Sopenharmony_ci		skb->ip_summed = CHECKSUM_UNNECESSARY;
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	napi_gro_receive(&priv->recv_napi, skb);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_cirepost:
27062306a36Sopenharmony_ci	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
27162306a36Sopenharmony_ci		ipoib_warn(priv, "ipoib_ib_post_receive failed "
27262306a36Sopenharmony_ci			   "for buf %d\n", wr_id);
27362306a36Sopenharmony_ci}
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ciint ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req)
27662306a36Sopenharmony_ci{
27762306a36Sopenharmony_ci	struct sk_buff *skb = tx_req->skb;
27862306a36Sopenharmony_ci	u64 *mapping = tx_req->mapping;
27962306a36Sopenharmony_ci	int i;
28062306a36Sopenharmony_ci	int off;
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci	if (skb_headlen(skb)) {
28362306a36Sopenharmony_ci		mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
28462306a36Sopenharmony_ci					       DMA_TO_DEVICE);
28562306a36Sopenharmony_ci		if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
28662306a36Sopenharmony_ci			return -EIO;
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci		off = 1;
28962306a36Sopenharmony_ci	} else
29062306a36Sopenharmony_ci		off = 0;
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
29362306a36Sopenharmony_ci		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
29462306a36Sopenharmony_ci		mapping[i + off] = ib_dma_map_page(ca,
29562306a36Sopenharmony_ci						 skb_frag_page(frag),
29662306a36Sopenharmony_ci						 skb_frag_off(frag),
29762306a36Sopenharmony_ci						 skb_frag_size(frag),
29862306a36Sopenharmony_ci						 DMA_TO_DEVICE);
29962306a36Sopenharmony_ci		if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
30062306a36Sopenharmony_ci			goto partial_error;
30162306a36Sopenharmony_ci	}
30262306a36Sopenharmony_ci	return 0;
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_cipartial_error:
30562306a36Sopenharmony_ci	for (; i > 0; --i) {
30662306a36Sopenharmony_ci		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci		ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag), DMA_TO_DEVICE);
30962306a36Sopenharmony_ci	}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	if (off)
31262306a36Sopenharmony_ci		ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	return -EIO;
31562306a36Sopenharmony_ci}
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_civoid ipoib_dma_unmap_tx(struct ipoib_dev_priv *priv,
31862306a36Sopenharmony_ci			struct ipoib_tx_buf *tx_req)
31962306a36Sopenharmony_ci{
32062306a36Sopenharmony_ci	struct sk_buff *skb = tx_req->skb;
32162306a36Sopenharmony_ci	u64 *mapping = tx_req->mapping;
32262306a36Sopenharmony_ci	int i;
32362306a36Sopenharmony_ci	int off;
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	if (skb_headlen(skb)) {
32662306a36Sopenharmony_ci		ib_dma_unmap_single(priv->ca, mapping[0], skb_headlen(skb),
32762306a36Sopenharmony_ci				    DMA_TO_DEVICE);
32862306a36Sopenharmony_ci		off = 1;
32962306a36Sopenharmony_ci	} else
33062306a36Sopenharmony_ci		off = 0;
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
33362306a36Sopenharmony_ci		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci		ib_dma_unmap_page(priv->ca, mapping[i + off],
33662306a36Sopenharmony_ci				  skb_frag_size(frag), DMA_TO_DEVICE);
33762306a36Sopenharmony_ci	}
33862306a36Sopenharmony_ci}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci/*
34162306a36Sopenharmony_ci * As the result of a completion error the QP Can be transferred to SQE states.
34262306a36Sopenharmony_ci * The function checks if the (send)QP is in SQE state and
34362306a36Sopenharmony_ci * moves it back to RTS state, that in order to have it functional again.
34462306a36Sopenharmony_ci */
34562306a36Sopenharmony_cistatic void ipoib_qp_state_validate_work(struct work_struct *work)
34662306a36Sopenharmony_ci{
34762306a36Sopenharmony_ci	struct ipoib_qp_state_validate *qp_work =
34862306a36Sopenharmony_ci		container_of(work, struct ipoib_qp_state_validate, work);
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = qp_work->priv;
35162306a36Sopenharmony_ci	struct ib_qp_attr qp_attr;
35262306a36Sopenharmony_ci	struct ib_qp_init_attr query_init_attr;
35362306a36Sopenharmony_ci	int ret;
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
35662306a36Sopenharmony_ci	if (ret) {
35762306a36Sopenharmony_ci		ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
35862306a36Sopenharmony_ci			   __func__, ret);
35962306a36Sopenharmony_ci		goto free_res;
36062306a36Sopenharmony_ci	}
36162306a36Sopenharmony_ci	pr_info("%s: QP: 0x%x is in state: %d\n",
36262306a36Sopenharmony_ci		__func__, priv->qp->qp_num, qp_attr.qp_state);
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	/* currently support only in SQE->RTS transition*/
36562306a36Sopenharmony_ci	if (qp_attr.qp_state == IB_QPS_SQE) {
36662306a36Sopenharmony_ci		qp_attr.qp_state = IB_QPS_RTS;
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci		ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
36962306a36Sopenharmony_ci		if (ret) {
37062306a36Sopenharmony_ci			pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
37162306a36Sopenharmony_ci				ret, priv->qp->qp_num);
37262306a36Sopenharmony_ci			goto free_res;
37362306a36Sopenharmony_ci		}
37462306a36Sopenharmony_ci		pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
37562306a36Sopenharmony_ci			__func__, priv->qp->qp_num);
37662306a36Sopenharmony_ci	} else {
37762306a36Sopenharmony_ci		pr_warn("QP (%d) will stay in state: %d\n",
37862306a36Sopenharmony_ci			priv->qp->qp_num, qp_attr.qp_state);
37962306a36Sopenharmony_ci	}
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_cifree_res:
38262306a36Sopenharmony_ci	kfree(qp_work);
38362306a36Sopenharmony_ci}
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_cistatic void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
38662306a36Sopenharmony_ci{
38762306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
38862306a36Sopenharmony_ci	unsigned int wr_id = wc->wr_id;
38962306a36Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
39262306a36Sopenharmony_ci		       wr_id, wc->status);
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	if (unlikely(wr_id >= ipoib_sendq_size)) {
39562306a36Sopenharmony_ci		ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
39662306a36Sopenharmony_ci			   wr_id, ipoib_sendq_size);
39762306a36Sopenharmony_ci		return;
39862306a36Sopenharmony_ci	}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	tx_req = &priv->tx_ring[wr_id];
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	ipoib_dma_unmap_tx(priv, tx_req);
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	++dev->stats.tx_packets;
40562306a36Sopenharmony_ci	dev->stats.tx_bytes += tx_req->skb->len;
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	dev_kfree_skb_any(tx_req->skb);
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	++priv->tx_tail;
41062306a36Sopenharmony_ci	++priv->global_tx_tail;
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	if (unlikely(netif_queue_stopped(dev) &&
41362306a36Sopenharmony_ci		     ((priv->global_tx_head - priv->global_tx_tail) <=
41462306a36Sopenharmony_ci		      ipoib_sendq_size >> 1) &&
41562306a36Sopenharmony_ci		     test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
41662306a36Sopenharmony_ci		netif_wake_queue(dev);
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS &&
41962306a36Sopenharmony_ci	    wc->status != IB_WC_WR_FLUSH_ERR) {
42062306a36Sopenharmony_ci		struct ipoib_qp_state_validate *qp_work;
42162306a36Sopenharmony_ci		ipoib_warn(priv,
42262306a36Sopenharmony_ci			   "failed send event (status=%d, wrid=%d vend_err %#x)\n",
42362306a36Sopenharmony_ci			   wc->status, wr_id, wc->vendor_err);
42462306a36Sopenharmony_ci		qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
42562306a36Sopenharmony_ci		if (!qp_work)
42662306a36Sopenharmony_ci			return;
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci		INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
42962306a36Sopenharmony_ci		qp_work->priv = priv;
43062306a36Sopenharmony_ci		queue_work(priv->wq, &qp_work->work);
43162306a36Sopenharmony_ci	}
43262306a36Sopenharmony_ci}
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_cistatic int poll_tx(struct ipoib_dev_priv *priv)
43562306a36Sopenharmony_ci{
43662306a36Sopenharmony_ci	int n, i;
43762306a36Sopenharmony_ci	struct ib_wc *wc;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
44062306a36Sopenharmony_ci	for (i = 0; i < n; ++i) {
44162306a36Sopenharmony_ci		wc = priv->send_wc + i;
44262306a36Sopenharmony_ci		if (wc->wr_id & IPOIB_OP_CM)
44362306a36Sopenharmony_ci			ipoib_cm_handle_tx_wc(priv->dev, priv->send_wc + i);
44462306a36Sopenharmony_ci		else
44562306a36Sopenharmony_ci			ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
44662306a36Sopenharmony_ci	}
44762306a36Sopenharmony_ci	return n == MAX_SEND_CQE;
44862306a36Sopenharmony_ci}
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ciint ipoib_rx_poll(struct napi_struct *napi, int budget)
45162306a36Sopenharmony_ci{
45262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv =
45362306a36Sopenharmony_ci		container_of(napi, struct ipoib_dev_priv, recv_napi);
45462306a36Sopenharmony_ci	struct net_device *dev = priv->dev;
45562306a36Sopenharmony_ci	int done;
45662306a36Sopenharmony_ci	int t;
45762306a36Sopenharmony_ci	int n, i;
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci	done  = 0;
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_cipoll_more:
46262306a36Sopenharmony_ci	while (done < budget) {
46362306a36Sopenharmony_ci		int max = (budget - done);
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci		t = min(IPOIB_NUM_WC, max);
46662306a36Sopenharmony_ci		n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci		for (i = 0; i < n; i++) {
46962306a36Sopenharmony_ci			struct ib_wc *wc = priv->ibwc + i;
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci			if (wc->wr_id & IPOIB_OP_RECV) {
47262306a36Sopenharmony_ci				++done;
47362306a36Sopenharmony_ci				if (wc->wr_id & IPOIB_OP_CM)
47462306a36Sopenharmony_ci					ipoib_cm_handle_rx_wc(dev, wc);
47562306a36Sopenharmony_ci				else
47662306a36Sopenharmony_ci					ipoib_ib_handle_rx_wc(dev, wc);
47762306a36Sopenharmony_ci			} else {
47862306a36Sopenharmony_ci				pr_warn("%s: Got unexpected wqe id\n", __func__);
47962306a36Sopenharmony_ci			}
48062306a36Sopenharmony_ci		}
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci		if (n != t)
48362306a36Sopenharmony_ci			break;
48462306a36Sopenharmony_ci	}
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci	if (done < budget) {
48762306a36Sopenharmony_ci		napi_complete(napi);
48862306a36Sopenharmony_ci		if (unlikely(ib_req_notify_cq(priv->recv_cq,
48962306a36Sopenharmony_ci					      IB_CQ_NEXT_COMP |
49062306a36Sopenharmony_ci					      IB_CQ_REPORT_MISSED_EVENTS)) &&
49162306a36Sopenharmony_ci		    napi_reschedule(napi))
49262306a36Sopenharmony_ci			goto poll_more;
49362306a36Sopenharmony_ci	}
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci	return done;
49662306a36Sopenharmony_ci}
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ciint ipoib_tx_poll(struct napi_struct *napi, int budget)
49962306a36Sopenharmony_ci{
50062306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv,
50162306a36Sopenharmony_ci						   send_napi);
50262306a36Sopenharmony_ci	struct net_device *dev = priv->dev;
50362306a36Sopenharmony_ci	int n, i;
50462306a36Sopenharmony_ci	struct ib_wc *wc;
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_cipoll_more:
50762306a36Sopenharmony_ci	n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	for (i = 0; i < n; i++) {
51062306a36Sopenharmony_ci		wc = priv->send_wc + i;
51162306a36Sopenharmony_ci		if (wc->wr_id & IPOIB_OP_CM)
51262306a36Sopenharmony_ci			ipoib_cm_handle_tx_wc(dev, wc);
51362306a36Sopenharmony_ci		else
51462306a36Sopenharmony_ci			ipoib_ib_handle_tx_wc(dev, wc);
51562306a36Sopenharmony_ci	}
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	if (n < budget) {
51862306a36Sopenharmony_ci		napi_complete(napi);
51962306a36Sopenharmony_ci		if (unlikely(ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
52062306a36Sopenharmony_ci					      IB_CQ_REPORT_MISSED_EVENTS)) &&
52162306a36Sopenharmony_ci		    napi_reschedule(napi))
52262306a36Sopenharmony_ci			goto poll_more;
52362306a36Sopenharmony_ci	}
52462306a36Sopenharmony_ci	return n < 0 ? 0 : n;
52562306a36Sopenharmony_ci}
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_civoid ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr)
52862306a36Sopenharmony_ci{
52962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ctx_ptr;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	napi_schedule(&priv->recv_napi);
53262306a36Sopenharmony_ci}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_civoid ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ctx_ptr;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	napi_schedule(&priv->send_napi);
53962306a36Sopenharmony_ci}
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_cistatic inline int post_send(struct ipoib_dev_priv *priv,
54262306a36Sopenharmony_ci			    unsigned int wr_id,
54362306a36Sopenharmony_ci			    struct ib_ah *address, u32 dqpn,
54462306a36Sopenharmony_ci			    struct ipoib_tx_buf *tx_req,
54562306a36Sopenharmony_ci			    void *head, int hlen)
54662306a36Sopenharmony_ci{
54762306a36Sopenharmony_ci	struct sk_buff *skb = tx_req->skb;
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	ipoib_build_sge(priv, tx_req);
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci	priv->tx_wr.wr.wr_id	= wr_id;
55262306a36Sopenharmony_ci	priv->tx_wr.remote_qpn	= dqpn;
55362306a36Sopenharmony_ci	priv->tx_wr.ah		= address;
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci	if (head) {
55662306a36Sopenharmony_ci		priv->tx_wr.mss		= skb_shinfo(skb)->gso_size;
55762306a36Sopenharmony_ci		priv->tx_wr.header	= head;
55862306a36Sopenharmony_ci		priv->tx_wr.hlen	= hlen;
55962306a36Sopenharmony_ci		priv->tx_wr.wr.opcode	= IB_WR_LSO;
56062306a36Sopenharmony_ci	} else
56162306a36Sopenharmony_ci		priv->tx_wr.wr.opcode	= IB_WR_SEND;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL);
56462306a36Sopenharmony_ci}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ciint ipoib_send(struct net_device *dev, struct sk_buff *skb,
56762306a36Sopenharmony_ci	       struct ib_ah *address, u32 dqpn)
56862306a36Sopenharmony_ci{
56962306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
57062306a36Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
57162306a36Sopenharmony_ci	int hlen, rc;
57262306a36Sopenharmony_ci	void *phead;
57362306a36Sopenharmony_ci	unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb);
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci	if (skb_is_gso(skb)) {
57662306a36Sopenharmony_ci		hlen = skb_tcp_all_headers(skb);
57762306a36Sopenharmony_ci		phead = skb->data;
57862306a36Sopenharmony_ci		if (unlikely(!skb_pull(skb, hlen))) {
57962306a36Sopenharmony_ci			ipoib_warn(priv, "linear data too small\n");
58062306a36Sopenharmony_ci			++dev->stats.tx_dropped;
58162306a36Sopenharmony_ci			++dev->stats.tx_errors;
58262306a36Sopenharmony_ci			dev_kfree_skb_any(skb);
58362306a36Sopenharmony_ci			return -1;
58462306a36Sopenharmony_ci		}
58562306a36Sopenharmony_ci	} else {
58662306a36Sopenharmony_ci		if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
58762306a36Sopenharmony_ci			ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
58862306a36Sopenharmony_ci				   skb->len, priv->mcast_mtu + IPOIB_ENCAP_LEN);
58962306a36Sopenharmony_ci			++dev->stats.tx_dropped;
59062306a36Sopenharmony_ci			++dev->stats.tx_errors;
59162306a36Sopenharmony_ci			ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
59262306a36Sopenharmony_ci			return -1;
59362306a36Sopenharmony_ci		}
59462306a36Sopenharmony_ci		phead = NULL;
59562306a36Sopenharmony_ci		hlen  = 0;
59662306a36Sopenharmony_ci	}
59762306a36Sopenharmony_ci	if (skb_shinfo(skb)->nr_frags > usable_sge) {
59862306a36Sopenharmony_ci		if (skb_linearize(skb) < 0) {
59962306a36Sopenharmony_ci			ipoib_warn(priv, "skb could not be linearized\n");
60062306a36Sopenharmony_ci			++dev->stats.tx_dropped;
60162306a36Sopenharmony_ci			++dev->stats.tx_errors;
60262306a36Sopenharmony_ci			dev_kfree_skb_any(skb);
60362306a36Sopenharmony_ci			return -1;
60462306a36Sopenharmony_ci		}
60562306a36Sopenharmony_ci		/* Does skb_linearize return ok without reducing nr_frags? */
60662306a36Sopenharmony_ci		if (skb_shinfo(skb)->nr_frags > usable_sge) {
60762306a36Sopenharmony_ci			ipoib_warn(priv, "too many frags after skb linearize\n");
60862306a36Sopenharmony_ci			++dev->stats.tx_dropped;
60962306a36Sopenharmony_ci			++dev->stats.tx_errors;
61062306a36Sopenharmony_ci			dev_kfree_skb_any(skb);
61162306a36Sopenharmony_ci			return -1;
61262306a36Sopenharmony_ci		}
61362306a36Sopenharmony_ci	}
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	ipoib_dbg_data(priv,
61662306a36Sopenharmony_ci		       "sending packet, length=%d address=%p dqpn=0x%06x\n",
61762306a36Sopenharmony_ci		       skb->len, address, dqpn);
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	/*
62062306a36Sopenharmony_ci	 * We put the skb into the tx_ring _before_ we call post_send()
62162306a36Sopenharmony_ci	 * because it's entirely possible that the completion handler will
62262306a36Sopenharmony_ci	 * run before we execute anything after the post_send().  That
62362306a36Sopenharmony_ci	 * means we have to make sure everything is properly recorded and
62462306a36Sopenharmony_ci	 * our state is consistent before we call post_send().
62562306a36Sopenharmony_ci	 */
62662306a36Sopenharmony_ci	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
62762306a36Sopenharmony_ci	tx_req->skb = skb;
62862306a36Sopenharmony_ci	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
62962306a36Sopenharmony_ci		++dev->stats.tx_errors;
63062306a36Sopenharmony_ci		dev_kfree_skb_any(skb);
63162306a36Sopenharmony_ci		return -1;
63262306a36Sopenharmony_ci	}
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ci	if (skb->ip_summed == CHECKSUM_PARTIAL)
63562306a36Sopenharmony_ci		priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM;
63662306a36Sopenharmony_ci	else
63762306a36Sopenharmony_ci		priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
63862306a36Sopenharmony_ci	/* increase the tx_head after send success, but use it for queue state */
63962306a36Sopenharmony_ci	if ((priv->global_tx_head - priv->global_tx_tail) ==
64062306a36Sopenharmony_ci	    ipoib_sendq_size - 1) {
64162306a36Sopenharmony_ci		ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
64262306a36Sopenharmony_ci		netif_stop_queue(dev);
64362306a36Sopenharmony_ci	}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	skb_orphan(skb);
64662306a36Sopenharmony_ci	skb_dst_drop(skb);
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	if (netif_queue_stopped(dev))
64962306a36Sopenharmony_ci		if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
65062306a36Sopenharmony_ci				     IB_CQ_REPORT_MISSED_EVENTS) < 0)
65162306a36Sopenharmony_ci			ipoib_warn(priv, "request notify on send CQ failed\n");
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
65462306a36Sopenharmony_ci		       address, dqpn, tx_req, phead, hlen);
65562306a36Sopenharmony_ci	if (unlikely(rc)) {
65662306a36Sopenharmony_ci		ipoib_warn(priv, "post_send failed, error %d\n", rc);
65762306a36Sopenharmony_ci		++dev->stats.tx_errors;
65862306a36Sopenharmony_ci		ipoib_dma_unmap_tx(priv, tx_req);
65962306a36Sopenharmony_ci		dev_kfree_skb_any(skb);
66062306a36Sopenharmony_ci		if (netif_queue_stopped(dev))
66162306a36Sopenharmony_ci			netif_wake_queue(dev);
66262306a36Sopenharmony_ci		rc = 0;
66362306a36Sopenharmony_ci	} else {
66462306a36Sopenharmony_ci		netif_trans_update(dev);
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_ci		rc = priv->tx_head;
66762306a36Sopenharmony_ci		++priv->tx_head;
66862306a36Sopenharmony_ci		++priv->global_tx_head;
66962306a36Sopenharmony_ci	}
67062306a36Sopenharmony_ci	return rc;
67162306a36Sopenharmony_ci}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_cistatic void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv)
67462306a36Sopenharmony_ci{
67562306a36Sopenharmony_ci	struct ipoib_ah *ah, *tah;
67662306a36Sopenharmony_ci	unsigned long flags;
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	netif_tx_lock_bh(priv->dev);
67962306a36Sopenharmony_ci	spin_lock_irqsave(&priv->lock, flags);
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
68262306a36Sopenharmony_ci		if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
68362306a36Sopenharmony_ci			list_del(&ah->list);
68462306a36Sopenharmony_ci			rdma_destroy_ah(ah->ah, 0);
68562306a36Sopenharmony_ci			kfree(ah);
68662306a36Sopenharmony_ci		}
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci	spin_unlock_irqrestore(&priv->lock, flags);
68962306a36Sopenharmony_ci	netif_tx_unlock_bh(priv->dev);
69062306a36Sopenharmony_ci}
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_civoid ipoib_reap_ah(struct work_struct *work)
69362306a36Sopenharmony_ci{
69462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv =
69562306a36Sopenharmony_ci		container_of(work, struct ipoib_dev_priv, ah_reap_task.work);
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	ipoib_reap_dead_ahs(priv);
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
70062306a36Sopenharmony_ci		queue_delayed_work(priv->wq, &priv->ah_reap_task,
70162306a36Sopenharmony_ci				   round_jiffies_relative(HZ));
70262306a36Sopenharmony_ci}
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_cistatic void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv)
70562306a36Sopenharmony_ci{
70662306a36Sopenharmony_ci	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
70762306a36Sopenharmony_ci	queue_delayed_work(priv->wq, &priv->ah_reap_task,
70862306a36Sopenharmony_ci			   round_jiffies_relative(HZ));
70962306a36Sopenharmony_ci}
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_cistatic void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv)
71262306a36Sopenharmony_ci{
71362306a36Sopenharmony_ci	set_bit(IPOIB_STOP_REAPER, &priv->flags);
71462306a36Sopenharmony_ci	cancel_delayed_work(&priv->ah_reap_task);
71562306a36Sopenharmony_ci	/*
71662306a36Sopenharmony_ci	 * After ipoib_stop_ah_reaper() we always go through
71762306a36Sopenharmony_ci	 * ipoib_reap_dead_ahs() which ensures the work is really stopped and
71862306a36Sopenharmony_ci	 * does a final flush out of the dead_ah's list
71962306a36Sopenharmony_ci	 */
72062306a36Sopenharmony_ci}
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_cistatic int recvs_pending(struct net_device *dev)
72362306a36Sopenharmony_ci{
72462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
72562306a36Sopenharmony_ci	int pending = 0;
72662306a36Sopenharmony_ci	int i;
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	for (i = 0; i < ipoib_recvq_size; ++i)
72962306a36Sopenharmony_ci		if (priv->rx_ring[i].skb)
73062306a36Sopenharmony_ci			++pending;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	return pending;
73362306a36Sopenharmony_ci}
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_cistatic void check_qp_movement_and_print(struct ipoib_dev_priv *priv,
73662306a36Sopenharmony_ci					struct ib_qp *qp,
73762306a36Sopenharmony_ci					enum ib_qp_state new_state)
73862306a36Sopenharmony_ci{
73962306a36Sopenharmony_ci	struct ib_qp_attr qp_attr;
74062306a36Sopenharmony_ci	struct ib_qp_init_attr query_init_attr;
74162306a36Sopenharmony_ci	int ret;
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr);
74462306a36Sopenharmony_ci	if (ret) {
74562306a36Sopenharmony_ci		ipoib_warn(priv, "%s: Failed to query QP\n", __func__);
74662306a36Sopenharmony_ci		return;
74762306a36Sopenharmony_ci	}
74862306a36Sopenharmony_ci	/* print according to the new-state and the previous state.*/
74962306a36Sopenharmony_ci	if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET)
75062306a36Sopenharmony_ci		ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n");
75162306a36Sopenharmony_ci	else
75262306a36Sopenharmony_ci		ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n",
75362306a36Sopenharmony_ci			   new_state, qp_attr.qp_state);
75462306a36Sopenharmony_ci}
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_cistatic void ipoib_napi_enable(struct net_device *dev)
75762306a36Sopenharmony_ci{
75862306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	napi_enable(&priv->recv_napi);
76162306a36Sopenharmony_ci	napi_enable(&priv->send_napi);
76262306a36Sopenharmony_ci}
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_cistatic void ipoib_napi_disable(struct net_device *dev)
76562306a36Sopenharmony_ci{
76662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	napi_disable(&priv->recv_napi);
76962306a36Sopenharmony_ci	napi_disable(&priv->send_napi);
77062306a36Sopenharmony_ci}
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ciint ipoib_ib_dev_stop_default(struct net_device *dev)
77362306a36Sopenharmony_ci{
77462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
77562306a36Sopenharmony_ci	struct ib_qp_attr qp_attr;
77662306a36Sopenharmony_ci	unsigned long begin;
77762306a36Sopenharmony_ci	struct ipoib_tx_buf *tx_req;
77862306a36Sopenharmony_ci	int i;
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
78162306a36Sopenharmony_ci		ipoib_napi_disable(dev);
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	ipoib_cm_dev_stop(dev);
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	/*
78662306a36Sopenharmony_ci	 * Move our QP to the error state and then reinitialize in
78762306a36Sopenharmony_ci	 * when all work requests have completed or have been flushed.
78862306a36Sopenharmony_ci	 */
78962306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_ERR;
79062306a36Sopenharmony_ci	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
79162306a36Sopenharmony_ci		check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR);
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci	/* Wait for all sends and receives to complete */
79462306a36Sopenharmony_ci	begin = jiffies;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
79762306a36Sopenharmony_ci		if (time_after(jiffies, begin + 5 * HZ)) {
79862306a36Sopenharmony_ci			ipoib_warn(priv,
79962306a36Sopenharmony_ci				   "timing out; %d sends %d receives not completed\n",
80062306a36Sopenharmony_ci				   priv->tx_head - priv->tx_tail,
80162306a36Sopenharmony_ci				   recvs_pending(dev));
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci			/*
80462306a36Sopenharmony_ci			 * assume the HW is wedged and just free up
80562306a36Sopenharmony_ci			 * all our pending work requests.
80662306a36Sopenharmony_ci			 */
80762306a36Sopenharmony_ci			while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
80862306a36Sopenharmony_ci				tx_req = &priv->tx_ring[priv->tx_tail &
80962306a36Sopenharmony_ci							(ipoib_sendq_size - 1)];
81062306a36Sopenharmony_ci				ipoib_dma_unmap_tx(priv, tx_req);
81162306a36Sopenharmony_ci				dev_kfree_skb_any(tx_req->skb);
81262306a36Sopenharmony_ci				++priv->tx_tail;
81362306a36Sopenharmony_ci				++priv->global_tx_tail;
81462306a36Sopenharmony_ci			}
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci			for (i = 0; i < ipoib_recvq_size; ++i) {
81762306a36Sopenharmony_ci				struct ipoib_rx_buf *rx_req;
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci				rx_req = &priv->rx_ring[i];
82062306a36Sopenharmony_ci				if (!rx_req->skb)
82162306a36Sopenharmony_ci					continue;
82262306a36Sopenharmony_ci				ipoib_ud_dma_unmap_rx(priv,
82362306a36Sopenharmony_ci						      priv->rx_ring[i].mapping);
82462306a36Sopenharmony_ci				dev_kfree_skb_any(rx_req->skb);
82562306a36Sopenharmony_ci				rx_req->skb = NULL;
82662306a36Sopenharmony_ci			}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci			goto timeout;
82962306a36Sopenharmony_ci		}
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci		ipoib_drain_cq(dev);
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci		usleep_range(1000, 2000);
83462306a36Sopenharmony_ci	}
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	ipoib_dbg(priv, "All sends and receives done.\n");
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_citimeout:
83962306a36Sopenharmony_ci	qp_attr.qp_state = IB_QPS_RESET;
84062306a36Sopenharmony_ci	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
84162306a36Sopenharmony_ci		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_ci	ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_ci	return 0;
84662306a36Sopenharmony_ci}
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ciint ipoib_ib_dev_open_default(struct net_device *dev)
84962306a36Sopenharmony_ci{
85062306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
85162306a36Sopenharmony_ci	int ret;
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	ret = ipoib_init_qp(dev);
85462306a36Sopenharmony_ci	if (ret) {
85562306a36Sopenharmony_ci		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
85662306a36Sopenharmony_ci		return -1;
85762306a36Sopenharmony_ci	}
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci	ret = ipoib_ib_post_receives(dev);
86062306a36Sopenharmony_ci	if (ret) {
86162306a36Sopenharmony_ci		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
86262306a36Sopenharmony_ci		goto out;
86362306a36Sopenharmony_ci	}
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	ret = ipoib_cm_dev_open(dev);
86662306a36Sopenharmony_ci	if (ret) {
86762306a36Sopenharmony_ci		ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
86862306a36Sopenharmony_ci		goto out;
86962306a36Sopenharmony_ci	}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
87262306a36Sopenharmony_ci		ipoib_napi_enable(dev);
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	return 0;
87562306a36Sopenharmony_ciout:
87662306a36Sopenharmony_ci	return -1;
87762306a36Sopenharmony_ci}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ciint ipoib_ib_dev_open(struct net_device *dev)
88062306a36Sopenharmony_ci{
88162306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	ipoib_pkey_dev_check_presence(dev);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
88662306a36Sopenharmony_ci		ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
88762306a36Sopenharmony_ci			   (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
88862306a36Sopenharmony_ci		return -1;
88962306a36Sopenharmony_ci	}
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci	ipoib_start_ah_reaper(priv);
89262306a36Sopenharmony_ci	if (priv->rn_ops->ndo_open(dev)) {
89362306a36Sopenharmony_ci		pr_warn("%s: Failed to open dev\n", dev->name);
89462306a36Sopenharmony_ci		goto dev_stop;
89562306a36Sopenharmony_ci	}
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	return 0;
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_cidev_stop:
90262306a36Sopenharmony_ci	ipoib_stop_ah_reaper(priv);
90362306a36Sopenharmony_ci	return -1;
90462306a36Sopenharmony_ci}
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_civoid ipoib_ib_dev_stop(struct net_device *dev)
90762306a36Sopenharmony_ci{
90862306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci	priv->rn_ops->ndo_stop(dev);
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
91362306a36Sopenharmony_ci	ipoib_stop_ah_reaper(priv);
91462306a36Sopenharmony_ci}
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_civoid ipoib_pkey_dev_check_presence(struct net_device *dev)
91762306a36Sopenharmony_ci{
91862306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
91962306a36Sopenharmony_ci	struct rdma_netdev *rn = netdev_priv(dev);
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	if (!(priv->pkey & 0x7fff) ||
92262306a36Sopenharmony_ci	    ib_find_pkey(priv->ca, priv->port, priv->pkey,
92362306a36Sopenharmony_ci			 &priv->pkey_index)) {
92462306a36Sopenharmony_ci		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
92562306a36Sopenharmony_ci	} else {
92662306a36Sopenharmony_ci		if (rn->set_id)
92762306a36Sopenharmony_ci			rn->set_id(dev, priv->pkey_index);
92862306a36Sopenharmony_ci		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
92962306a36Sopenharmony_ci	}
93062306a36Sopenharmony_ci}
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_civoid ipoib_ib_dev_up(struct net_device *dev)
93362306a36Sopenharmony_ci{
93462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	ipoib_pkey_dev_check_presence(dev);
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
93962306a36Sopenharmony_ci		ipoib_dbg(priv, "PKEY is not assigned.\n");
94062306a36Sopenharmony_ci		return;
94162306a36Sopenharmony_ci	}
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci	ipoib_mcast_start_thread(dev);
94662306a36Sopenharmony_ci}
94762306a36Sopenharmony_ci
94862306a36Sopenharmony_civoid ipoib_ib_dev_down(struct net_device *dev)
94962306a36Sopenharmony_ci{
95062306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	ipoib_dbg(priv, "downing ib_dev\n");
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
95562306a36Sopenharmony_ci	netif_carrier_off(dev);
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_ci	ipoib_mcast_stop_thread(dev);
95862306a36Sopenharmony_ci	ipoib_mcast_dev_flush(dev);
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	ipoib_flush_paths(dev);
96162306a36Sopenharmony_ci}
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_civoid ipoib_drain_cq(struct net_device *dev)
96462306a36Sopenharmony_ci{
96562306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
96662306a36Sopenharmony_ci	int i, n;
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_ci	/*
96962306a36Sopenharmony_ci	 * We call completion handling routines that expect to be
97062306a36Sopenharmony_ci	 * called from the BH-disabled NAPI poll context, so disable
97162306a36Sopenharmony_ci	 * BHs here too.
97262306a36Sopenharmony_ci	 */
97362306a36Sopenharmony_ci	local_bh_disable();
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci	do {
97662306a36Sopenharmony_ci		n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
97762306a36Sopenharmony_ci		for (i = 0; i < n; ++i) {
97862306a36Sopenharmony_ci			/*
97962306a36Sopenharmony_ci			 * Convert any successful completions to flush
98062306a36Sopenharmony_ci			 * errors to avoid passing packets up the
98162306a36Sopenharmony_ci			 * stack after bringing the device down.
98262306a36Sopenharmony_ci			 */
98362306a36Sopenharmony_ci			if (priv->ibwc[i].status == IB_WC_SUCCESS)
98462306a36Sopenharmony_ci				priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci			if (priv->ibwc[i].wr_id & IPOIB_OP_RECV) {
98762306a36Sopenharmony_ci				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
98862306a36Sopenharmony_ci					ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
98962306a36Sopenharmony_ci				else
99062306a36Sopenharmony_ci					ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
99162306a36Sopenharmony_ci			} else {
99262306a36Sopenharmony_ci				pr_warn("%s: Got unexpected wqe id\n", __func__);
99362306a36Sopenharmony_ci			}
99462306a36Sopenharmony_ci		}
99562306a36Sopenharmony_ci	} while (n == IPOIB_NUM_WC);
99662306a36Sopenharmony_ci
99762306a36Sopenharmony_ci	while (poll_tx(priv))
99862306a36Sopenharmony_ci		; /* nothing */
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci	local_bh_enable();
100162306a36Sopenharmony_ci}
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci/*
100462306a36Sopenharmony_ci * Takes whatever value which is in pkey index 0 and updates priv->pkey
100562306a36Sopenharmony_ci * returns 0 if the pkey value was changed.
100662306a36Sopenharmony_ci */
100762306a36Sopenharmony_cistatic inline int update_parent_pkey(struct ipoib_dev_priv *priv)
100862306a36Sopenharmony_ci{
100962306a36Sopenharmony_ci	int result;
101062306a36Sopenharmony_ci	u16 prev_pkey;
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_ci	prev_pkey = priv->pkey;
101362306a36Sopenharmony_ci	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
101462306a36Sopenharmony_ci	if (result) {
101562306a36Sopenharmony_ci		ipoib_warn(priv, "ib_query_pkey port %d failed (ret = %d)\n",
101662306a36Sopenharmony_ci			   priv->port, result);
101762306a36Sopenharmony_ci		return result;
101862306a36Sopenharmony_ci	}
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci	priv->pkey |= 0x8000;
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	if (prev_pkey != priv->pkey) {
102362306a36Sopenharmony_ci		ipoib_dbg(priv, "pkey changed from 0x%x to 0x%x\n",
102462306a36Sopenharmony_ci			  prev_pkey, priv->pkey);
102562306a36Sopenharmony_ci		/*
102662306a36Sopenharmony_ci		 * Update the pkey in the broadcast address, while making sure to set
102762306a36Sopenharmony_ci		 * the full membership bit, so that we join the right broadcast group.
102862306a36Sopenharmony_ci		 */
102962306a36Sopenharmony_ci		priv->dev->broadcast[8] = priv->pkey >> 8;
103062306a36Sopenharmony_ci		priv->dev->broadcast[9] = priv->pkey & 0xff;
103162306a36Sopenharmony_ci		return 0;
103262306a36Sopenharmony_ci	}
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci	return 1;
103562306a36Sopenharmony_ci}
103662306a36Sopenharmony_ci/*
103762306a36Sopenharmony_ci * returns 0 if pkey value was found in a different slot.
103862306a36Sopenharmony_ci */
103962306a36Sopenharmony_cistatic inline int update_child_pkey(struct ipoib_dev_priv *priv)
104062306a36Sopenharmony_ci{
104162306a36Sopenharmony_ci	u16 old_index = priv->pkey_index;
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci	priv->pkey_index = 0;
104462306a36Sopenharmony_ci	ipoib_pkey_dev_check_presence(priv->dev);
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ci	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
104762306a36Sopenharmony_ci	    (old_index == priv->pkey_index))
104862306a36Sopenharmony_ci		return 1;
104962306a36Sopenharmony_ci	return 0;
105062306a36Sopenharmony_ci}
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci/*
105362306a36Sopenharmony_ci * returns true if the device address of the ipoib interface has changed and the
105462306a36Sopenharmony_ci * new address is a valid one (i.e in the gid table), return false otherwise.
105562306a36Sopenharmony_ci */
105662306a36Sopenharmony_cistatic bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
105762306a36Sopenharmony_ci{
105862306a36Sopenharmony_ci	union ib_gid search_gid;
105962306a36Sopenharmony_ci	union ib_gid gid0;
106062306a36Sopenharmony_ci	int err;
106162306a36Sopenharmony_ci	u16 index;
106262306a36Sopenharmony_ci	u32 port;
106362306a36Sopenharmony_ci	bool ret = false;
106462306a36Sopenharmony_ci
106562306a36Sopenharmony_ci	if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
106662306a36Sopenharmony_ci		return false;
106762306a36Sopenharmony_ci
106862306a36Sopenharmony_ci	netif_addr_lock_bh(priv->dev);
106962306a36Sopenharmony_ci
107062306a36Sopenharmony_ci	/* The subnet prefix may have changed, update it now so we won't have
107162306a36Sopenharmony_ci	 * to do it later
107262306a36Sopenharmony_ci	 */
107362306a36Sopenharmony_ci	priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
107462306a36Sopenharmony_ci	dev_addr_mod(priv->dev, 4, (u8 *)&gid0.global.subnet_prefix,
107562306a36Sopenharmony_ci		     sizeof(gid0.global.subnet_prefix));
107662306a36Sopenharmony_ci	search_gid.global.subnet_prefix = gid0.global.subnet_prefix;
107762306a36Sopenharmony_ci
107862306a36Sopenharmony_ci	search_gid.global.interface_id = priv->local_gid.global.interface_id;
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci	netif_addr_unlock_bh(priv->dev);
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci	err = ib_find_gid(priv->ca, &search_gid, &port, &index);
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	netif_addr_lock_bh(priv->dev);
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	if (search_gid.global.interface_id !=
108762306a36Sopenharmony_ci	    priv->local_gid.global.interface_id)
108862306a36Sopenharmony_ci		/* There was a change while we were looking up the gid, bail
108962306a36Sopenharmony_ci		 * here and let the next work sort this out
109062306a36Sopenharmony_ci		 */
109162306a36Sopenharmony_ci		goto out;
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	/* The next section of code needs some background:
109462306a36Sopenharmony_ci	 * Per IB spec the port GUID can't change if the HCA is powered on.
109562306a36Sopenharmony_ci	 * port GUID is the basis for GID at index 0 which is the basis for
109662306a36Sopenharmony_ci	 * the default device address of a ipoib interface.
109762306a36Sopenharmony_ci	 *
109862306a36Sopenharmony_ci	 * so it seems the flow should be:
109962306a36Sopenharmony_ci	 * if user_changed_dev_addr && gid in gid tbl
110062306a36Sopenharmony_ci	 *	set bit dev_addr_set
110162306a36Sopenharmony_ci	 *	return true
110262306a36Sopenharmony_ci	 * else
110362306a36Sopenharmony_ci	 *	return false
110462306a36Sopenharmony_ci	 *
110562306a36Sopenharmony_ci	 * The issue is that there are devices that don't follow the spec,
110662306a36Sopenharmony_ci	 * they change the port GUID when the HCA is powered, so in order
110762306a36Sopenharmony_ci	 * not to break userspace applications, We need to check if the
110862306a36Sopenharmony_ci	 * user wanted to control the device address and we assume that
110962306a36Sopenharmony_ci	 * if he sets the device address back to be based on GID index 0,
111062306a36Sopenharmony_ci	 * he no longer wishs to control it.
111162306a36Sopenharmony_ci	 *
111262306a36Sopenharmony_ci	 * If the user doesn't control the device address,
111362306a36Sopenharmony_ci	 * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means
111462306a36Sopenharmony_ci	 * the port GUID has changed and GID at index 0 has changed
111562306a36Sopenharmony_ci	 * so we need to change priv->local_gid and priv->dev->dev_addr
111662306a36Sopenharmony_ci	 * to reflect the new GID.
111762306a36Sopenharmony_ci	 */
111862306a36Sopenharmony_ci	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
111962306a36Sopenharmony_ci		if (!err && port == priv->port) {
112062306a36Sopenharmony_ci			set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
112162306a36Sopenharmony_ci			if (index == 0)
112262306a36Sopenharmony_ci				clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
112362306a36Sopenharmony_ci					  &priv->flags);
112462306a36Sopenharmony_ci			else
112562306a36Sopenharmony_ci				set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
112662306a36Sopenharmony_ci			ret = true;
112762306a36Sopenharmony_ci		} else {
112862306a36Sopenharmony_ci			ret = false;
112962306a36Sopenharmony_ci		}
113062306a36Sopenharmony_ci	} else {
113162306a36Sopenharmony_ci		if (!err && port == priv->port) {
113262306a36Sopenharmony_ci			ret = true;
113362306a36Sopenharmony_ci		} else {
113462306a36Sopenharmony_ci			if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
113562306a36Sopenharmony_ci				memcpy(&priv->local_gid, &gid0,
113662306a36Sopenharmony_ci				       sizeof(priv->local_gid));
113762306a36Sopenharmony_ci				dev_addr_mod(priv->dev, 4, (u8 *)&gid0,
113862306a36Sopenharmony_ci					     sizeof(priv->local_gid));
113962306a36Sopenharmony_ci				ret = true;
114062306a36Sopenharmony_ci			}
114162306a36Sopenharmony_ci		}
114262306a36Sopenharmony_ci	}
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ciout:
114562306a36Sopenharmony_ci	netif_addr_unlock_bh(priv->dev);
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	return ret;
114862306a36Sopenharmony_ci}
114962306a36Sopenharmony_ci
115062306a36Sopenharmony_cistatic void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
115162306a36Sopenharmony_ci				enum ipoib_flush_level level,
115262306a36Sopenharmony_ci				int nesting)
115362306a36Sopenharmony_ci{
115462306a36Sopenharmony_ci	struct ipoib_dev_priv *cpriv;
115562306a36Sopenharmony_ci	struct net_device *dev = priv->dev;
115662306a36Sopenharmony_ci	int result;
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	down_read_nested(&priv->vlan_rwsem, nesting);
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci	/*
116162306a36Sopenharmony_ci	 * Flush any child interfaces too -- they might be up even if
116262306a36Sopenharmony_ci	 * the parent is down.
116362306a36Sopenharmony_ci	 */
116462306a36Sopenharmony_ci	list_for_each_entry(cpriv, &priv->child_intfs, list)
116562306a36Sopenharmony_ci		__ipoib_ib_dev_flush(cpriv, level, nesting + 1);
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	up_read(&priv->vlan_rwsem);
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
117062306a36Sopenharmony_ci	    level != IPOIB_FLUSH_HEAVY) {
117162306a36Sopenharmony_ci		/* Make sure the dev_addr is set even if not flushing */
117262306a36Sopenharmony_ci		if (level == IPOIB_FLUSH_LIGHT)
117362306a36Sopenharmony_ci			ipoib_dev_addr_changed_valid(priv);
117462306a36Sopenharmony_ci		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
117562306a36Sopenharmony_ci		return;
117662306a36Sopenharmony_ci	}
117762306a36Sopenharmony_ci
117862306a36Sopenharmony_ci	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
117962306a36Sopenharmony_ci		/* interface is down. update pkey and leave. */
118062306a36Sopenharmony_ci		if (level == IPOIB_FLUSH_HEAVY) {
118162306a36Sopenharmony_ci			if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
118262306a36Sopenharmony_ci				update_parent_pkey(priv);
118362306a36Sopenharmony_ci			else
118462306a36Sopenharmony_ci				update_child_pkey(priv);
118562306a36Sopenharmony_ci		} else if (level == IPOIB_FLUSH_LIGHT)
118662306a36Sopenharmony_ci			ipoib_dev_addr_changed_valid(priv);
118762306a36Sopenharmony_ci		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
118862306a36Sopenharmony_ci		return;
118962306a36Sopenharmony_ci	}
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_ci	if (level == IPOIB_FLUSH_HEAVY) {
119262306a36Sopenharmony_ci		/* child devices chase their origin pkey value, while non-child
119362306a36Sopenharmony_ci		 * (parent) devices should always takes what present in pkey index 0
119462306a36Sopenharmony_ci		 */
119562306a36Sopenharmony_ci		if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
119662306a36Sopenharmony_ci			result = update_child_pkey(priv);
119762306a36Sopenharmony_ci			if (result) {
119862306a36Sopenharmony_ci				/* restart QP only if P_Key index is changed */
119962306a36Sopenharmony_ci				ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
120062306a36Sopenharmony_ci				return;
120162306a36Sopenharmony_ci			}
120262306a36Sopenharmony_ci
120362306a36Sopenharmony_ci		} else {
120462306a36Sopenharmony_ci			result = update_parent_pkey(priv);
120562306a36Sopenharmony_ci			/* restart QP only if P_Key value changed */
120662306a36Sopenharmony_ci			if (result) {
120762306a36Sopenharmony_ci				ipoib_dbg(priv, "Not flushing - P_Key value not changed.\n");
120862306a36Sopenharmony_ci				return;
120962306a36Sopenharmony_ci			}
121062306a36Sopenharmony_ci		}
121162306a36Sopenharmony_ci	}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_ci	if (level == IPOIB_FLUSH_LIGHT) {
121462306a36Sopenharmony_ci		int oper_up;
121562306a36Sopenharmony_ci		ipoib_mark_paths_invalid(dev);
121662306a36Sopenharmony_ci		/* Set IPoIB operation as down to prevent races between:
121762306a36Sopenharmony_ci		 * the flush flow which leaves MCG and on the fly joins
121862306a36Sopenharmony_ci		 * which can happen during that time. mcast restart task
121962306a36Sopenharmony_ci		 * should deal with join requests we missed.
122062306a36Sopenharmony_ci		 */
122162306a36Sopenharmony_ci		oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
122262306a36Sopenharmony_ci		ipoib_mcast_dev_flush(dev);
122362306a36Sopenharmony_ci		if (oper_up)
122462306a36Sopenharmony_ci			set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
122562306a36Sopenharmony_ci		ipoib_reap_dead_ahs(priv);
122662306a36Sopenharmony_ci	}
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_ci	if (level >= IPOIB_FLUSH_NORMAL)
122962306a36Sopenharmony_ci		ipoib_ib_dev_down(dev);
123062306a36Sopenharmony_ci
123162306a36Sopenharmony_ci	if (level == IPOIB_FLUSH_HEAVY) {
123262306a36Sopenharmony_ci		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
123362306a36Sopenharmony_ci			ipoib_ib_dev_stop(dev);
123462306a36Sopenharmony_ci
123562306a36Sopenharmony_ci		if (ipoib_ib_dev_open(dev))
123662306a36Sopenharmony_ci			return;
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci		if (netif_queue_stopped(dev))
123962306a36Sopenharmony_ci			netif_start_queue(dev);
124062306a36Sopenharmony_ci	}
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci	/*
124362306a36Sopenharmony_ci	 * The device could have been brought down between the start and when
124462306a36Sopenharmony_ci	 * we get here, don't bring it back up if it's not configured up
124562306a36Sopenharmony_ci	 */
124662306a36Sopenharmony_ci	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
124762306a36Sopenharmony_ci		if (level >= IPOIB_FLUSH_NORMAL)
124862306a36Sopenharmony_ci			ipoib_ib_dev_up(dev);
124962306a36Sopenharmony_ci		if (ipoib_dev_addr_changed_valid(priv))
125062306a36Sopenharmony_ci			ipoib_mcast_restart_task(&priv->restart_task);
125162306a36Sopenharmony_ci	}
125262306a36Sopenharmony_ci}
125362306a36Sopenharmony_ci
125462306a36Sopenharmony_civoid ipoib_ib_dev_flush_light(struct work_struct *work)
125562306a36Sopenharmony_ci{
125662306a36Sopenharmony_ci	struct ipoib_dev_priv *priv =
125762306a36Sopenharmony_ci		container_of(work, struct ipoib_dev_priv, flush_light);
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_ci	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_LIGHT, 0);
126062306a36Sopenharmony_ci}
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_civoid ipoib_ib_dev_flush_normal(struct work_struct *work)
126362306a36Sopenharmony_ci{
126462306a36Sopenharmony_ci	struct ipoib_dev_priv *priv =
126562306a36Sopenharmony_ci		container_of(work, struct ipoib_dev_priv, flush_normal);
126662306a36Sopenharmony_ci
126762306a36Sopenharmony_ci	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_NORMAL, 0);
126862306a36Sopenharmony_ci}
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_civoid ipoib_ib_dev_flush_heavy(struct work_struct *work)
127162306a36Sopenharmony_ci{
127262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv =
127362306a36Sopenharmony_ci		container_of(work, struct ipoib_dev_priv, flush_heavy);
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_ci	rtnl_lock();
127662306a36Sopenharmony_ci	__ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
127762306a36Sopenharmony_ci	rtnl_unlock();
127862306a36Sopenharmony_ci}
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_civoid ipoib_ib_dev_cleanup(struct net_device *dev)
128162306a36Sopenharmony_ci{
128262306a36Sopenharmony_ci	struct ipoib_dev_priv *priv = ipoib_priv(dev);
128362306a36Sopenharmony_ci
128462306a36Sopenharmony_ci	ipoib_dbg(priv, "cleaning up ib_dev\n");
128562306a36Sopenharmony_ci	/*
128662306a36Sopenharmony_ci	 * We must make sure there are no more (path) completions
128762306a36Sopenharmony_ci	 * that may wish to touch priv fields that are no longer valid
128862306a36Sopenharmony_ci	 */
128962306a36Sopenharmony_ci	ipoib_flush_paths(dev);
129062306a36Sopenharmony_ci
129162306a36Sopenharmony_ci	ipoib_mcast_stop_thread(dev);
129262306a36Sopenharmony_ci	ipoib_mcast_dev_flush(dev);
129362306a36Sopenharmony_ci
129462306a36Sopenharmony_ci	/*
129562306a36Sopenharmony_ci	 * All of our ah references aren't free until after
129662306a36Sopenharmony_ci	 * ipoib_mcast_dev_flush(), ipoib_flush_paths, and
129762306a36Sopenharmony_ci	 * the neighbor garbage collection is stopped and reaped.
129862306a36Sopenharmony_ci	 * That should all be done now, so make a final ah flush.
129962306a36Sopenharmony_ci	 */
130062306a36Sopenharmony_ci	ipoib_reap_dead_ahs(priv);
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
130362306a36Sopenharmony_ci
130462306a36Sopenharmony_ci	priv->rn_ops->ndo_uninit(dev);
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci	if (priv->pd) {
130762306a36Sopenharmony_ci		ib_dealloc_pd(priv->pd);
130862306a36Sopenharmony_ci		priv->pd = NULL;
130962306a36Sopenharmony_ci	}
131062306a36Sopenharmony_ci}
131162306a36Sopenharmony_ci
1312