162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (c) 2006 Mellanox Technologies. All rights reserved 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * This software is available to you under a choice of one of two 562306a36Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the 862306a36Sopenharmony_ci * OpenIB.org BSD license below: 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or 1162306a36Sopenharmony_ci * without modification, are permitted provided that the following 1262306a36Sopenharmony_ci * conditions are met: 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * - Redistributions of source code must retain the above 1562306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 1662306a36Sopenharmony_ci * disclaimer. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * - Redistributions in binary form must reproduce the above 1962306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 2062306a36Sopenharmony_ci * disclaimer in the documentation and/or other materials 2162306a36Sopenharmony_ci * provided with the distribution. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3062306a36Sopenharmony_ci * SOFTWARE. 3162306a36Sopenharmony_ci */ 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#include <rdma/ib_cm.h> 3462306a36Sopenharmony_ci#include <net/dst.h> 3562306a36Sopenharmony_ci#include <net/icmp.h> 3662306a36Sopenharmony_ci#include <linux/icmpv6.h> 3762306a36Sopenharmony_ci#include <linux/delay.h> 3862306a36Sopenharmony_ci#include <linux/slab.h> 3962306a36Sopenharmony_ci#include <linux/vmalloc.h> 4062306a36Sopenharmony_ci#include <linux/moduleparam.h> 4162306a36Sopenharmony_ci#include <linux/sched/signal.h> 4262306a36Sopenharmony_ci#include <linux/sched/mm.h> 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci#include "ipoib.h" 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ciint ipoib_max_conn_qp = 128; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_cimodule_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444); 4962306a36Sopenharmony_ciMODULE_PARM_DESC(max_nonsrq_conn_qp, 5062306a36Sopenharmony_ci "Max number of connected-mode QPs per interface " 5162306a36Sopenharmony_ci "(applied only if shared receive queue is not available)"); 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA 5462306a36Sopenharmony_cistatic int data_debug_level; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cimodule_param_named(cm_data_debug_level, data_debug_level, int, 0644); 5762306a36Sopenharmony_ciMODULE_PARM_DESC(cm_data_debug_level, 5862306a36Sopenharmony_ci "Enable data path debug tracing for connected mode if > 0"); 5962306a36Sopenharmony_ci#endif 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci#define IPOIB_CM_IETF_ID 0x1000000000000000ULL 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ) 6462306a36Sopenharmony_ci#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ) 6562306a36Sopenharmony_ci#define IPOIB_CM_RX_DELAY (3 * 256 * HZ) 6662306a36Sopenharmony_ci#define IPOIB_CM_RX_UPDATE_MASK (0x3) 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci#define IPOIB_CM_RX_RESERVE (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN) 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_cistatic struct ib_qp_attr ipoib_cm_err_attr = { 7162306a36Sopenharmony_ci .qp_state = IB_QPS_ERR 7262306a36Sopenharmony_ci}; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_cistatic struct ib_send_wr ipoib_cm_rx_drain_wr = { 7762306a36Sopenharmony_ci .opcode = IB_WR_SEND, 7862306a36Sopenharmony_ci}; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cistatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 8162306a36Sopenharmony_ci const struct ib_cm_event *event); 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_cistatic void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, 8462306a36Sopenharmony_ci u64 mapping[IPOIB_CM_RX_SG]) 8562306a36Sopenharmony_ci{ 8662306a36Sopenharmony_ci int i; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci for (i = 0; i < frags; ++i) 9162306a36Sopenharmony_ci ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); 9262306a36Sopenharmony_ci} 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_cistatic int ipoib_cm_post_receive_srq(struct net_device *dev, int id) 9562306a36Sopenharmony_ci{ 9662306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 9762306a36Sopenharmony_ci int i, ret; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci for (i = 0; i < priv->cm.num_frags; ++i) 10262306a36Sopenharmony_ci priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL); 10562306a36Sopenharmony_ci if (unlikely(ret)) { 10662306a36Sopenharmony_ci ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); 10762306a36Sopenharmony_ci ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, 10862306a36Sopenharmony_ci priv->cm.srq_ring[id].mapping); 10962306a36Sopenharmony_ci dev_kfree_skb_any(priv->cm.srq_ring[id].skb); 11062306a36Sopenharmony_ci priv->cm.srq_ring[id].skb = NULL; 11162306a36Sopenharmony_ci } 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci return ret; 11462306a36Sopenharmony_ci} 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_cistatic int ipoib_cm_post_receive_nonsrq(struct net_device *dev, 11762306a36Sopenharmony_ci struct ipoib_cm_rx *rx, 11862306a36Sopenharmony_ci struct ib_recv_wr *wr, 11962306a36Sopenharmony_ci struct ib_sge *sge, int id) 12062306a36Sopenharmony_ci{ 12162306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 12262306a36Sopenharmony_ci int i, ret; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci for (i = 0; i < IPOIB_CM_RX_SG; ++i) 12762306a36Sopenharmony_ci sge[i].addr = rx->rx_ring[id].mapping[i]; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci ret = ib_post_recv(rx->qp, wr, NULL); 13062306a36Sopenharmony_ci if (unlikely(ret)) { 13162306a36Sopenharmony_ci ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); 13262306a36Sopenharmony_ci ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, 13362306a36Sopenharmony_ci rx->rx_ring[id].mapping); 13462306a36Sopenharmony_ci dev_kfree_skb_any(rx->rx_ring[id].skb); 13562306a36Sopenharmony_ci rx->rx_ring[id].skb = NULL; 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci return ret; 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_cistatic struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, 14262306a36Sopenharmony_ci struct ipoib_cm_rx_buf *rx_ring, 14362306a36Sopenharmony_ci int id, int frags, 14462306a36Sopenharmony_ci u64 mapping[IPOIB_CM_RX_SG], 14562306a36Sopenharmony_ci gfp_t gfp) 14662306a36Sopenharmony_ci{ 14762306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 14862306a36Sopenharmony_ci struct sk_buff *skb; 14962306a36Sopenharmony_ci int i; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16)); 15262306a36Sopenharmony_ci if (unlikely(!skb)) 15362306a36Sopenharmony_ci return NULL; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci /* 15662306a36Sopenharmony_ci * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the 15762306a36Sopenharmony_ci * IP header to a multiple of 16. 15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_ci skb_reserve(skb, IPOIB_CM_RX_RESERVE); 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, 16262306a36Sopenharmony_ci DMA_FROM_DEVICE); 16362306a36Sopenharmony_ci if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) { 16462306a36Sopenharmony_ci dev_kfree_skb_any(skb); 16562306a36Sopenharmony_ci return NULL; 16662306a36Sopenharmony_ci } 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci for (i = 0; i < frags; i++) { 16962306a36Sopenharmony_ci struct page *page = alloc_page(gfp); 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci if (!page) 17262306a36Sopenharmony_ci goto partial_error; 17362306a36Sopenharmony_ci skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci mapping[i + 1] = ib_dma_map_page(priv->ca, page, 17662306a36Sopenharmony_ci 0, PAGE_SIZE, DMA_FROM_DEVICE); 17762306a36Sopenharmony_ci if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) 17862306a36Sopenharmony_ci goto partial_error; 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci rx_ring[id].skb = skb; 18262306a36Sopenharmony_ci return skb; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_cipartial_error: 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci for (; i > 0; --i) 18962306a36Sopenharmony_ci ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci dev_kfree_skb_any(skb); 19262306a36Sopenharmony_ci return NULL; 19362306a36Sopenharmony_ci} 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_cistatic void ipoib_cm_free_rx_ring(struct net_device *dev, 19662306a36Sopenharmony_ci struct ipoib_cm_rx_buf *rx_ring) 19762306a36Sopenharmony_ci{ 19862306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 19962306a36Sopenharmony_ci int i; 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci for (i = 0; i < ipoib_recvq_size; ++i) 20262306a36Sopenharmony_ci if (rx_ring[i].skb) { 20362306a36Sopenharmony_ci ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, 20462306a36Sopenharmony_ci rx_ring[i].mapping); 20562306a36Sopenharmony_ci dev_kfree_skb_any(rx_ring[i].skb); 20662306a36Sopenharmony_ci } 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci vfree(rx_ring); 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_cistatic void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) 21262306a36Sopenharmony_ci{ 21362306a36Sopenharmony_ci struct ipoib_cm_rx *p; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci /* We only reserved 1 extra slot in CQ for drain WRs, so 21662306a36Sopenharmony_ci * make sure we have at most 1 outstanding WR. */ 21762306a36Sopenharmony_ci if (list_empty(&priv->cm.rx_flush_list) || 21862306a36Sopenharmony_ci !list_empty(&priv->cm.rx_drain_list)) 21962306a36Sopenharmony_ci return; 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci /* 22262306a36Sopenharmony_ci * QPs on flush list are error state. This way, a "flush 22362306a36Sopenharmony_ci * error" WC will be immediately generated for each WR we post. 22462306a36Sopenharmony_ci */ 22562306a36Sopenharmony_ci p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); 22662306a36Sopenharmony_ci ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID; 22762306a36Sopenharmony_ci if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL)) 22862306a36Sopenharmony_ci ipoib_warn(priv, "failed to post drain wr\n"); 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); 23162306a36Sopenharmony_ci} 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_cistatic void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci struct ipoib_cm_rx *p = ctx; 23662306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(p->dev); 23762306a36Sopenharmony_ci unsigned long flags; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (event->event != IB_EVENT_QP_LAST_WQE_REACHED) 24062306a36Sopenharmony_ci return; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 24362306a36Sopenharmony_ci list_move(&p->list, &priv->cm.rx_flush_list); 24462306a36Sopenharmony_ci p->state = IPOIB_CM_RX_FLUSH; 24562306a36Sopenharmony_ci ipoib_cm_start_rx_drain(priv); 24662306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_cistatic struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, 25062306a36Sopenharmony_ci struct ipoib_cm_rx *p) 25162306a36Sopenharmony_ci{ 25262306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 25362306a36Sopenharmony_ci struct ib_qp_init_attr attr = { 25462306a36Sopenharmony_ci .event_handler = ipoib_cm_rx_event_handler, 25562306a36Sopenharmony_ci .send_cq = priv->recv_cq, /* For drain WR */ 25662306a36Sopenharmony_ci .recv_cq = priv->recv_cq, 25762306a36Sopenharmony_ci .srq = priv->cm.srq, 25862306a36Sopenharmony_ci .cap.max_send_wr = 1, /* For drain WR */ 25962306a36Sopenharmony_ci .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */ 26062306a36Sopenharmony_ci .sq_sig_type = IB_SIGNAL_ALL_WR, 26162306a36Sopenharmony_ci .qp_type = IB_QPT_RC, 26262306a36Sopenharmony_ci .qp_context = p, 26362306a36Sopenharmony_ci }; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci if (!ipoib_cm_has_srq(dev)) { 26662306a36Sopenharmony_ci attr.cap.max_recv_wr = ipoib_recvq_size; 26762306a36Sopenharmony_ci attr.cap.max_recv_sge = IPOIB_CM_RX_SG; 26862306a36Sopenharmony_ci } 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci return ib_create_qp(priv->pd, &attr); 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_cistatic int ipoib_cm_modify_rx_qp(struct net_device *dev, 27462306a36Sopenharmony_ci struct ib_cm_id *cm_id, struct ib_qp *qp, 27562306a36Sopenharmony_ci unsigned int psn) 27662306a36Sopenharmony_ci{ 27762306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 27862306a36Sopenharmony_ci struct ib_qp_attr qp_attr; 27962306a36Sopenharmony_ci int qp_attr_mask, ret; 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci qp_attr.qp_state = IB_QPS_INIT; 28262306a36Sopenharmony_ci ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 28362306a36Sopenharmony_ci if (ret) { 28462306a36Sopenharmony_ci ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret); 28562306a36Sopenharmony_ci return ret; 28662306a36Sopenharmony_ci } 28762306a36Sopenharmony_ci ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 28862306a36Sopenharmony_ci if (ret) { 28962306a36Sopenharmony_ci ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret); 29062306a36Sopenharmony_ci return ret; 29162306a36Sopenharmony_ci } 29262306a36Sopenharmony_ci qp_attr.qp_state = IB_QPS_RTR; 29362306a36Sopenharmony_ci ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 29462306a36Sopenharmony_ci if (ret) { 29562306a36Sopenharmony_ci ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); 29662306a36Sopenharmony_ci return ret; 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci qp_attr.rq_psn = psn; 29962306a36Sopenharmony_ci ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 30062306a36Sopenharmony_ci if (ret) { 30162306a36Sopenharmony_ci ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); 30262306a36Sopenharmony_ci return ret; 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci /* 30662306a36Sopenharmony_ci * Current Mellanox HCA firmware won't generate completions 30762306a36Sopenharmony_ci * with error for drain WRs unless the QP has been moved to 30862306a36Sopenharmony_ci * RTS first. This work-around leaves a window where a QP has 30962306a36Sopenharmony_ci * moved to error asynchronously, but this will eventually get 31062306a36Sopenharmony_ci * fixed in firmware, so let's not error out if modify QP 31162306a36Sopenharmony_ci * fails. 31262306a36Sopenharmony_ci */ 31362306a36Sopenharmony_ci qp_attr.qp_state = IB_QPS_RTS; 31462306a36Sopenharmony_ci ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 31562306a36Sopenharmony_ci if (ret) { 31662306a36Sopenharmony_ci ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); 31762306a36Sopenharmony_ci return 0; 31862306a36Sopenharmony_ci } 31962306a36Sopenharmony_ci ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 32062306a36Sopenharmony_ci if (ret) { 32162306a36Sopenharmony_ci ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); 32262306a36Sopenharmony_ci return 0; 32362306a36Sopenharmony_ci } 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci return 0; 32662306a36Sopenharmony_ci} 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_cistatic void ipoib_cm_init_rx_wr(struct net_device *dev, 32962306a36Sopenharmony_ci struct ib_recv_wr *wr, 33062306a36Sopenharmony_ci struct ib_sge *sge) 33162306a36Sopenharmony_ci{ 33262306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 33362306a36Sopenharmony_ci int i; 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci for (i = 0; i < priv->cm.num_frags; ++i) 33662306a36Sopenharmony_ci sge[i].lkey = priv->pd->local_dma_lkey; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci sge[0].length = IPOIB_CM_HEAD_SIZE; 33962306a36Sopenharmony_ci for (i = 1; i < priv->cm.num_frags; ++i) 34062306a36Sopenharmony_ci sge[i].length = PAGE_SIZE; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci wr->next = NULL; 34362306a36Sopenharmony_ci wr->sg_list = sge; 34462306a36Sopenharmony_ci wr->num_sge = priv->cm.num_frags; 34562306a36Sopenharmony_ci} 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_cistatic int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, 34862306a36Sopenharmony_ci struct ipoib_cm_rx *rx) 34962306a36Sopenharmony_ci{ 35062306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 35162306a36Sopenharmony_ci struct { 35262306a36Sopenharmony_ci struct ib_recv_wr wr; 35362306a36Sopenharmony_ci struct ib_sge sge[IPOIB_CM_RX_SG]; 35462306a36Sopenharmony_ci } *t; 35562306a36Sopenharmony_ci int ret; 35662306a36Sopenharmony_ci int i; 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci rx->rx_ring = vzalloc(array_size(ipoib_recvq_size, 35962306a36Sopenharmony_ci sizeof(*rx->rx_ring))); 36062306a36Sopenharmony_ci if (!rx->rx_ring) 36162306a36Sopenharmony_ci return -ENOMEM; 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci t = kmalloc(sizeof(*t), GFP_KERNEL); 36462306a36Sopenharmony_ci if (!t) { 36562306a36Sopenharmony_ci ret = -ENOMEM; 36662306a36Sopenharmony_ci goto err_free_1; 36762306a36Sopenharmony_ci } 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci ipoib_cm_init_rx_wr(dev, &t->wr, t->sge); 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { 37462306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 37562306a36Sopenharmony_ci ib_send_cm_rej(cm_id, IB_CM_REJ_NO_QP, NULL, 0, NULL, 0); 37662306a36Sopenharmony_ci ret = -EINVAL; 37762306a36Sopenharmony_ci goto err_free; 37862306a36Sopenharmony_ci } else 37962306a36Sopenharmony_ci ++priv->cm.nonsrq_conn_qp; 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci for (i = 0; i < ipoib_recvq_size; ++i) { 38462306a36Sopenharmony_ci if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, 38562306a36Sopenharmony_ci rx->rx_ring[i].mapping, 38662306a36Sopenharmony_ci GFP_KERNEL)) { 38762306a36Sopenharmony_ci ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 38862306a36Sopenharmony_ci ret = -ENOMEM; 38962306a36Sopenharmony_ci goto err_count; 39062306a36Sopenharmony_ci } 39162306a36Sopenharmony_ci ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i); 39262306a36Sopenharmony_ci if (ret) { 39362306a36Sopenharmony_ci ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " 39462306a36Sopenharmony_ci "failed for buf %d\n", i); 39562306a36Sopenharmony_ci ret = -EIO; 39662306a36Sopenharmony_ci goto err_count; 39762306a36Sopenharmony_ci } 39862306a36Sopenharmony_ci } 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci rx->recv_count = ipoib_recvq_size; 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci kfree(t); 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci return 0; 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_cierr_count: 40762306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 40862306a36Sopenharmony_ci --priv->cm.nonsrq_conn_qp; 40962306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_cierr_free: 41262306a36Sopenharmony_ci kfree(t); 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_cierr_free_1: 41562306a36Sopenharmony_ci ipoib_cm_free_rx_ring(dev, rx->rx_ring); 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci return ret; 41862306a36Sopenharmony_ci} 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_cistatic int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, 42162306a36Sopenharmony_ci struct ib_qp *qp, 42262306a36Sopenharmony_ci const struct ib_cm_req_event_param *req, 42362306a36Sopenharmony_ci unsigned int psn) 42462306a36Sopenharmony_ci{ 42562306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 42662306a36Sopenharmony_ci struct ipoib_cm_data data = {}; 42762306a36Sopenharmony_ci struct ib_cm_rep_param rep = {}; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci data.qpn = cpu_to_be32(priv->qp->qp_num); 43062306a36Sopenharmony_ci data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci rep.private_data = &data; 43362306a36Sopenharmony_ci rep.private_data_len = sizeof(data); 43462306a36Sopenharmony_ci rep.flow_control = 0; 43562306a36Sopenharmony_ci rep.rnr_retry_count = req->rnr_retry_count; 43662306a36Sopenharmony_ci rep.srq = ipoib_cm_has_srq(dev); 43762306a36Sopenharmony_ci rep.qp_num = qp->qp_num; 43862306a36Sopenharmony_ci rep.starting_psn = psn; 43962306a36Sopenharmony_ci return ib_send_cm_rep(cm_id, &rep); 44062306a36Sopenharmony_ci} 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_cistatic int ipoib_cm_req_handler(struct ib_cm_id *cm_id, 44362306a36Sopenharmony_ci const struct ib_cm_event *event) 44462306a36Sopenharmony_ci{ 44562306a36Sopenharmony_ci struct net_device *dev = cm_id->context; 44662306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 44762306a36Sopenharmony_ci struct ipoib_cm_rx *p; 44862306a36Sopenharmony_ci unsigned int psn; 44962306a36Sopenharmony_ci int ret; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci ipoib_dbg(priv, "REQ arrived\n"); 45262306a36Sopenharmony_ci p = kzalloc(sizeof(*p), GFP_KERNEL); 45362306a36Sopenharmony_ci if (!p) 45462306a36Sopenharmony_ci return -ENOMEM; 45562306a36Sopenharmony_ci p->dev = dev; 45662306a36Sopenharmony_ci p->id = cm_id; 45762306a36Sopenharmony_ci cm_id->context = p; 45862306a36Sopenharmony_ci p->state = IPOIB_CM_RX_LIVE; 45962306a36Sopenharmony_ci p->jiffies = jiffies; 46062306a36Sopenharmony_ci INIT_LIST_HEAD(&p->list); 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci p->qp = ipoib_cm_create_rx_qp(dev, p); 46362306a36Sopenharmony_ci if (IS_ERR(p->qp)) { 46462306a36Sopenharmony_ci ret = PTR_ERR(p->qp); 46562306a36Sopenharmony_ci goto err_qp; 46662306a36Sopenharmony_ci } 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci psn = get_random_u32() & 0xffffff; 46962306a36Sopenharmony_ci ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn); 47062306a36Sopenharmony_ci if (ret) 47162306a36Sopenharmony_ci goto err_modify; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci if (!ipoib_cm_has_srq(dev)) { 47462306a36Sopenharmony_ci ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p); 47562306a36Sopenharmony_ci if (ret) 47662306a36Sopenharmony_ci goto err_modify; 47762306a36Sopenharmony_ci } 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 48062306a36Sopenharmony_ci queue_delayed_work(priv->wq, 48162306a36Sopenharmony_ci &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 48262306a36Sopenharmony_ci /* Add this entry to passive ids list head, but do not re-add it 48362306a36Sopenharmony_ci * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */ 48462306a36Sopenharmony_ci p->jiffies = jiffies; 48562306a36Sopenharmony_ci if (p->state == IPOIB_CM_RX_LIVE) 48662306a36Sopenharmony_ci list_move(&p->list, &priv->cm.passive_ids); 48762306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn); 49062306a36Sopenharmony_ci if (ret) { 49162306a36Sopenharmony_ci ipoib_warn(priv, "failed to send REP: %d\n", ret); 49262306a36Sopenharmony_ci if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) 49362306a36Sopenharmony_ci ipoib_warn(priv, "unable to move qp to error state\n"); 49462306a36Sopenharmony_ci } 49562306a36Sopenharmony_ci return 0; 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_cierr_modify: 49862306a36Sopenharmony_ci ib_destroy_qp(p->qp); 49962306a36Sopenharmony_cierr_qp: 50062306a36Sopenharmony_ci kfree(p); 50162306a36Sopenharmony_ci return ret; 50262306a36Sopenharmony_ci} 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_cistatic int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, 50562306a36Sopenharmony_ci const struct ib_cm_event *event) 50662306a36Sopenharmony_ci{ 50762306a36Sopenharmony_ci struct ipoib_cm_rx *p; 50862306a36Sopenharmony_ci struct ipoib_dev_priv *priv; 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci switch (event->event) { 51162306a36Sopenharmony_ci case IB_CM_REQ_RECEIVED: 51262306a36Sopenharmony_ci return ipoib_cm_req_handler(cm_id, event); 51362306a36Sopenharmony_ci case IB_CM_DREQ_RECEIVED: 51462306a36Sopenharmony_ci ib_send_cm_drep(cm_id, NULL, 0); 51562306a36Sopenharmony_ci fallthrough; 51662306a36Sopenharmony_ci case IB_CM_REJ_RECEIVED: 51762306a36Sopenharmony_ci p = cm_id->context; 51862306a36Sopenharmony_ci priv = ipoib_priv(p->dev); 51962306a36Sopenharmony_ci if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE)) 52062306a36Sopenharmony_ci ipoib_warn(priv, "unable to move qp to error state\n"); 52162306a36Sopenharmony_ci fallthrough; 52262306a36Sopenharmony_ci default: 52362306a36Sopenharmony_ci return 0; 52462306a36Sopenharmony_ci } 52562306a36Sopenharmony_ci} 52662306a36Sopenharmony_ci/* Adjust length of skb with fragments to match received data */ 52762306a36Sopenharmony_cistatic void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, 52862306a36Sopenharmony_ci unsigned int length, struct sk_buff *toskb) 52962306a36Sopenharmony_ci{ 53062306a36Sopenharmony_ci int i, num_frags; 53162306a36Sopenharmony_ci unsigned int size; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci /* put header into skb */ 53462306a36Sopenharmony_ci size = min(length, hdr_space); 53562306a36Sopenharmony_ci skb->tail += size; 53662306a36Sopenharmony_ci skb->len += size; 53762306a36Sopenharmony_ci length -= size; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci num_frags = skb_shinfo(skb)->nr_frags; 54062306a36Sopenharmony_ci for (i = 0; i < num_frags; i++) { 54162306a36Sopenharmony_ci skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci if (length == 0) { 54462306a36Sopenharmony_ci /* don't need this page */ 54562306a36Sopenharmony_ci skb_fill_page_desc(toskb, i, skb_frag_page(frag), 54662306a36Sopenharmony_ci 0, PAGE_SIZE); 54762306a36Sopenharmony_ci --skb_shinfo(skb)->nr_frags; 54862306a36Sopenharmony_ci } else { 54962306a36Sopenharmony_ci size = min_t(unsigned int, length, PAGE_SIZE); 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci skb_frag_size_set(frag, size); 55262306a36Sopenharmony_ci skb->data_len += size; 55362306a36Sopenharmony_ci skb->truesize += size; 55462306a36Sopenharmony_ci skb->len += size; 55562306a36Sopenharmony_ci length -= size; 55662306a36Sopenharmony_ci } 55762306a36Sopenharmony_ci } 55862306a36Sopenharmony_ci} 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_civoid ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) 56162306a36Sopenharmony_ci{ 56262306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 56362306a36Sopenharmony_ci struct ipoib_cm_rx_buf *rx_ring; 56462306a36Sopenharmony_ci unsigned int wr_id = wc->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV); 56562306a36Sopenharmony_ci struct sk_buff *skb, *newskb; 56662306a36Sopenharmony_ci struct ipoib_cm_rx *p; 56762306a36Sopenharmony_ci unsigned long flags; 56862306a36Sopenharmony_ci u64 mapping[IPOIB_CM_RX_SG]; 56962306a36Sopenharmony_ci int frags; 57062306a36Sopenharmony_ci int has_srq; 57162306a36Sopenharmony_ci struct sk_buff *small_skb; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci ipoib_dbg_data(priv, "cm recv completion: id %d, status: %d\n", 57462306a36Sopenharmony_ci wr_id, wc->status); 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci if (unlikely(wr_id >= ipoib_recvq_size)) { 57762306a36Sopenharmony_ci if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~(IPOIB_OP_CM | IPOIB_OP_RECV))) { 57862306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 57962306a36Sopenharmony_ci list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list); 58062306a36Sopenharmony_ci ipoib_cm_start_rx_drain(priv); 58162306a36Sopenharmony_ci queue_work(priv->wq, &priv->cm.rx_reap_task); 58262306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 58362306a36Sopenharmony_ci } else 58462306a36Sopenharmony_ci ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n", 58562306a36Sopenharmony_ci wr_id, ipoib_recvq_size); 58662306a36Sopenharmony_ci return; 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci p = wc->qp->qp_context; 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci has_srq = ipoib_cm_has_srq(dev); 59262306a36Sopenharmony_ci rx_ring = has_srq ? priv->cm.srq_ring : p->rx_ring; 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci skb = rx_ring[wr_id].skb; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci if (unlikely(wc->status != IB_WC_SUCCESS)) { 59762306a36Sopenharmony_ci ipoib_dbg(priv, 59862306a36Sopenharmony_ci "cm recv error (status=%d, wrid=%d vend_err %#x)\n", 59962306a36Sopenharmony_ci wc->status, wr_id, wc->vendor_err); 60062306a36Sopenharmony_ci ++dev->stats.rx_dropped; 60162306a36Sopenharmony_ci if (has_srq) 60262306a36Sopenharmony_ci goto repost; 60362306a36Sopenharmony_ci else { 60462306a36Sopenharmony_ci if (!--p->recv_count) { 60562306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 60662306a36Sopenharmony_ci list_move(&p->list, &priv->cm.rx_reap_list); 60762306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 60862306a36Sopenharmony_ci queue_work(priv->wq, &priv->cm.rx_reap_task); 60962306a36Sopenharmony_ci } 61062306a36Sopenharmony_ci return; 61162306a36Sopenharmony_ci } 61262306a36Sopenharmony_ci } 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) { 61562306a36Sopenharmony_ci if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) { 61662306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 61762306a36Sopenharmony_ci p->jiffies = jiffies; 61862306a36Sopenharmony_ci /* Move this entry to list head, but do not re-add it 61962306a36Sopenharmony_ci * if it has been moved out of list. */ 62062306a36Sopenharmony_ci if (p->state == IPOIB_CM_RX_LIVE) 62162306a36Sopenharmony_ci list_move(&p->list, &priv->cm.passive_ids); 62262306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 62362306a36Sopenharmony_ci } 62462306a36Sopenharmony_ci } 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci if (wc->byte_len < IPOIB_CM_COPYBREAK) { 62762306a36Sopenharmony_ci int dlen = wc->byte_len; 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE); 63062306a36Sopenharmony_ci if (small_skb) { 63162306a36Sopenharmony_ci skb_reserve(small_skb, IPOIB_CM_RX_RESERVE); 63262306a36Sopenharmony_ci ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0], 63362306a36Sopenharmony_ci dlen, DMA_FROM_DEVICE); 63462306a36Sopenharmony_ci skb_copy_from_linear_data(skb, small_skb->data, dlen); 63562306a36Sopenharmony_ci ib_dma_sync_single_for_device(priv->ca, rx_ring[wr_id].mapping[0], 63662306a36Sopenharmony_ci dlen, DMA_FROM_DEVICE); 63762306a36Sopenharmony_ci skb_put(small_skb, dlen); 63862306a36Sopenharmony_ci skb = small_skb; 63962306a36Sopenharmony_ci goto copied; 64062306a36Sopenharmony_ci } 64162306a36Sopenharmony_ci } 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci frags = PAGE_ALIGN(wc->byte_len - 64462306a36Sopenharmony_ci min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) / 64562306a36Sopenharmony_ci PAGE_SIZE; 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, 64862306a36Sopenharmony_ci mapping, GFP_ATOMIC); 64962306a36Sopenharmony_ci if (unlikely(!newskb)) { 65062306a36Sopenharmony_ci /* 65162306a36Sopenharmony_ci * If we can't allocate a new RX buffer, dump 65262306a36Sopenharmony_ci * this packet and reuse the old buffer. 65362306a36Sopenharmony_ci */ 65462306a36Sopenharmony_ci ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id); 65562306a36Sopenharmony_ci ++dev->stats.rx_dropped; 65662306a36Sopenharmony_ci goto repost; 65762306a36Sopenharmony_ci } 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); 66062306a36Sopenharmony_ci memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping)); 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", 66362306a36Sopenharmony_ci wc->byte_len, wc->slid); 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb); 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_cicopied: 66862306a36Sopenharmony_ci skb->protocol = ((struct ipoib_header *) skb->data)->proto; 66962306a36Sopenharmony_ci skb_add_pseudo_hdr(skb); 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci ++dev->stats.rx_packets; 67262306a36Sopenharmony_ci dev->stats.rx_bytes += skb->len; 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci skb->dev = dev; 67562306a36Sopenharmony_ci /* XXX get correct PACKET_ type here */ 67662306a36Sopenharmony_ci skb->pkt_type = PACKET_HOST; 67762306a36Sopenharmony_ci netif_receive_skb(skb); 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_cirepost: 68062306a36Sopenharmony_ci if (has_srq) { 68162306a36Sopenharmony_ci if (unlikely(ipoib_cm_post_receive_srq(dev, wr_id))) 68262306a36Sopenharmony_ci ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " 68362306a36Sopenharmony_ci "for buf %d\n", wr_id); 68462306a36Sopenharmony_ci } else { 68562306a36Sopenharmony_ci if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, 68662306a36Sopenharmony_ci &priv->cm.rx_wr, 68762306a36Sopenharmony_ci priv->cm.rx_sge, 68862306a36Sopenharmony_ci wr_id))) { 68962306a36Sopenharmony_ci --p->recv_count; 69062306a36Sopenharmony_ci ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " 69162306a36Sopenharmony_ci "for buf %d\n", wr_id); 69262306a36Sopenharmony_ci } 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci} 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_cistatic inline int post_send(struct ipoib_dev_priv *priv, 69762306a36Sopenharmony_ci struct ipoib_cm_tx *tx, 69862306a36Sopenharmony_ci unsigned int wr_id, 69962306a36Sopenharmony_ci struct ipoib_tx_buf *tx_req) 70062306a36Sopenharmony_ci{ 70162306a36Sopenharmony_ci ipoib_build_sge(priv, tx_req); 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL); 70662306a36Sopenharmony_ci} 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_civoid ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) 70962306a36Sopenharmony_ci{ 71062306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 71162306a36Sopenharmony_ci struct ipoib_tx_buf *tx_req; 71262306a36Sopenharmony_ci int rc; 71362306a36Sopenharmony_ci unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb); 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci if (unlikely(skb->len > tx->mtu)) { 71662306a36Sopenharmony_ci ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", 71762306a36Sopenharmony_ci skb->len, tx->mtu); 71862306a36Sopenharmony_ci ++dev->stats.tx_dropped; 71962306a36Sopenharmony_ci ++dev->stats.tx_errors; 72062306a36Sopenharmony_ci ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN); 72162306a36Sopenharmony_ci return; 72262306a36Sopenharmony_ci } 72362306a36Sopenharmony_ci if (skb_shinfo(skb)->nr_frags > usable_sge) { 72462306a36Sopenharmony_ci if (skb_linearize(skb) < 0) { 72562306a36Sopenharmony_ci ipoib_warn(priv, "skb could not be linearized\n"); 72662306a36Sopenharmony_ci ++dev->stats.tx_dropped; 72762306a36Sopenharmony_ci ++dev->stats.tx_errors; 72862306a36Sopenharmony_ci dev_kfree_skb_any(skb); 72962306a36Sopenharmony_ci return; 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci /* Does skb_linearize return ok without reducing nr_frags? */ 73262306a36Sopenharmony_ci if (skb_shinfo(skb)->nr_frags > usable_sge) { 73362306a36Sopenharmony_ci ipoib_warn(priv, "too many frags after skb linearize\n"); 73462306a36Sopenharmony_ci ++dev->stats.tx_dropped; 73562306a36Sopenharmony_ci ++dev->stats.tx_errors; 73662306a36Sopenharmony_ci dev_kfree_skb_any(skb); 73762306a36Sopenharmony_ci return; 73862306a36Sopenharmony_ci } 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n", 74162306a36Sopenharmony_ci tx->tx_head, skb->len, tx->qp->qp_num); 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci /* 74462306a36Sopenharmony_ci * We put the skb into the tx_ring _before_ we call post_send() 74562306a36Sopenharmony_ci * because it's entirely possible that the completion handler will 74662306a36Sopenharmony_ci * run before we execute anything after the post_send(). That 74762306a36Sopenharmony_ci * means we have to make sure everything is properly recorded and 74862306a36Sopenharmony_ci * our state is consistent before we call post_send(). 74962306a36Sopenharmony_ci */ 75062306a36Sopenharmony_ci tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)]; 75162306a36Sopenharmony_ci tx_req->skb = skb; 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) { 75462306a36Sopenharmony_ci ++dev->stats.tx_errors; 75562306a36Sopenharmony_ci dev_kfree_skb_any(skb); 75662306a36Sopenharmony_ci return; 75762306a36Sopenharmony_ci } 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci if ((priv->global_tx_head - priv->global_tx_tail) == 76062306a36Sopenharmony_ci ipoib_sendq_size - 1) { 76162306a36Sopenharmony_ci ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", 76262306a36Sopenharmony_ci tx->qp->qp_num); 76362306a36Sopenharmony_ci netif_stop_queue(dev); 76462306a36Sopenharmony_ci } 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci skb_orphan(skb); 76762306a36Sopenharmony_ci skb_dst_drop(skb); 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci if (netif_queue_stopped(dev)) { 77062306a36Sopenharmony_ci rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP | 77162306a36Sopenharmony_ci IB_CQ_REPORT_MISSED_EVENTS); 77262306a36Sopenharmony_ci if (unlikely(rc < 0)) 77362306a36Sopenharmony_ci ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n"); 77462306a36Sopenharmony_ci else if (rc) 77562306a36Sopenharmony_ci napi_schedule(&priv->send_napi); 77662306a36Sopenharmony_ci } 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), tx_req); 77962306a36Sopenharmony_ci if (unlikely(rc)) { 78062306a36Sopenharmony_ci ipoib_warn(priv, "IPoIB/CM:post_send failed, error %d\n", rc); 78162306a36Sopenharmony_ci ++dev->stats.tx_errors; 78262306a36Sopenharmony_ci ipoib_dma_unmap_tx(priv, tx_req); 78362306a36Sopenharmony_ci dev_kfree_skb_any(skb); 78462306a36Sopenharmony_ci 78562306a36Sopenharmony_ci if (netif_queue_stopped(dev)) 78662306a36Sopenharmony_ci netif_wake_queue(dev); 78762306a36Sopenharmony_ci } else { 78862306a36Sopenharmony_ci netif_trans_update(dev); 78962306a36Sopenharmony_ci ++tx->tx_head; 79062306a36Sopenharmony_ci ++priv->global_tx_head; 79162306a36Sopenharmony_ci } 79262306a36Sopenharmony_ci} 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_civoid ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) 79562306a36Sopenharmony_ci{ 79662306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 79762306a36Sopenharmony_ci struct ipoib_cm_tx *tx = wc->qp->qp_context; 79862306a36Sopenharmony_ci unsigned int wr_id = wc->wr_id & ~IPOIB_OP_CM; 79962306a36Sopenharmony_ci struct ipoib_tx_buf *tx_req; 80062306a36Sopenharmony_ci unsigned long flags; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci ipoib_dbg_data(priv, "cm send completion: id %d, status: %d\n", 80362306a36Sopenharmony_ci wr_id, wc->status); 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci if (unlikely(wr_id >= ipoib_sendq_size)) { 80662306a36Sopenharmony_ci ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n", 80762306a36Sopenharmony_ci wr_id, ipoib_sendq_size); 80862306a36Sopenharmony_ci return; 80962306a36Sopenharmony_ci } 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci tx_req = &tx->tx_ring[wr_id]; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci ipoib_dma_unmap_tx(priv, tx_req); 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci /* FIXME: is this right? Shouldn't we only increment on success? */ 81662306a36Sopenharmony_ci ++dev->stats.tx_packets; 81762306a36Sopenharmony_ci dev->stats.tx_bytes += tx_req->skb->len; 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci dev_kfree_skb_any(tx_req->skb); 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci netif_tx_lock(dev); 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci ++tx->tx_tail; 82462306a36Sopenharmony_ci ++priv->global_tx_tail; 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci if (unlikely(netif_queue_stopped(dev) && 82762306a36Sopenharmony_ci ((priv->global_tx_head - priv->global_tx_tail) <= 82862306a36Sopenharmony_ci ipoib_sendq_size >> 1) && 82962306a36Sopenharmony_ci test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) 83062306a36Sopenharmony_ci netif_wake_queue(dev); 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci if (wc->status != IB_WC_SUCCESS && 83362306a36Sopenharmony_ci wc->status != IB_WC_WR_FLUSH_ERR) { 83462306a36Sopenharmony_ci struct ipoib_neigh *neigh; 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci /* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle, 83762306a36Sopenharmony_ci * so don't make waves. 83862306a36Sopenharmony_ci */ 83962306a36Sopenharmony_ci if (wc->status == IB_WC_RNR_RETRY_EXC_ERR || 84062306a36Sopenharmony_ci wc->status == IB_WC_RETRY_EXC_ERR) 84162306a36Sopenharmony_ci ipoib_dbg(priv, 84262306a36Sopenharmony_ci "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n", 84362306a36Sopenharmony_ci __func__, wc->status, wr_id, wc->vendor_err); 84462306a36Sopenharmony_ci else 84562306a36Sopenharmony_ci ipoib_warn(priv, 84662306a36Sopenharmony_ci "%s: failed cm send event (status=%d, wrid=%d vend_err %#x)\n", 84762306a36Sopenharmony_ci __func__, wc->status, wr_id, wc->vendor_err); 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 85062306a36Sopenharmony_ci neigh = tx->neigh; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci if (neigh) { 85362306a36Sopenharmony_ci neigh->cm = NULL; 85462306a36Sopenharmony_ci ipoib_neigh_free(neigh); 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci tx->neigh = NULL; 85762306a36Sopenharmony_ci } 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 86062306a36Sopenharmony_ci list_move(&tx->list, &priv->cm.reap_list); 86162306a36Sopenharmony_ci queue_work(priv->wq, &priv->cm.reap_task); 86262306a36Sopenharmony_ci } 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 86762306a36Sopenharmony_ci } 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci netif_tx_unlock(dev); 87062306a36Sopenharmony_ci} 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ciint ipoib_cm_dev_open(struct net_device *dev) 87362306a36Sopenharmony_ci{ 87462306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 87562306a36Sopenharmony_ci int ret; 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci if (!IPOIB_CM_SUPPORTED(dev->dev_addr)) 87862306a36Sopenharmony_ci return 0; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev); 88162306a36Sopenharmony_ci if (IS_ERR(priv->cm.id)) { 88262306a36Sopenharmony_ci pr_warn("%s: failed to create CM ID\n", priv->ca->name); 88362306a36Sopenharmony_ci ret = PTR_ERR(priv->cm.id); 88462306a36Sopenharmony_ci goto err_cm; 88562306a36Sopenharmony_ci } 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci ret = ib_cm_listen(priv->cm.id, 88862306a36Sopenharmony_ci cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num)); 88962306a36Sopenharmony_ci if (ret) { 89062306a36Sopenharmony_ci pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name, 89162306a36Sopenharmony_ci IPOIB_CM_IETF_ID | priv->qp->qp_num); 89262306a36Sopenharmony_ci goto err_listen; 89362306a36Sopenharmony_ci } 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci return 0; 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_cierr_listen: 89862306a36Sopenharmony_ci ib_destroy_cm_id(priv->cm.id); 89962306a36Sopenharmony_cierr_cm: 90062306a36Sopenharmony_ci priv->cm.id = NULL; 90162306a36Sopenharmony_ci return ret; 90262306a36Sopenharmony_ci} 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_cistatic void ipoib_cm_free_rx_reap_list(struct net_device *dev) 90562306a36Sopenharmony_ci{ 90662306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 90762306a36Sopenharmony_ci struct ipoib_cm_rx *rx, *n; 90862306a36Sopenharmony_ci LIST_HEAD(list); 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 91162306a36Sopenharmony_ci list_splice_init(&priv->cm.rx_reap_list, &list); 91262306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci list_for_each_entry_safe(rx, n, &list, list) { 91562306a36Sopenharmony_ci ib_destroy_cm_id(rx->id); 91662306a36Sopenharmony_ci ib_destroy_qp(rx->qp); 91762306a36Sopenharmony_ci if (!ipoib_cm_has_srq(dev)) { 91862306a36Sopenharmony_ci ipoib_cm_free_rx_ring(priv->dev, rx->rx_ring); 91962306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 92062306a36Sopenharmony_ci --priv->cm.nonsrq_conn_qp; 92162306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 92262306a36Sopenharmony_ci } 92362306a36Sopenharmony_ci kfree(rx); 92462306a36Sopenharmony_ci } 92562306a36Sopenharmony_ci} 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_civoid ipoib_cm_dev_stop(struct net_device *dev) 92862306a36Sopenharmony_ci{ 92962306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 93062306a36Sopenharmony_ci struct ipoib_cm_rx *p; 93162306a36Sopenharmony_ci unsigned long begin; 93262306a36Sopenharmony_ci int ret; 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id) 93562306a36Sopenharmony_ci return; 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci ib_destroy_cm_id(priv->cm.id); 93862306a36Sopenharmony_ci priv->cm.id = NULL; 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 94162306a36Sopenharmony_ci while (!list_empty(&priv->cm.passive_ids)) { 94262306a36Sopenharmony_ci p = list_entry(priv->cm.passive_ids.next, typeof(*p), list); 94362306a36Sopenharmony_ci list_move(&p->list, &priv->cm.rx_error_list); 94462306a36Sopenharmony_ci p->state = IPOIB_CM_RX_ERROR; 94562306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 94662306a36Sopenharmony_ci ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); 94762306a36Sopenharmony_ci if (ret) 94862306a36Sopenharmony_ci ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); 94962306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 95062306a36Sopenharmony_ci } 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci /* Wait for all RX to be drained */ 95362306a36Sopenharmony_ci begin = jiffies; 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci while (!list_empty(&priv->cm.rx_error_list) || 95662306a36Sopenharmony_ci !list_empty(&priv->cm.rx_flush_list) || 95762306a36Sopenharmony_ci !list_empty(&priv->cm.rx_drain_list)) { 95862306a36Sopenharmony_ci if (time_after(jiffies, begin + 5 * HZ)) { 95962306a36Sopenharmony_ci ipoib_warn(priv, "RX drain timing out\n"); 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci /* 96262306a36Sopenharmony_ci * assume the HW is wedged and just free up everything. 96362306a36Sopenharmony_ci */ 96462306a36Sopenharmony_ci list_splice_init(&priv->cm.rx_flush_list, 96562306a36Sopenharmony_ci &priv->cm.rx_reap_list); 96662306a36Sopenharmony_ci list_splice_init(&priv->cm.rx_error_list, 96762306a36Sopenharmony_ci &priv->cm.rx_reap_list); 96862306a36Sopenharmony_ci list_splice_init(&priv->cm.rx_drain_list, 96962306a36Sopenharmony_ci &priv->cm.rx_reap_list); 97062306a36Sopenharmony_ci break; 97162306a36Sopenharmony_ci } 97262306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 97362306a36Sopenharmony_ci usleep_range(1000, 2000); 97462306a36Sopenharmony_ci ipoib_drain_cq(dev); 97562306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 97662306a36Sopenharmony_ci } 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci ipoib_cm_free_rx_reap_list(dev); 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci cancel_delayed_work(&priv->cm.stale_task); 98362306a36Sopenharmony_ci} 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_cistatic int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, 98662306a36Sopenharmony_ci const struct ib_cm_event *event) 98762306a36Sopenharmony_ci{ 98862306a36Sopenharmony_ci struct ipoib_cm_tx *p = cm_id->context; 98962306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(p->dev); 99062306a36Sopenharmony_ci struct ipoib_cm_data *data = event->private_data; 99162306a36Sopenharmony_ci struct sk_buff_head skqueue; 99262306a36Sopenharmony_ci struct ib_qp_attr qp_attr; 99362306a36Sopenharmony_ci int qp_attr_mask, ret; 99462306a36Sopenharmony_ci struct sk_buff *skb; 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci p->mtu = be32_to_cpu(data->mtu); 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci if (p->mtu <= IPOIB_ENCAP_LEN) { 99962306a36Sopenharmony_ci ipoib_warn(priv, "Rejecting connection: mtu %d <= %d\n", 100062306a36Sopenharmony_ci p->mtu, IPOIB_ENCAP_LEN); 100162306a36Sopenharmony_ci return -EINVAL; 100262306a36Sopenharmony_ci } 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci qp_attr.qp_state = IB_QPS_RTR; 100562306a36Sopenharmony_ci ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 100662306a36Sopenharmony_ci if (ret) { 100762306a36Sopenharmony_ci ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret); 100862306a36Sopenharmony_ci return ret; 100962306a36Sopenharmony_ci } 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci qp_attr.rq_psn = 0 /* FIXME */; 101262306a36Sopenharmony_ci ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); 101362306a36Sopenharmony_ci if (ret) { 101462306a36Sopenharmony_ci ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret); 101562306a36Sopenharmony_ci return ret; 101662306a36Sopenharmony_ci } 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci qp_attr.qp_state = IB_QPS_RTS; 101962306a36Sopenharmony_ci ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask); 102062306a36Sopenharmony_ci if (ret) { 102162306a36Sopenharmony_ci ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret); 102262306a36Sopenharmony_ci return ret; 102362306a36Sopenharmony_ci } 102462306a36Sopenharmony_ci ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask); 102562306a36Sopenharmony_ci if (ret) { 102662306a36Sopenharmony_ci ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret); 102762306a36Sopenharmony_ci return ret; 102862306a36Sopenharmony_ci } 102962306a36Sopenharmony_ci 103062306a36Sopenharmony_ci skb_queue_head_init(&skqueue); 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci netif_tx_lock_bh(p->dev); 103362306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 103462306a36Sopenharmony_ci set_bit(IPOIB_FLAG_OPER_UP, &p->flags); 103562306a36Sopenharmony_ci if (p->neigh) 103662306a36Sopenharmony_ci while ((skb = __skb_dequeue(&p->neigh->queue))) 103762306a36Sopenharmony_ci __skb_queue_tail(&skqueue, skb); 103862306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 103962306a36Sopenharmony_ci netif_tx_unlock_bh(p->dev); 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci while ((skb = __skb_dequeue(&skqueue))) { 104262306a36Sopenharmony_ci skb->dev = p->dev; 104362306a36Sopenharmony_ci ret = dev_queue_xmit(skb); 104462306a36Sopenharmony_ci if (ret) 104562306a36Sopenharmony_ci ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n", 104662306a36Sopenharmony_ci __func__, ret); 104762306a36Sopenharmony_ci } 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ci ret = ib_send_cm_rtu(cm_id, NULL, 0); 105062306a36Sopenharmony_ci if (ret) { 105162306a36Sopenharmony_ci ipoib_warn(priv, "failed to send RTU: %d\n", ret); 105262306a36Sopenharmony_ci return ret; 105362306a36Sopenharmony_ci } 105462306a36Sopenharmony_ci return 0; 105562306a36Sopenharmony_ci} 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_cistatic struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_cm_tx *tx) 105862306a36Sopenharmony_ci{ 105962306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 106062306a36Sopenharmony_ci struct ib_qp_init_attr attr = { 106162306a36Sopenharmony_ci .send_cq = priv->send_cq, 106262306a36Sopenharmony_ci .recv_cq = priv->recv_cq, 106362306a36Sopenharmony_ci .srq = priv->cm.srq, 106462306a36Sopenharmony_ci .cap.max_send_wr = ipoib_sendq_size, 106562306a36Sopenharmony_ci .cap.max_send_sge = 1, 106662306a36Sopenharmony_ci .sq_sig_type = IB_SIGNAL_ALL_WR, 106762306a36Sopenharmony_ci .qp_type = IB_QPT_RC, 106862306a36Sopenharmony_ci .qp_context = tx, 106962306a36Sopenharmony_ci .create_flags = 0 107062306a36Sopenharmony_ci }; 107162306a36Sopenharmony_ci struct ib_qp *tx_qp; 107262306a36Sopenharmony_ci 107362306a36Sopenharmony_ci if (dev->features & NETIF_F_SG) 107462306a36Sopenharmony_ci attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, 107562306a36Sopenharmony_ci MAX_SKB_FRAGS + 1); 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci tx_qp = ib_create_qp(priv->pd, &attr); 107862306a36Sopenharmony_ci tx->max_send_sge = attr.cap.max_send_sge; 107962306a36Sopenharmony_ci return tx_qp; 108062306a36Sopenharmony_ci} 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_cistatic int ipoib_cm_send_req(struct net_device *dev, 108362306a36Sopenharmony_ci struct ib_cm_id *id, struct ib_qp *qp, 108462306a36Sopenharmony_ci u32 qpn, 108562306a36Sopenharmony_ci struct sa_path_rec *pathrec) 108662306a36Sopenharmony_ci{ 108762306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 108862306a36Sopenharmony_ci struct ipoib_cm_data data = {}; 108962306a36Sopenharmony_ci struct ib_cm_req_param req = {}; 109062306a36Sopenharmony_ci 109162306a36Sopenharmony_ci data.qpn = cpu_to_be32(priv->qp->qp_num); 109262306a36Sopenharmony_ci data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ci req.primary_path = pathrec; 109562306a36Sopenharmony_ci req.alternate_path = NULL; 109662306a36Sopenharmony_ci req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn); 109762306a36Sopenharmony_ci req.qp_num = qp->qp_num; 109862306a36Sopenharmony_ci req.qp_type = qp->qp_type; 109962306a36Sopenharmony_ci req.private_data = &data; 110062306a36Sopenharmony_ci req.private_data_len = sizeof(data); 110162306a36Sopenharmony_ci req.flow_control = 0; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci req.starting_psn = 0; /* FIXME */ 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci /* 110662306a36Sopenharmony_ci * Pick some arbitrary defaults here; we could make these 110762306a36Sopenharmony_ci * module parameters if anyone cared about setting them. 110862306a36Sopenharmony_ci */ 110962306a36Sopenharmony_ci req.responder_resources = 4; 111062306a36Sopenharmony_ci req.remote_cm_response_timeout = 20; 111162306a36Sopenharmony_ci req.local_cm_response_timeout = 20; 111262306a36Sopenharmony_ci req.retry_count = 0; /* RFC draft warns against retries */ 111362306a36Sopenharmony_ci req.rnr_retry_count = 0; /* RFC draft warns against retries */ 111462306a36Sopenharmony_ci req.max_cm_retries = 15; 111562306a36Sopenharmony_ci req.srq = ipoib_cm_has_srq(dev); 111662306a36Sopenharmony_ci return ib_send_cm_req(id, &req); 111762306a36Sopenharmony_ci} 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_cistatic int ipoib_cm_modify_tx_init(struct net_device *dev, 112062306a36Sopenharmony_ci struct ib_cm_id *cm_id, struct ib_qp *qp) 112162306a36Sopenharmony_ci{ 112262306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 112362306a36Sopenharmony_ci struct ib_qp_attr qp_attr; 112462306a36Sopenharmony_ci int qp_attr_mask, ret; 112562306a36Sopenharmony_ci 112662306a36Sopenharmony_ci qp_attr.pkey_index = priv->pkey_index; 112762306a36Sopenharmony_ci qp_attr.qp_state = IB_QPS_INIT; 112862306a36Sopenharmony_ci qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE; 112962306a36Sopenharmony_ci qp_attr.port_num = priv->port; 113062306a36Sopenharmony_ci qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 113362306a36Sopenharmony_ci if (ret) { 113462306a36Sopenharmony_ci ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret); 113562306a36Sopenharmony_ci return ret; 113662306a36Sopenharmony_ci } 113762306a36Sopenharmony_ci return 0; 113862306a36Sopenharmony_ci} 113962306a36Sopenharmony_ci 114062306a36Sopenharmony_cistatic int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, 114162306a36Sopenharmony_ci struct sa_path_rec *pathrec) 114262306a36Sopenharmony_ci{ 114362306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(p->dev); 114462306a36Sopenharmony_ci unsigned int noio_flag; 114562306a36Sopenharmony_ci int ret; 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_ci noio_flag = memalloc_noio_save(); 114862306a36Sopenharmony_ci p->tx_ring = vzalloc(array_size(ipoib_sendq_size, sizeof(*p->tx_ring))); 114962306a36Sopenharmony_ci if (!p->tx_ring) { 115062306a36Sopenharmony_ci memalloc_noio_restore(noio_flag); 115162306a36Sopenharmony_ci ret = -ENOMEM; 115262306a36Sopenharmony_ci goto err_tx; 115362306a36Sopenharmony_ci } 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci p->qp = ipoib_cm_create_tx_qp(p->dev, p); 115662306a36Sopenharmony_ci memalloc_noio_restore(noio_flag); 115762306a36Sopenharmony_ci if (IS_ERR(p->qp)) { 115862306a36Sopenharmony_ci ret = PTR_ERR(p->qp); 115962306a36Sopenharmony_ci ipoib_warn(priv, "failed to create tx qp: %d\n", ret); 116062306a36Sopenharmony_ci goto err_qp; 116162306a36Sopenharmony_ci } 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_ci p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p); 116462306a36Sopenharmony_ci if (IS_ERR(p->id)) { 116562306a36Sopenharmony_ci ret = PTR_ERR(p->id); 116662306a36Sopenharmony_ci ipoib_warn(priv, "failed to create tx cm id: %d\n", ret); 116762306a36Sopenharmony_ci goto err_id; 116862306a36Sopenharmony_ci } 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp); 117162306a36Sopenharmony_ci if (ret) { 117262306a36Sopenharmony_ci ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret); 117362306a36Sopenharmony_ci goto err_modify_send; 117462306a36Sopenharmony_ci } 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec); 117762306a36Sopenharmony_ci if (ret) { 117862306a36Sopenharmony_ci ipoib_warn(priv, "failed to send cm req: %d\n", ret); 117962306a36Sopenharmony_ci goto err_modify_send; 118062306a36Sopenharmony_ci } 118162306a36Sopenharmony_ci 118262306a36Sopenharmony_ci ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n", 118362306a36Sopenharmony_ci p->qp->qp_num, pathrec->dgid.raw, qpn); 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci return 0; 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_cierr_modify_send: 118862306a36Sopenharmony_ci ib_destroy_cm_id(p->id); 118962306a36Sopenharmony_cierr_id: 119062306a36Sopenharmony_ci p->id = NULL; 119162306a36Sopenharmony_ci ib_destroy_qp(p->qp); 119262306a36Sopenharmony_cierr_qp: 119362306a36Sopenharmony_ci p->qp = NULL; 119462306a36Sopenharmony_ci vfree(p->tx_ring); 119562306a36Sopenharmony_cierr_tx: 119662306a36Sopenharmony_ci return ret; 119762306a36Sopenharmony_ci} 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_cistatic void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) 120062306a36Sopenharmony_ci{ 120162306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(p->dev); 120262306a36Sopenharmony_ci struct ipoib_tx_buf *tx_req; 120362306a36Sopenharmony_ci unsigned long begin; 120462306a36Sopenharmony_ci 120562306a36Sopenharmony_ci ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", 120662306a36Sopenharmony_ci p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail); 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci if (p->id) 120962306a36Sopenharmony_ci ib_destroy_cm_id(p->id); 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_ci if (p->tx_ring) { 121262306a36Sopenharmony_ci /* Wait for all sends to complete */ 121362306a36Sopenharmony_ci begin = jiffies; 121462306a36Sopenharmony_ci while ((int) p->tx_tail - (int) p->tx_head < 0) { 121562306a36Sopenharmony_ci if (time_after(jiffies, begin + 5 * HZ)) { 121662306a36Sopenharmony_ci ipoib_warn(priv, "timing out; %d sends not completed\n", 121762306a36Sopenharmony_ci p->tx_head - p->tx_tail); 121862306a36Sopenharmony_ci goto timeout; 121962306a36Sopenharmony_ci } 122062306a36Sopenharmony_ci 122162306a36Sopenharmony_ci usleep_range(1000, 2000); 122262306a36Sopenharmony_ci } 122362306a36Sopenharmony_ci } 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_citimeout: 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci while ((int) p->tx_tail - (int) p->tx_head < 0) { 122862306a36Sopenharmony_ci tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)]; 122962306a36Sopenharmony_ci ipoib_dma_unmap_tx(priv, tx_req); 123062306a36Sopenharmony_ci dev_kfree_skb_any(tx_req->skb); 123162306a36Sopenharmony_ci netif_tx_lock_bh(p->dev); 123262306a36Sopenharmony_ci ++p->tx_tail; 123362306a36Sopenharmony_ci ++priv->global_tx_tail; 123462306a36Sopenharmony_ci if (unlikely((priv->global_tx_head - priv->global_tx_tail) <= 123562306a36Sopenharmony_ci ipoib_sendq_size >> 1) && 123662306a36Sopenharmony_ci netif_queue_stopped(p->dev) && 123762306a36Sopenharmony_ci test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) 123862306a36Sopenharmony_ci netif_wake_queue(p->dev); 123962306a36Sopenharmony_ci netif_tx_unlock_bh(p->dev); 124062306a36Sopenharmony_ci } 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci if (p->qp) 124362306a36Sopenharmony_ci ib_destroy_qp(p->qp); 124462306a36Sopenharmony_ci 124562306a36Sopenharmony_ci vfree(p->tx_ring); 124662306a36Sopenharmony_ci kfree(p); 124762306a36Sopenharmony_ci} 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_cistatic int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, 125062306a36Sopenharmony_ci const struct ib_cm_event *event) 125162306a36Sopenharmony_ci{ 125262306a36Sopenharmony_ci struct ipoib_cm_tx *tx = cm_id->context; 125362306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); 125462306a36Sopenharmony_ci struct net_device *dev = priv->dev; 125562306a36Sopenharmony_ci struct ipoib_neigh *neigh; 125662306a36Sopenharmony_ci unsigned long flags; 125762306a36Sopenharmony_ci int ret; 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci switch (event->event) { 126062306a36Sopenharmony_ci case IB_CM_DREQ_RECEIVED: 126162306a36Sopenharmony_ci ipoib_dbg(priv, "DREQ received.\n"); 126262306a36Sopenharmony_ci ib_send_cm_drep(cm_id, NULL, 0); 126362306a36Sopenharmony_ci break; 126462306a36Sopenharmony_ci case IB_CM_REP_RECEIVED: 126562306a36Sopenharmony_ci ipoib_dbg(priv, "REP received.\n"); 126662306a36Sopenharmony_ci ret = ipoib_cm_rep_handler(cm_id, event); 126762306a36Sopenharmony_ci if (ret) 126862306a36Sopenharmony_ci ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, 126962306a36Sopenharmony_ci NULL, 0, NULL, 0); 127062306a36Sopenharmony_ci break; 127162306a36Sopenharmony_ci case IB_CM_REQ_ERROR: 127262306a36Sopenharmony_ci case IB_CM_REJ_RECEIVED: 127362306a36Sopenharmony_ci case IB_CM_TIMEWAIT_EXIT: 127462306a36Sopenharmony_ci ipoib_dbg(priv, "CM error %d.\n", event->event); 127562306a36Sopenharmony_ci netif_tx_lock_bh(dev); 127662306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 127762306a36Sopenharmony_ci neigh = tx->neigh; 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_ci if (neigh) { 128062306a36Sopenharmony_ci neigh->cm = NULL; 128162306a36Sopenharmony_ci ipoib_neigh_free(neigh); 128262306a36Sopenharmony_ci 128362306a36Sopenharmony_ci tx->neigh = NULL; 128462306a36Sopenharmony_ci } 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_ci if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 128762306a36Sopenharmony_ci list_move(&tx->list, &priv->cm.reap_list); 128862306a36Sopenharmony_ci queue_work(priv->wq, &priv->cm.reap_task); 128962306a36Sopenharmony_ci } 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 129262306a36Sopenharmony_ci netif_tx_unlock_bh(dev); 129362306a36Sopenharmony_ci break; 129462306a36Sopenharmony_ci default: 129562306a36Sopenharmony_ci break; 129662306a36Sopenharmony_ci } 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci return 0; 129962306a36Sopenharmony_ci} 130062306a36Sopenharmony_ci 130162306a36Sopenharmony_cistruct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path, 130262306a36Sopenharmony_ci struct ipoib_neigh *neigh) 130362306a36Sopenharmony_ci{ 130462306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 130562306a36Sopenharmony_ci struct ipoib_cm_tx *tx; 130662306a36Sopenharmony_ci 130762306a36Sopenharmony_ci tx = kzalloc(sizeof(*tx), GFP_ATOMIC); 130862306a36Sopenharmony_ci if (!tx) 130962306a36Sopenharmony_ci return NULL; 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci neigh->cm = tx; 131262306a36Sopenharmony_ci tx->neigh = neigh; 131362306a36Sopenharmony_ci tx->dev = dev; 131462306a36Sopenharmony_ci list_add(&tx->list, &priv->cm.start_list); 131562306a36Sopenharmony_ci set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags); 131662306a36Sopenharmony_ci queue_work(priv->wq, &priv->cm.start_task); 131762306a36Sopenharmony_ci return tx; 131862306a36Sopenharmony_ci} 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_civoid ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) 132162306a36Sopenharmony_ci{ 132262306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); 132362306a36Sopenharmony_ci unsigned long flags; 132462306a36Sopenharmony_ci if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 132562306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 132662306a36Sopenharmony_ci list_move(&tx->list, &priv->cm.reap_list); 132762306a36Sopenharmony_ci queue_work(priv->wq, &priv->cm.reap_task); 132862306a36Sopenharmony_ci ipoib_dbg(priv, "Reap connection for gid %pI6\n", 132962306a36Sopenharmony_ci tx->neigh->daddr + 4); 133062306a36Sopenharmony_ci tx->neigh = NULL; 133162306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 133262306a36Sopenharmony_ci } 133362306a36Sopenharmony_ci} 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci#define QPN_AND_OPTIONS_OFFSET 4 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_cistatic void ipoib_cm_tx_start(struct work_struct *work) 133862306a36Sopenharmony_ci{ 133962306a36Sopenharmony_ci struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 134062306a36Sopenharmony_ci cm.start_task); 134162306a36Sopenharmony_ci struct net_device *dev = priv->dev; 134262306a36Sopenharmony_ci struct ipoib_neigh *neigh; 134362306a36Sopenharmony_ci struct ipoib_cm_tx *p; 134462306a36Sopenharmony_ci unsigned long flags; 134562306a36Sopenharmony_ci struct ipoib_path *path; 134662306a36Sopenharmony_ci int ret; 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_ci struct sa_path_rec pathrec; 134962306a36Sopenharmony_ci u32 qpn; 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci netif_tx_lock_bh(dev); 135262306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_ci while (!list_empty(&priv->cm.start_list)) { 135562306a36Sopenharmony_ci p = list_entry(priv->cm.start_list.next, typeof(*p), list); 135662306a36Sopenharmony_ci list_del_init(&p->list); 135762306a36Sopenharmony_ci neigh = p->neigh; 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci qpn = IPOIB_QPN(neigh->daddr); 136062306a36Sopenharmony_ci /* 136162306a36Sopenharmony_ci * As long as the search is with these 2 locks, 136262306a36Sopenharmony_ci * path existence indicates its validity. 136362306a36Sopenharmony_ci */ 136462306a36Sopenharmony_ci path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET); 136562306a36Sopenharmony_ci if (!path) { 136662306a36Sopenharmony_ci pr_info("%s ignore not valid path %pI6\n", 136762306a36Sopenharmony_ci __func__, 136862306a36Sopenharmony_ci neigh->daddr + QPN_AND_OPTIONS_OFFSET); 136962306a36Sopenharmony_ci goto free_neigh; 137062306a36Sopenharmony_ci } 137162306a36Sopenharmony_ci memcpy(&pathrec, &path->pathrec, sizeof(pathrec)); 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 137462306a36Sopenharmony_ci netif_tx_unlock_bh(dev); 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci ret = ipoib_cm_tx_init(p, qpn, &pathrec); 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci netif_tx_lock_bh(dev); 137962306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci if (ret) { 138262306a36Sopenharmony_cifree_neigh: 138362306a36Sopenharmony_ci neigh = p->neigh; 138462306a36Sopenharmony_ci if (neigh) { 138562306a36Sopenharmony_ci neigh->cm = NULL; 138662306a36Sopenharmony_ci ipoib_neigh_free(neigh); 138762306a36Sopenharmony_ci } 138862306a36Sopenharmony_ci list_del(&p->list); 138962306a36Sopenharmony_ci kfree(p); 139062306a36Sopenharmony_ci } 139162306a36Sopenharmony_ci } 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 139462306a36Sopenharmony_ci netif_tx_unlock_bh(dev); 139562306a36Sopenharmony_ci} 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_cistatic void ipoib_cm_tx_reap(struct work_struct *work) 139862306a36Sopenharmony_ci{ 139962306a36Sopenharmony_ci struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 140062306a36Sopenharmony_ci cm.reap_task); 140162306a36Sopenharmony_ci struct net_device *dev = priv->dev; 140262306a36Sopenharmony_ci struct ipoib_cm_tx *p; 140362306a36Sopenharmony_ci unsigned long flags; 140462306a36Sopenharmony_ci 140562306a36Sopenharmony_ci netif_tx_lock_bh(dev); 140662306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 140762306a36Sopenharmony_ci 140862306a36Sopenharmony_ci while (!list_empty(&priv->cm.reap_list)) { 140962306a36Sopenharmony_ci p = list_entry(priv->cm.reap_list.next, typeof(*p), list); 141062306a36Sopenharmony_ci list_del_init(&p->list); 141162306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 141262306a36Sopenharmony_ci netif_tx_unlock_bh(dev); 141362306a36Sopenharmony_ci ipoib_cm_tx_destroy(p); 141462306a36Sopenharmony_ci netif_tx_lock_bh(dev); 141562306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 141662306a36Sopenharmony_ci } 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 141962306a36Sopenharmony_ci netif_tx_unlock_bh(dev); 142062306a36Sopenharmony_ci} 142162306a36Sopenharmony_ci 142262306a36Sopenharmony_cistatic void ipoib_cm_skb_reap(struct work_struct *work) 142362306a36Sopenharmony_ci{ 142462306a36Sopenharmony_ci struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 142562306a36Sopenharmony_ci cm.skb_task); 142662306a36Sopenharmony_ci struct net_device *dev = priv->dev; 142762306a36Sopenharmony_ci struct sk_buff *skb; 142862306a36Sopenharmony_ci unsigned long flags; 142962306a36Sopenharmony_ci unsigned int mtu = priv->mcast_mtu; 143062306a36Sopenharmony_ci 143162306a36Sopenharmony_ci netif_tx_lock_bh(dev); 143262306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci while ((skb = skb_dequeue(&priv->cm.skb_queue))) { 143562306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 143662306a36Sopenharmony_ci netif_tx_unlock_bh(dev); 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_ci if (skb->protocol == htons(ETH_P_IP)) { 143962306a36Sopenharmony_ci memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 144062306a36Sopenharmony_ci icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 144162306a36Sopenharmony_ci } 144262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 144362306a36Sopenharmony_ci else if (skb->protocol == htons(ETH_P_IPV6)) { 144462306a36Sopenharmony_ci memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); 144562306a36Sopenharmony_ci icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 144662306a36Sopenharmony_ci } 144762306a36Sopenharmony_ci#endif 144862306a36Sopenharmony_ci dev_kfree_skb_any(skb); 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci netif_tx_lock_bh(dev); 145162306a36Sopenharmony_ci spin_lock_irqsave(&priv->lock, flags); 145262306a36Sopenharmony_ci } 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_ci spin_unlock_irqrestore(&priv->lock, flags); 145562306a36Sopenharmony_ci netif_tx_unlock_bh(dev); 145662306a36Sopenharmony_ci} 145762306a36Sopenharmony_ci 145862306a36Sopenharmony_civoid ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, 145962306a36Sopenharmony_ci unsigned int mtu) 146062306a36Sopenharmony_ci{ 146162306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 146262306a36Sopenharmony_ci int e = skb_queue_empty(&priv->cm.skb_queue); 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci skb_dst_update_pmtu(skb, mtu); 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci skb_queue_tail(&priv->cm.skb_queue, skb); 146762306a36Sopenharmony_ci if (e) 146862306a36Sopenharmony_ci queue_work(priv->wq, &priv->cm.skb_task); 146962306a36Sopenharmony_ci} 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_cistatic void ipoib_cm_rx_reap(struct work_struct *work) 147262306a36Sopenharmony_ci{ 147362306a36Sopenharmony_ci ipoib_cm_free_rx_reap_list(container_of(work, struct ipoib_dev_priv, 147462306a36Sopenharmony_ci cm.rx_reap_task)->dev); 147562306a36Sopenharmony_ci} 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_cistatic void ipoib_cm_stale_task(struct work_struct *work) 147862306a36Sopenharmony_ci{ 147962306a36Sopenharmony_ci struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, 148062306a36Sopenharmony_ci cm.stale_task.work); 148162306a36Sopenharmony_ci struct ipoib_cm_rx *p; 148262306a36Sopenharmony_ci int ret; 148362306a36Sopenharmony_ci 148462306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 148562306a36Sopenharmony_ci while (!list_empty(&priv->cm.passive_ids)) { 148662306a36Sopenharmony_ci /* List is sorted by LRU, start from tail, 148762306a36Sopenharmony_ci * stop when we see a recently used entry */ 148862306a36Sopenharmony_ci p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list); 148962306a36Sopenharmony_ci if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT)) 149062306a36Sopenharmony_ci break; 149162306a36Sopenharmony_ci list_move(&p->list, &priv->cm.rx_error_list); 149262306a36Sopenharmony_ci p->state = IPOIB_CM_RX_ERROR; 149362306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 149462306a36Sopenharmony_ci ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE); 149562306a36Sopenharmony_ci if (ret) 149662306a36Sopenharmony_ci ipoib_warn(priv, "unable to move qp to error state: %d\n", ret); 149762306a36Sopenharmony_ci spin_lock_irq(&priv->lock); 149862306a36Sopenharmony_ci } 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci if (!list_empty(&priv->cm.passive_ids)) 150162306a36Sopenharmony_ci queue_delayed_work(priv->wq, 150262306a36Sopenharmony_ci &priv->cm.stale_task, IPOIB_CM_RX_DELAY); 150362306a36Sopenharmony_ci spin_unlock_irq(&priv->lock); 150462306a36Sopenharmony_ci} 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_cistatic ssize_t mode_show(struct device *d, struct device_attribute *attr, 150762306a36Sopenharmony_ci char *buf) 150862306a36Sopenharmony_ci{ 150962306a36Sopenharmony_ci struct net_device *dev = to_net_dev(d); 151062306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_ci if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags)) 151362306a36Sopenharmony_ci return sysfs_emit(buf, "connected\n"); 151462306a36Sopenharmony_ci else 151562306a36Sopenharmony_ci return sysfs_emit(buf, "datagram\n"); 151662306a36Sopenharmony_ci} 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_cistatic ssize_t mode_store(struct device *d, struct device_attribute *attr, 151962306a36Sopenharmony_ci const char *buf, size_t count) 152062306a36Sopenharmony_ci{ 152162306a36Sopenharmony_ci struct net_device *dev = to_net_dev(d); 152262306a36Sopenharmony_ci int ret; 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci if (!rtnl_trylock()) { 152562306a36Sopenharmony_ci return restart_syscall(); 152662306a36Sopenharmony_ci } 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci if (dev->reg_state != NETREG_REGISTERED) { 152962306a36Sopenharmony_ci rtnl_unlock(); 153062306a36Sopenharmony_ci return -EPERM; 153162306a36Sopenharmony_ci } 153262306a36Sopenharmony_ci 153362306a36Sopenharmony_ci ret = ipoib_set_mode(dev, buf); 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci /* The assumption is that the function ipoib_set_mode returned 153662306a36Sopenharmony_ci * with the rtnl held by it, if not the value -EBUSY returned, 153762306a36Sopenharmony_ci * then no need to rtnl_unlock 153862306a36Sopenharmony_ci */ 153962306a36Sopenharmony_ci if (ret != -EBUSY) 154062306a36Sopenharmony_ci rtnl_unlock(); 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci return (!ret || ret == -EBUSY) ? count : ret; 154362306a36Sopenharmony_ci} 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_cistatic DEVICE_ATTR_RW(mode); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ciint ipoib_cm_add_mode_attr(struct net_device *dev) 154862306a36Sopenharmony_ci{ 154962306a36Sopenharmony_ci return device_create_file(&dev->dev, &dev_attr_mode); 155062306a36Sopenharmony_ci} 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_cistatic void ipoib_cm_create_srq(struct net_device *dev, int max_sge) 155362306a36Sopenharmony_ci{ 155462306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 155562306a36Sopenharmony_ci struct ib_srq_init_attr srq_init_attr = { 155662306a36Sopenharmony_ci .srq_type = IB_SRQT_BASIC, 155762306a36Sopenharmony_ci .attr = { 155862306a36Sopenharmony_ci .max_wr = ipoib_recvq_size, 155962306a36Sopenharmony_ci .max_sge = max_sge 156062306a36Sopenharmony_ci } 156162306a36Sopenharmony_ci }; 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); 156462306a36Sopenharmony_ci if (IS_ERR(priv->cm.srq)) { 156562306a36Sopenharmony_ci if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP) 156662306a36Sopenharmony_ci pr_warn("%s: failed to allocate SRQ, error %ld\n", 156762306a36Sopenharmony_ci priv->ca->name, PTR_ERR(priv->cm.srq)); 156862306a36Sopenharmony_ci priv->cm.srq = NULL; 156962306a36Sopenharmony_ci return; 157062306a36Sopenharmony_ci } 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci priv->cm.srq_ring = vzalloc(array_size(ipoib_recvq_size, 157362306a36Sopenharmony_ci sizeof(*priv->cm.srq_ring))); 157462306a36Sopenharmony_ci if (!priv->cm.srq_ring) { 157562306a36Sopenharmony_ci ib_destroy_srq(priv->cm.srq); 157662306a36Sopenharmony_ci priv->cm.srq = NULL; 157762306a36Sopenharmony_ci return; 157862306a36Sopenharmony_ci } 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci} 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ciint ipoib_cm_dev_init(struct net_device *dev) 158362306a36Sopenharmony_ci{ 158462306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 158562306a36Sopenharmony_ci int max_srq_sge, i; 158662306a36Sopenharmony_ci u8 addr; 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci INIT_LIST_HEAD(&priv->cm.passive_ids); 158962306a36Sopenharmony_ci INIT_LIST_HEAD(&priv->cm.reap_list); 159062306a36Sopenharmony_ci INIT_LIST_HEAD(&priv->cm.start_list); 159162306a36Sopenharmony_ci INIT_LIST_HEAD(&priv->cm.rx_error_list); 159262306a36Sopenharmony_ci INIT_LIST_HEAD(&priv->cm.rx_flush_list); 159362306a36Sopenharmony_ci INIT_LIST_HEAD(&priv->cm.rx_drain_list); 159462306a36Sopenharmony_ci INIT_LIST_HEAD(&priv->cm.rx_reap_list); 159562306a36Sopenharmony_ci INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start); 159662306a36Sopenharmony_ci INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap); 159762306a36Sopenharmony_ci INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap); 159862306a36Sopenharmony_ci INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap); 159962306a36Sopenharmony_ci INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task); 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci skb_queue_head_init(&priv->cm.skb_queue); 160262306a36Sopenharmony_ci 160362306a36Sopenharmony_ci ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge); 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_ci max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge); 160662306a36Sopenharmony_ci ipoib_cm_create_srq(dev, max_srq_sge); 160762306a36Sopenharmony_ci if (ipoib_cm_has_srq(dev)) { 160862306a36Sopenharmony_ci priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10; 160962306a36Sopenharmony_ci priv->cm.num_frags = max_srq_sge; 161062306a36Sopenharmony_ci ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n", 161162306a36Sopenharmony_ci priv->cm.max_cm_mtu, priv->cm.num_frags); 161262306a36Sopenharmony_ci } else { 161362306a36Sopenharmony_ci priv->cm.max_cm_mtu = IPOIB_CM_MTU; 161462306a36Sopenharmony_ci priv->cm.num_frags = IPOIB_CM_RX_SG; 161562306a36Sopenharmony_ci } 161662306a36Sopenharmony_ci 161762306a36Sopenharmony_ci ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge); 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci if (ipoib_cm_has_srq(dev)) { 162062306a36Sopenharmony_ci for (i = 0; i < ipoib_recvq_size; ++i) { 162162306a36Sopenharmony_ci if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i, 162262306a36Sopenharmony_ci priv->cm.num_frags - 1, 162362306a36Sopenharmony_ci priv->cm.srq_ring[i].mapping, 162462306a36Sopenharmony_ci GFP_KERNEL)) { 162562306a36Sopenharmony_ci ipoib_warn(priv, "failed to allocate " 162662306a36Sopenharmony_ci "receive buffer %d\n", i); 162762306a36Sopenharmony_ci ipoib_cm_dev_cleanup(dev); 162862306a36Sopenharmony_ci return -ENOMEM; 162962306a36Sopenharmony_ci } 163062306a36Sopenharmony_ci 163162306a36Sopenharmony_ci if (ipoib_cm_post_receive_srq(dev, i)) { 163262306a36Sopenharmony_ci ipoib_warn(priv, "ipoib_cm_post_receive_srq " 163362306a36Sopenharmony_ci "failed for buf %d\n", i); 163462306a36Sopenharmony_ci ipoib_cm_dev_cleanup(dev); 163562306a36Sopenharmony_ci return -EIO; 163662306a36Sopenharmony_ci } 163762306a36Sopenharmony_ci } 163862306a36Sopenharmony_ci } 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_ci addr = IPOIB_FLAGS_RC; 164162306a36Sopenharmony_ci dev_addr_mod(dev, 0, &addr, 1); 164262306a36Sopenharmony_ci return 0; 164362306a36Sopenharmony_ci} 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_civoid ipoib_cm_dev_cleanup(struct net_device *dev) 164662306a36Sopenharmony_ci{ 164762306a36Sopenharmony_ci struct ipoib_dev_priv *priv = ipoib_priv(dev); 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci if (!priv->cm.srq) 165062306a36Sopenharmony_ci return; 165162306a36Sopenharmony_ci 165262306a36Sopenharmony_ci ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci ib_destroy_srq(priv->cm.srq); 165562306a36Sopenharmony_ci priv->cm.srq = NULL; 165662306a36Sopenharmony_ci if (!priv->cm.srq_ring) 165762306a36Sopenharmony_ci return; 165862306a36Sopenharmony_ci 165962306a36Sopenharmony_ci ipoib_cm_free_rx_ring(dev, priv->cm.srq_ring); 166062306a36Sopenharmony_ci priv->cm.srq_ring = NULL; 166162306a36Sopenharmony_ci} 1662