// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2020 Intel Corporation.
 *
 */

/*
 * This file contains HFI1 support for IPOIB SDMA functionality
 */

#include <linux/log2.h>
#include <linux/circ_buf.h>

#include "sdma.h"
#include "verbs.h"
#include "trace_ibhdrs.h"
#include "ipoib.h"
#include "trace_tx.h"

/* Add a convenience helper */
#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)

/**
 * struct ipoib_txreq - IPOIB transmit descriptor
 * @txreq: sdma transmit request
 * @sdma_hdr: 9b ib headers
 * @sdma_status: status returned by sdma engine
 * @priv: ipoib netdev private data
 * @txq: txq on which skb was output
 * @skb: skb to send
 */
struct ipoib_txreq {
	struct sdma_txreq           txreq;
	struct hfi1_sdma_header     sdma_hdr;
	int                         sdma_status;
	struct hfi1_ipoib_dev_priv *priv;
	struct hfi1_ipoib_txq      *txq;
	struct sk_buff             *skb;
};

struct ipoib_txparms {
	struct hfi1_devdata        *dd;
	struct rdma_ah_attr        *ah_attr;
	struct hfi1_ibport         *ibp;
	struct hfi1_ipoib_txq      *txq;
	union hfi1_ipoib_flow       flow;
	u32                         dqpn;
	u8                          hdr_dwords;
	u8                          entropy;
};

static u64 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
{
	return sent - completed;
}

static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
{
	return hfi1_ipoib_txreqs(txq->sent_txreqs,
				 atomic64_read(&txq->complete_txreqs));
}

static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
{
	trace_hfi1_txq_stop(txq);
	if (atomic_inc_return(&txq->stops) == 1)
		netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
}

static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
{
	trace_hfi1_txq_wake(txq);
	if (atomic_dec_and_test(&txq->stops))
		netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
}

static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
{
	return min_t(uint, txq->priv->netdev->tx_queue_len,
		     txq->tx_ring.max_items - 1);
}

static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
{
	return min_t(uint, txq->priv->netdev->tx_queue_len,
		     txq->tx_ring.max_items) >> 1;
}

static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
{
	++txq->sent_txreqs;
	if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
	    !atomic_xchg(&txq->ring_full, 1)) {
		trace_hfi1_txq_full(txq);
		hfi1_ipoib_stop_txq(txq);
	}
}

static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
{
	struct net_device *dev = txq->priv->netdev;

	/* If shutting down just return as queue state is irrelevant */
	if (unlikely(dev->reg_state != NETREG_REGISTERED))
		return;

	/*
	 * When the queue has been drained to less than half full it will be
	 * restarted.
	 * The size of the txreq ring is fixed at initialization.
	 * The tx queue len can be adjusted upward while the interface is
	 * running.
	 * The tx queue len can be large enough to overflow the txreq_ring.
	 * Use the minimum of the current tx_queue_len or the ring's max txreqs
	 * to protect against ring overflow.
	 */
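	/*
	 * The atomic_xchg() on ring_full below pairs with the xchg in
	 * hfi1_ipoib_check_queue_depth(): each 0 -> 1 or 1 -> 0 transition
	 * of the flag issues at most one stop or one wake of the subqueue.
	 */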
	if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
	    atomic_xchg(&txq->ring_full, 0)) {
		trace_hfi1_txq_xmit_unstopped(txq);
		hfi1_ipoib_wake_txq(txq);
	}
}

static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
{
	struct hfi1_ipoib_dev_priv *priv = tx->priv;

	if (likely(!tx->sdma_status)) {
		hfi1_ipoib_update_tx_netstats(priv, 1, tx->skb->len);
	} else {
		++priv->netdev->stats.tx_errors;
		dd_dev_warn(priv->dd,
			    "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
			    __func__, tx->sdma_status,
			    le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx,
			    tx->txq->sde->this_idx);
	}

	napi_consume_skb(tx->skb, budget);
	sdma_txclean(priv->dd, &tx->txreq);
	kmem_cache_free(priv->txreq_cache, tx);
}

static int hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq, int budget)
{
	struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
	unsigned long head;
	unsigned long tail;
	unsigned int max_tx;
	int work_done;
	int tx_count;

	spin_lock_bh(&tx_ring->consumer_lock);

	/* Read index before reading contents at that index. */
	head = smp_load_acquire(&tx_ring->head);
	tail = tx_ring->tail;
	max_tx = tx_ring->max_items;

	work_done = min_t(int, CIRC_CNT(head, tail, max_tx), budget);

	for (tx_count = work_done; tx_count; tx_count--) {
		hfi1_ipoib_free_tx(tx_ring->items[tail], budget);
		tail = CIRC_NEXT(tail, max_tx);
	}

	atomic64_add(work_done, &txq->complete_txreqs);

	/* Finished freeing tx items so store the tail value. */
	smp_store_release(&tx_ring->tail, tail);

	spin_unlock_bh(&tx_ring->consumer_lock);

	hfi1_ipoib_check_queue_stopped(txq);

	return work_done;
}

static int hfi1_ipoib_process_tx_ring(struct napi_struct *napi, int budget)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(napi->dev);
	struct hfi1_ipoib_txq *txq = &priv->txqs[napi - priv->tx_napis];

	int work_done = hfi1_ipoib_drain_tx_ring(txq, budget);

	if (work_done < budget)
		napi_complete_done(napi, work_done);

	return work_done;
}

static void hfi1_ipoib_add_tx(struct ipoib_txreq *tx)
{
	struct hfi1_ipoib_circ_buf *tx_ring = &tx->txq->tx_ring;
	unsigned long head;
	unsigned long tail;
	size_t max_tx;

	spin_lock(&tx_ring->producer_lock);

	head = tx_ring->head;
	tail = READ_ONCE(tx_ring->tail);
	max_tx = tx_ring->max_items;

	if (likely(CIRC_SPACE(head, tail, max_tx))) {
		tx_ring->items[head] = tx;

		/* Finish storing txreq before incrementing head. */
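		/*
		 * The smp_store_release() below pairs with the
		 * smp_load_acquire() of tx_ring->head in
		 * hfi1_ipoib_drain_tx_ring(), so the consumer observes the
		 * items[] entry before it sees the updated head index.
		 */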
		smp_store_release(&tx_ring->head, CIRC_ADD(head, 1, max_tx));
		napi_schedule(tx->txq->napi);
	} else {
		struct hfi1_ipoib_txq *txq = tx->txq;
		struct hfi1_ipoib_dev_priv *priv = tx->priv;

		/* Ring was full */
		hfi1_ipoib_free_tx(tx, 0);
		atomic64_inc(&txq->complete_txreqs);
		dd_dev_dbg(priv->dd, "txq %d full.\n", txq->q_idx);
	}

	spin_unlock(&tx_ring->producer_lock);
}

static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
{
	struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);

	tx->sdma_status = status;

	hfi1_ipoib_add_tx(tx);
}

static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
					struct ipoib_txparms *txp)
{
	struct hfi1_devdata *dd = txp->dd;
	struct sdma_txreq *txreq = &tx->txreq;
	struct sk_buff *skb = tx->skb;
	int ret = 0;
	int i;

	if (skb_headlen(skb)) {
		ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
		if (unlikely(ret))
			return ret;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		ret = sdma_txadd_page(dd,
				      txreq,
				      skb_frag_page(frag),
				      frag->bv_offset,
				      skb_frag_size(frag),
				      NULL, NULL, NULL);
		if (unlikely(ret))
			break;
	}

	return ret;
}

static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
				    struct ipoib_txparms *txp)
{
	struct hfi1_devdata *dd = txp->dd;
	struct sdma_txreq *txreq = &tx->txreq;
	struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
	u16 pkt_bytes =
		sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
	int ret;

	ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
	if (unlikely(ret))
		return ret;

	/* add pbc + headers */
	ret = sdma_txadd_kvaddr(dd,
				txreq,
				sdma_hdr,
				sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
	if (unlikely(ret))
		return ret;

	/* add the ulp payload */
	return hfi1_ipoib_build_ulp_payload(tx, txp);
}

static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
					   struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_dev_priv *priv = tx->priv;
	struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr;
	struct sk_buff *skb = tx->skb;
	struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
	struct rdma_ah_attr *ah_attr = txp->ah_attr;
	struct ib_other_headers *ohdr;
	struct ib_grh *grh;
	u16 dwords;
	u16 slid;
	u16 dlid;
	u16 lrh0;
	u32 bth0;
	u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
			 priv->netdev->dev_addr[2] << 8 |
			 priv->netdev->dev_addr[3]);
	u16 payload_dwords;
	u8 pad_cnt;

	pad_cnt = -skb->len & 3;

	/* Includes ICRC */
	payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;

	/* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
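	/*
	 * When a GRH is present, the branch below adds the dword count
	 * returned by hfi1_make_grh() on top of this base header size.
	 */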
	txp->hdr_dwords = 7;

	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
		grh = &sdma_hdr->hdr.ibh.u.l.grh;
		txp->hdr_dwords +=
			hfi1_make_grh(txp->ibp,
				      grh,
				      rdma_ah_read_grh(ah_attr),
				      txp->hdr_dwords - LRH_9B_DWORDS,
				      payload_dwords);
		lrh0 = HFI1_LRH_GRH;
		ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
	} else {
		lrh0 = HFI1_LRH_BTH;
		ohdr = &sdma_hdr->hdr.ibh.u.oth;
	}

	lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
	lrh0 |= (txp->flow.sc5 & 0xf) << 12;

	dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
	if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
		slid = be16_to_cpu(IB_LID_PERMISSIVE);
	} else {
		u16 lid = (u16)ppd->lid;

		if (lid) {
			lid |= rdma_ah_get_path_bits(ah_attr) &
				((1 << ppd->lmc) - 1);
			slid = lid;
		} else {
			slid = be16_to_cpu(IB_LID_PERMISSIVE);
		}
	}

	/* Includes ICRC */
	dwords = txp->hdr_dwords + payload_dwords;

	/* Build the lrh */
	sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
	hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);

	/* Build the bth */
	bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;

	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(txp->dqpn);
	ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->sent_txreqs));

	/* Build the deth */
	ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
	ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
					  HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);

	/* Construct the pbc. */
	sdma_hdr->pbc =
		cpu_to_le64(create_pbc(ppd,
				       ib_is_sc5(txp->flow.sc5) <<
							PBC_DC_INFO_SHIFT,
				       0,
				       sc_to_vlt(priv->dd, txp->flow.sc5),
				       dwords - SIZE_OF_CRC +
						(sizeof(sdma_hdr->pbc) >> 2)));
}

static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
						      struct sk_buff *skb,
						      struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	struct ipoib_txreq *tx;
	int ret;

	tx = kmem_cache_alloc_node(priv->txreq_cache,
				   GFP_ATOMIC,
				   priv->dd->node);
	if (unlikely(!tx))
		return ERR_PTR(-ENOMEM);

	/* so that we can test if the sdma descriptors are there */
	tx->txreq.num_desc = 0;
	tx->priv = priv;
	tx->txq = txp->txq;
	tx->skb = skb;
	INIT_LIST_HEAD(&tx->txreq.list);

	hfi1_ipoib_build_ib_tx_headers(tx, txp);

	ret = hfi1_ipoib_build_tx_desc(tx, txp);
	if (likely(!ret)) {
		if (txp->txq->flow.as_int != txp->flow.as_int) {
			txp->txq->flow.tx_queue = txp->flow.tx_queue;
			txp->txq->flow.sc5 = txp->flow.sc5;
			txp->txq->sde =
				sdma_select_engine_sc(priv->dd,
						      txp->flow.tx_queue,
						      txp->flow.sc5);
			trace_hfi1_flow_switch(txp->txq);
		}

		return tx;
	}

	sdma_txclean(priv->dd, &tx->txreq);
	kmem_cache_free(priv->txreq_cache, tx);

	return ERR_PTR(ret);
}

static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
				     struct hfi1_ipoib_txq *txq)
{
	int ret;
	u16 count_out;

	ret = sdma_send_txlist(txq->sde,
			       iowait_get_ib_work(&txq->wait),
			       &txq->tx_list,
			       &count_out);
	if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
		return ret;

	dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);

	return ret;
}

static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
				    struct hfi1_ipoib_txq *txq)
{
	int ret = 0;

	if (!list_empty(&txq->tx_list)) {
		/* Flush the current list */
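		/*
		 * -EBUSY is not counted as a carrier error below: the
		 * remaining packets stay on tx_list and are resubmitted
		 * from hfi1_ipoib_flush_txq() once the SDMA engine wakes
		 * this queue.
		 */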
		ret = hfi1_ipoib_submit_tx_list(dev, txq);

		if (unlikely(ret))
			if (ret != -EBUSY)
				++dev->stats.tx_carrier_errors;
	}

	return ret;
}

static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
				struct ipoib_txreq *tx)
{
	int ret;

	ret = sdma_send_txreq(txq->sde,
			      iowait_get_ib_work(&txq->wait),
			      &tx->txreq,
			      txq->pkts_sent);
	if (likely(!ret)) {
		txq->pkts_sent = true;
		iowait_starve_clear(txq->pkts_sent, &txq->wait);
	}

	return ret;
}

static int hfi1_ipoib_send_dma_single(struct net_device *dev,
				      struct sk_buff *skb,
				      struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	struct hfi1_ipoib_txq *txq = txp->txq;
	struct ipoib_txreq *tx;
	int ret;

	tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
	if (IS_ERR(tx)) {
		int ret = PTR_ERR(tx);

		dev_kfree_skb_any(skb);

		if (ret == -ENOMEM)
			++dev->stats.tx_errors;
		else
			++dev->stats.tx_carrier_errors;

		return NETDEV_TX_OK;
	}

	ret = hfi1_ipoib_submit_tx(txq, tx);
	if (likely(!ret)) {
tx_ok:
		trace_sdma_output_ibhdr(tx->priv->dd,
					&tx->sdma_hdr.hdr,
					ib_is_sc5(txp->flow.sc5));
		hfi1_ipoib_check_queue_depth(txq);
		return NETDEV_TX_OK;
	}

	txq->pkts_sent = false;

	if (ret == -EBUSY || ret == -ECOMM)
		goto tx_ok;

	sdma_txclean(priv->dd, &tx->txreq);
	dev_kfree_skb_any(skb);
	kmem_cache_free(priv->txreq_cache, tx);
	++dev->stats.tx_carrier_errors;

	return NETDEV_TX_OK;
}

static int hfi1_ipoib_send_dma_list(struct net_device *dev,
				    struct sk_buff *skb,
				    struct ipoib_txparms *txp)
{
	struct hfi1_ipoib_txq *txq = txp->txq;
	struct ipoib_txreq *tx;

	/* Has the flow changed? */
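	/*
	 * A packet with a new flow may be steered to a different SDMA
	 * engine (see hfi1_ipoib_send_dma_common()), so any packets still
	 * queued for the old flow are pushed out first.
	 */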
	if (txq->flow.as_int != txp->flow.as_int) {
		int ret;

		trace_hfi1_flow_flush(txq);
		ret = hfi1_ipoib_flush_tx_list(dev, txq);
		if (unlikely(ret)) {
			if (ret == -EBUSY)
				++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
			return NETDEV_TX_OK;
		}
	}
	tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
	if (IS_ERR(tx)) {
		int ret = PTR_ERR(tx);

		dev_kfree_skb_any(skb);

		if (ret == -ENOMEM)
			++dev->stats.tx_errors;
		else
			++dev->stats.tx_carrier_errors;

		return NETDEV_TX_OK;
	}

	list_add_tail(&tx->txreq.list, &txq->tx_list);

	hfi1_ipoib_check_queue_depth(txq);

	trace_sdma_output_ibhdr(tx->priv->dd,
				&tx->sdma_hdr.hdr,
				ib_is_sc5(txp->flow.sc5));

	if (!netdev_xmit_more())
		(void)hfi1_ipoib_flush_tx_list(dev, txq);

	return NETDEV_TX_OK;
}

static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
{
	if (skb_transport_header_was_set(skb)) {
		u8 *hdr = (u8 *)skb_transport_header(skb);

		return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
	}

	return (u8)skb_get_queue_mapping(skb);
}

int hfi1_ipoib_send_dma(struct net_device *dev,
			struct sk_buff *skb,
			struct ib_ah *address,
			u32 dqpn)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	struct ipoib_txparms txp;
	struct rdma_netdev *rn = netdev_priv(dev);

	if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
		dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
			    skb->len,
			    rn->mtu + HFI1_IPOIB_ENCAP_LEN);
		++dev->stats.tx_dropped;
		++dev->stats.tx_errors;
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	txp.dd = priv->dd;
	txp.ah_attr = &ibah_to_rvtah(address)->attr;
	txp.ibp = to_iport(priv->device, priv->port_num);
	txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
	txp.dqpn = dqpn;
	txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
	txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
	txp.entropy = hfi1_ipoib_calc_entropy(skb);

	if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
		return hfi1_ipoib_send_dma_list(dev, skb, &txp);

	return hfi1_ipoib_send_dma_single(dev, skb, &txp);
}

/*
 * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
 *
 * This function gets called from sdma_send_txreq() when there are not enough
 * sdma descriptors available to send the packet. It adds the Tx queue's wait
 * structure to the sdma engine's dmawait list to be woken up when descriptors
 * become available.
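 *
 * Return: -EAGAIN if the SDMA engine made progress since @seq, -EBUSY once
 * the txq has been queued on the engine's dmawait list, or -EINVAL when the
 * netdev is no longer registered.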
 */
static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
				 struct iowait_work *wait,
				 struct sdma_txreq *txreq,
				 uint seq,
				 bool pkts_sent)
{
	struct hfi1_ipoib_txq *txq =
		container_of(wait->iow, struct hfi1_ipoib_txq, wait);

	write_seqlock(&sde->waitlock);

	if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
		if (sdma_progress(sde, seq, txreq)) {
			write_sequnlock(&sde->waitlock);
			return -EAGAIN;
		}

		if (list_empty(&txreq->list))
			/* came from non-list submit */
			list_add_tail(&txreq->list, &txq->tx_list);
		if (list_empty(&txq->wait.list)) {
			if (!atomic_xchg(&txq->no_desc, 1)) {
				trace_hfi1_txq_queued(txq);
				hfi1_ipoib_stop_txq(txq);
			}
			iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
		}

		write_sequnlock(&sde->waitlock);
		return -EBUSY;
	}

	write_sequnlock(&sde->waitlock);
	return -EINVAL;
}

/*
 * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
 *
 * This function gets called when SDMA descriptors become available and the
 * Tx queue's wait structure was previously added to the sdma engine's
 * dmawait list.
 */
static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
{
	struct hfi1_ipoib_txq *txq =
		container_of(wait, struct hfi1_ipoib_txq, wait);

	trace_hfi1_txq_wakeup(txq);
	if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
		iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
}

static void hfi1_ipoib_flush_txq(struct work_struct *work)
{
	struct iowait_work *ioww =
		container_of(work, struct iowait_work, iowork);
	struct iowait *wait = iowait_ioww_to_iow(ioww);
	struct hfi1_ipoib_txq *txq =
		container_of(wait, struct hfi1_ipoib_txq, wait);
	struct net_device *dev = txq->priv->netdev;

	if (likely(dev->reg_state == NETREG_REGISTERED) &&
	    likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
		if (atomic_xchg(&txq->no_desc, 0))
			hfi1_ipoib_wake_txq(txq);
}

int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
{
	struct net_device *dev = priv->netdev;
	char buf[HFI1_IPOIB_TXREQ_NAME_LEN];
	unsigned long tx_ring_size;
	int i;

	/*
	 * Ring holds 1 less than tx_ring_size
	 * Round up to next power of 2 in order to hold at least tx_queue_len
	 */
	tx_ring_size = roundup_pow_of_two((unsigned long)dev->tx_queue_len + 1);

	snprintf(buf, sizeof(buf), "hfi1_%u_ipoib_txreq_cache", priv->dd->unit);
	priv->txreq_cache = kmem_cache_create(buf,
					      sizeof(struct ipoib_txreq),
					      0,
					      0,
					      NULL);
	if (!priv->txreq_cache)
		return -ENOMEM;

	priv->tx_napis = kcalloc_node(dev->num_tx_queues,
				      sizeof(struct napi_struct),
				      GFP_KERNEL,
				      priv->dd->node);
	if (!priv->tx_napis)
		goto free_txreq_cache;

	priv->txqs = kcalloc_node(dev->num_tx_queues,
				  sizeof(struct hfi1_ipoib_txq),
				  GFP_KERNEL,
				  priv->dd->node);
	if (!priv->txqs)
		goto free_tx_napis;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		iowait_init(&txq->wait,
			    0,
			    hfi1_ipoib_flush_txq,
			    NULL,
			    hfi1_ipoib_sdma_sleep,
			    hfi1_ipoib_sdma_wakeup,
			    NULL,
			    NULL);
		txq->priv = priv;
		txq->sde = NULL;
		INIT_LIST_HEAD(&txq->tx_list);
		atomic64_set(&txq->complete_txreqs, 0);
		atomic_set(&txq->stops, 0);
		atomic_set(&txq->ring_full, 0);
		atomic_set(&txq->no_desc, 0);
		txq->q_idx = i;
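		/*
		 * Start with an out-of-range flow so the first transmit on
		 * this txq always triggers SDMA engine selection in
		 * hfi1_ipoib_send_dma_common().
		 */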
		txq->flow.tx_queue = 0xff;
		txq->flow.sc5 = 0xff;
		txq->pkts_sent = false;

		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
					     priv->dd->node);

		txq->tx_ring.items =
			kcalloc_node(tx_ring_size,
				     sizeof(struct ipoib_txreq *),
				     GFP_KERNEL, priv->dd->node);
		if (!txq->tx_ring.items)
			goto free_txqs;

		spin_lock_init(&txq->tx_ring.producer_lock);
		spin_lock_init(&txq->tx_ring.consumer_lock);
		txq->tx_ring.max_items = tx_ring_size;

		txq->napi = &priv->tx_napis[i];
		netif_tx_napi_add(dev, txq->napi,
				  hfi1_ipoib_process_tx_ring,
				  NAPI_POLL_WEIGHT);
	}

	return 0;

free_txqs:
	for (i--; i >= 0; i--) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		netif_napi_del(txq->napi);
		kfree(txq->tx_ring.items);
	}

	kfree(priv->txqs);
	priv->txqs = NULL;

free_tx_napis:
	kfree(priv->tx_napis);
	priv->tx_napis = NULL;

free_txreq_cache:
	kmem_cache_destroy(priv->txreq_cache);
	priv->txreq_cache = NULL;
	return -ENOMEM;
}

static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
{
	struct sdma_txreq *txreq;
	struct sdma_txreq *txreq_tmp;
	atomic64_t *complete_txreqs = &txq->complete_txreqs;

	list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
		struct ipoib_txreq *tx =
			container_of(txreq, struct ipoib_txreq, txreq);

		list_del(&txreq->list);
		sdma_txclean(txq->priv->dd, &tx->txreq);
		dev_kfree_skb_any(tx->skb);
		kmem_cache_free(txq->priv->txreq_cache, tx);
		atomic64_inc(complete_txreqs);
	}

	if (hfi1_ipoib_used(txq))
		dd_dev_warn(txq->priv->dd,
			    "txq %d not empty found %llu requests\n",
			    txq->q_idx,
			    hfi1_ipoib_txreqs(txq->sent_txreqs,
					      atomic64_read(complete_txreqs)));
}

void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
{
	int i;

	for (i = 0; i < priv->netdev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		iowait_cancel_work(&txq->wait);
		iowait_sdma_drain(&txq->wait);
		hfi1_ipoib_drain_tx_list(txq);
		netif_napi_del(txq->napi);
		(void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
		kfree(txq->tx_ring.items);
	}

	kfree(priv->txqs);
	priv->txqs = NULL;

	kfree(priv->tx_napis);
	priv->tx_napis = NULL;

	kmem_cache_destroy(priv->txreq_cache);
	priv->txreq_cache = NULL;
}

void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		napi_enable(txq->napi);
	}
}

void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
{
	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
	int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct hfi1_ipoib_txq *txq = &priv->txqs[i];

		napi_disable(txq->napi);
		(void)hfi1_ipoib_drain_tx_ring(txq, txq->tx_ring.max_items);
	}
}