1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2/* 3 * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved. 4 */ 5 6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 7 8#ifdef CONFIG_RFS_ACCEL 9#include <linux/cpu_rmap.h> 10#endif /* CONFIG_RFS_ACCEL */ 11#include <linux/ethtool.h> 12#include <linux/kernel.h> 13#include <linux/module.h> 14#include <linux/numa.h> 15#include <linux/pci.h> 16#include <linux/utsname.h> 17#include <linux/version.h> 18#include <linux/vmalloc.h> 19#include <net/ip.h> 20 21#include "ena_netdev.h" 22#include <linux/bpf_trace.h> 23#include "ena_pci_id_tbl.h" 24 25MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); 26MODULE_DESCRIPTION(DEVICE_NAME); 27MODULE_LICENSE("GPL"); 28 29/* Time in jiffies before concluding the transmitter is hung. */ 30#define TX_TIMEOUT (5 * HZ) 31 32#define ENA_NAPI_BUDGET 64 33 34#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ 35 NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) 36static int debug = -1; 37module_param(debug, int, 0); 38MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 39 40static struct ena_aenq_handlers aenq_handlers; 41 42static struct workqueue_struct *ena_wq; 43 44MODULE_DEVICE_TABLE(pci, ena_pci_tbl); 45 46static int ena_rss_init_default(struct ena_adapter *adapter); 47static void check_for_admin_com_state(struct ena_adapter *adapter); 48static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); 49static int ena_restore_device(struct ena_adapter *adapter); 50 51static void ena_init_io_rings(struct ena_adapter *adapter, 52 int first_index, int count); 53static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, 54 int count); 55static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index, 56 int count); 57static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid); 58static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, 59 int first_index, 60 int count); 61static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid); 62static void ena_free_tx_resources(struct ena_adapter *adapter, int qid); 63static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget); 64static void ena_destroy_all_tx_queues(struct ena_adapter *adapter); 65static void ena_free_all_io_tx_resources(struct ena_adapter *adapter); 66static void ena_napi_disable_in_range(struct ena_adapter *adapter, 67 int first_index, int count); 68static void ena_napi_enable_in_range(struct ena_adapter *adapter, 69 int first_index, int count); 70static int ena_up(struct ena_adapter *adapter); 71static void ena_down(struct ena_adapter *adapter); 72static void ena_unmask_interrupt(struct ena_ring *tx_ring, 73 struct ena_ring *rx_ring); 74static void ena_update_ring_numa_node(struct ena_ring *tx_ring, 75 struct ena_ring *rx_ring); 76static void ena_unmap_tx_buff(struct ena_ring *tx_ring, 77 struct ena_tx_buffer *tx_info); 78static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, 79 int first_index, int count); 80static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, 81 int first_index, int count); 82 83static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) 84{ 85 struct ena_adapter *adapter = netdev_priv(dev); 86 87 /* Change the state of the device to trigger reset 88 * Check that we are not in the middle or a trigger already 89 */ 90 91 if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) 92 return; 93 94 adapter->reset_reason 
= ENA_REGS_RESET_OS_NETDEV_WD; 95 u64_stats_update_begin(&adapter->syncp); 96 adapter->dev_stats.tx_timeout++; 97 u64_stats_update_end(&adapter->syncp); 98 99 netif_err(adapter, tx_err, dev, "Transmit time out\n"); 100} 101 102static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) 103{ 104 int i; 105 106 for (i = 0; i < adapter->num_io_queues; i++) 107 adapter->rx_ring[i].mtu = mtu; 108} 109 110static int ena_change_mtu(struct net_device *dev, int new_mtu) 111{ 112 struct ena_adapter *adapter = netdev_priv(dev); 113 int ret; 114 115 ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); 116 if (!ret) { 117 netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu); 118 update_rx_ring_mtu(adapter, new_mtu); 119 dev->mtu = new_mtu; 120 } else { 121 netif_err(adapter, drv, dev, "Failed to set MTU to %d\n", 122 new_mtu); 123 } 124 125 return ret; 126} 127 128static int ena_xmit_common(struct net_device *dev, 129 struct ena_ring *ring, 130 struct ena_tx_buffer *tx_info, 131 struct ena_com_tx_ctx *ena_tx_ctx, 132 u16 next_to_use, 133 u32 bytes) 134{ 135 struct ena_adapter *adapter = netdev_priv(dev); 136 int rc, nb_hw_desc; 137 138 if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq, 139 ena_tx_ctx))) { 140 netif_dbg(adapter, tx_queued, dev, 141 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", 142 ring->qid); 143 ena_com_write_sq_doorbell(ring->ena_com_io_sq); 144 } 145 146 /* prepare the packet's descriptors to dma engine */ 147 rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx, 148 &nb_hw_desc); 149 150 /* In case there isn't enough space in the queue for the packet, 151 * we simply drop it. All other failure reasons of 152 * ena_com_prepare_tx() are fatal and therefore require a device reset. 153 */ 154 if (unlikely(rc)) { 155 netif_err(adapter, tx_queued, dev, 156 "Failed to prepare tx bufs\n"); 157 u64_stats_update_begin(&ring->syncp); 158 ring->tx_stats.prepare_ctx_err++; 159 u64_stats_update_end(&ring->syncp); 160 if (rc != -ENOMEM) { 161 adapter->reset_reason = 162 ENA_REGS_RESET_DRIVER_INVALID_STATE; 163 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 164 } 165 return rc; 166 } 167 168 u64_stats_update_begin(&ring->syncp); 169 ring->tx_stats.cnt++; 170 ring->tx_stats.bytes += bytes; 171 u64_stats_update_end(&ring->syncp); 172 173 tx_info->tx_descs = nb_hw_desc; 174 tx_info->last_jiffies = jiffies; 175 tx_info->print_once = 0; 176 177 ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, 178 ring->ring_size); 179 return 0; 180} 181 182/* This is the XDP napi callback. XDP queues use a separate napi callback 183 * than Rx/Tx queues. 
184 */ 185static int ena_xdp_io_poll(struct napi_struct *napi, int budget) 186{ 187 struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); 188 u32 xdp_work_done, xdp_budget; 189 struct ena_ring *xdp_ring; 190 int napi_comp_call = 0; 191 int ret; 192 193 xdp_ring = ena_napi->xdp_ring; 194 xdp_ring->first_interrupt = ena_napi->first_interrupt; 195 196 xdp_budget = budget; 197 198 if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || 199 test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { 200 napi_complete_done(napi, 0); 201 return 0; 202 } 203 204 xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); 205 206 /* If the device is about to reset or down, avoid unmask 207 * the interrupt and return 0 so NAPI won't reschedule 208 */ 209 if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { 210 napi_complete_done(napi, 0); 211 ret = 0; 212 } else if (xdp_budget > xdp_work_done) { 213 napi_comp_call = 1; 214 if (napi_complete_done(napi, xdp_work_done)) 215 ena_unmask_interrupt(xdp_ring, NULL); 216 ena_update_ring_numa_node(xdp_ring, NULL); 217 ret = xdp_work_done; 218 } else { 219 ret = xdp_budget; 220 } 221 222 u64_stats_update_begin(&xdp_ring->syncp); 223 xdp_ring->tx_stats.napi_comp += napi_comp_call; 224 xdp_ring->tx_stats.tx_poll++; 225 u64_stats_update_end(&xdp_ring->syncp); 226 227 return ret; 228} 229 230static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring, 231 struct ena_tx_buffer *tx_info, 232 struct xdp_buff *xdp, 233 void **push_hdr, 234 u32 *push_len) 235{ 236 struct ena_adapter *adapter = xdp_ring->adapter; 237 struct ena_com_buf *ena_buf; 238 dma_addr_t dma = 0; 239 u32 size; 240 241 tx_info->xdpf = xdp_convert_buff_to_frame(xdp); 242 size = tx_info->xdpf->len; 243 ena_buf = tx_info->bufs; 244 245 /* llq push buffer */ 246 *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); 247 *push_hdr = tx_info->xdpf->data; 248 249 if (size - *push_len > 0) { 250 dma = dma_map_single(xdp_ring->dev, 251 *push_hdr + *push_len, 252 size - *push_len, 253 DMA_TO_DEVICE); 254 if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) 255 goto error_report_dma_error; 256 257 tx_info->map_linear_data = 1; 258 tx_info->num_of_bufs = 1; 259 } 260 261 ena_buf->paddr = dma; 262 ena_buf->len = size; 263 264 return 0; 265 266error_report_dma_error: 267 u64_stats_update_begin(&xdp_ring->syncp); 268 xdp_ring->tx_stats.dma_mapping_err++; 269 u64_stats_update_end(&xdp_ring->syncp); 270 netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); 271 272 xdp_return_frame_rx_napi(tx_info->xdpf); 273 tx_info->xdpf = NULL; 274 tx_info->num_of_bufs = 0; 275 276 return -EINVAL; 277} 278 279static int ena_xdp_xmit_buff(struct net_device *dev, 280 struct xdp_buff *xdp, 281 int qid, 282 struct ena_rx_buffer *rx_info) 283{ 284 struct ena_adapter *adapter = netdev_priv(dev); 285 struct ena_com_tx_ctx ena_tx_ctx = {}; 286 struct ena_tx_buffer *tx_info; 287 struct ena_ring *xdp_ring; 288 u16 next_to_use, req_id; 289 int rc; 290 void *push_hdr; 291 u32 push_len; 292 293 xdp_ring = &adapter->tx_ring[qid]; 294 next_to_use = xdp_ring->next_to_use; 295 req_id = xdp_ring->free_ids[next_to_use]; 296 tx_info = &xdp_ring->tx_buffer_info[req_id]; 297 tx_info->num_of_bufs = 0; 298 page_ref_inc(rx_info->page); 299 tx_info->xdp_rx_page = rx_info->page; 300 301 rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len); 302 if (unlikely(rc)) 303 goto error_drop_packet; 304 305 ena_tx_ctx.ena_bufs = tx_info->bufs; 306 ena_tx_ctx.push_header = push_hdr; 307 
ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 308 ena_tx_ctx.req_id = req_id; 309 ena_tx_ctx.header_len = push_len; 310 311 rc = ena_xmit_common(dev, 312 xdp_ring, 313 tx_info, 314 &ena_tx_ctx, 315 next_to_use, 316 xdp->data_end - xdp->data); 317 if (rc) 318 goto error_unmap_dma; 319 /* trigger the dma engine. ena_com_write_sq_doorbell() 320 * has a mb 321 */ 322 ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); 323 u64_stats_update_begin(&xdp_ring->syncp); 324 xdp_ring->tx_stats.doorbells++; 325 u64_stats_update_end(&xdp_ring->syncp); 326 327 return NETDEV_TX_OK; 328 329error_unmap_dma: 330 ena_unmap_tx_buff(xdp_ring, tx_info); 331 tx_info->xdpf = NULL; 332error_drop_packet: 333 __free_page(tx_info->xdp_rx_page); 334 return NETDEV_TX_OK; 335} 336 337static int ena_xdp_execute(struct ena_ring *rx_ring, 338 struct xdp_buff *xdp, 339 struct ena_rx_buffer *rx_info) 340{ 341 struct bpf_prog *xdp_prog; 342 u32 verdict = XDP_PASS; 343 u64 *xdp_stat; 344 345 rcu_read_lock(); 346 xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); 347 348 if (!xdp_prog) 349 goto out; 350 351 verdict = bpf_prog_run_xdp(xdp_prog, xdp); 352 353 if (verdict == XDP_TX) { 354 ena_xdp_xmit_buff(rx_ring->netdev, 355 xdp, 356 rx_ring->qid + rx_ring->adapter->num_io_queues, 357 rx_info); 358 359 xdp_stat = &rx_ring->rx_stats.xdp_tx; 360 } else if (unlikely(verdict == XDP_ABORTED)) { 361 trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); 362 xdp_stat = &rx_ring->rx_stats.xdp_aborted; 363 } else if (unlikely(verdict == XDP_DROP)) { 364 xdp_stat = &rx_ring->rx_stats.xdp_drop; 365 } else if (unlikely(verdict == XDP_PASS)) { 366 xdp_stat = &rx_ring->rx_stats.xdp_pass; 367 } else { 368 bpf_warn_invalid_xdp_action(verdict); 369 xdp_stat = &rx_ring->rx_stats.xdp_invalid; 370 } 371 372 u64_stats_update_begin(&rx_ring->syncp); 373 (*xdp_stat)++; 374 u64_stats_update_end(&rx_ring->syncp); 375out: 376 rcu_read_unlock(); 377 378 return verdict; 379} 380 381static void ena_init_all_xdp_queues(struct ena_adapter *adapter) 382{ 383 adapter->xdp_first_ring = adapter->num_io_queues; 384 adapter->xdp_num_queues = adapter->num_io_queues; 385 386 ena_init_io_rings(adapter, 387 adapter->xdp_first_ring, 388 adapter->xdp_num_queues); 389} 390 391static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) 392{ 393 u32 xdp_first_ring = adapter->xdp_first_ring; 394 u32 xdp_num_queues = adapter->xdp_num_queues; 395 int rc = 0; 396 397 rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); 398 if (rc) 399 goto setup_err; 400 401 rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); 402 if (rc) 403 goto create_err; 404 405 return 0; 406 407create_err: 408 ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); 409setup_err: 410 return rc; 411} 412 413/* Provides a way for both kernel and bpf-prog to know 414 * more about the RX-queue a given XDP frame arrived on. 415 */ 416static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) 417{ 418 int rc; 419 420 rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid); 421 422 if (rc) { 423 netif_err(rx_ring->adapter, ifup, rx_ring->netdev, 424 "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", 425 rx_ring->qid, rc); 426 goto err; 427 } 428 429 rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, 430 NULL); 431 432 if (rc) { 433 netif_err(rx_ring->adapter, ifup, rx_ring->netdev, 434 "Failed to register xdp rx queue info memory model. 
RX queue num %d rc: %d\n", 435 rx_ring->qid, rc); 436 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 437 } 438 439err: 440 return rc; 441} 442 443static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) 444{ 445 xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); 446 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 447} 448 449static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, 450 struct bpf_prog *prog, 451 int first, int count) 452{ 453 struct ena_ring *rx_ring; 454 int i = 0; 455 456 for (i = first; i < count; i++) { 457 rx_ring = &adapter->rx_ring[i]; 458 xchg(&rx_ring->xdp_bpf_prog, prog); 459 if (prog) { 460 ena_xdp_register_rxq_info(rx_ring); 461 rx_ring->rx_headroom = XDP_PACKET_HEADROOM; 462 } else { 463 ena_xdp_unregister_rxq_info(rx_ring); 464 rx_ring->rx_headroom = 0; 465 } 466 } 467} 468 469static void ena_xdp_exchange_program(struct ena_adapter *adapter, 470 struct bpf_prog *prog) 471{ 472 struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); 473 474 ena_xdp_exchange_program_rx_in_range(adapter, 475 prog, 476 0, 477 adapter->num_io_queues); 478 479 if (old_bpf_prog) 480 bpf_prog_put(old_bpf_prog); 481} 482 483static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) 484{ 485 bool was_up; 486 int rc; 487 488 was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 489 490 if (was_up) 491 ena_down(adapter); 492 493 adapter->xdp_first_ring = 0; 494 adapter->xdp_num_queues = 0; 495 ena_xdp_exchange_program(adapter, NULL); 496 if (was_up) { 497 rc = ena_up(adapter); 498 if (rc) 499 return rc; 500 } 501 return 0; 502} 503 504static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) 505{ 506 struct ena_adapter *adapter = netdev_priv(netdev); 507 struct bpf_prog *prog = bpf->prog; 508 struct bpf_prog *old_bpf_prog; 509 int rc, prev_mtu; 510 bool is_up; 511 512 is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 513 rc = ena_xdp_allowed(adapter); 514 if (rc == ENA_XDP_ALLOWED) { 515 old_bpf_prog = adapter->xdp_bpf_prog; 516 if (prog) { 517 if (!is_up) { 518 ena_init_all_xdp_queues(adapter); 519 } else if (!old_bpf_prog) { 520 ena_down(adapter); 521 ena_init_all_xdp_queues(adapter); 522 } 523 ena_xdp_exchange_program(adapter, prog); 524 525 if (is_up && !old_bpf_prog) { 526 rc = ena_up(adapter); 527 if (rc) 528 return rc; 529 } 530 } else if (old_bpf_prog) { 531 rc = ena_destroy_and_free_all_xdp_queues(adapter); 532 if (rc) 533 return rc; 534 } 535 536 prev_mtu = netdev->max_mtu; 537 netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; 538 539 if (!old_bpf_prog) 540 netif_info(adapter, drv, adapter->netdev, 541 "XDP program is set, changing the max_mtu from %d to %d", 542 prev_mtu, netdev->max_mtu); 543 544 } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { 545 netif_err(adapter, drv, adapter->netdev, 546 "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", 547 netdev->mtu, ENA_XDP_MAX_MTU); 548 NL_SET_ERR_MSG_MOD(bpf->extack, 549 "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); 550 return -EINVAL; 551 } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { 552 netif_err(adapter, drv, adapter->netdev, 553 "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. 
The current queue count (%d), the maximal queue count (%d)\n", 554 adapter->num_io_queues, adapter->max_num_io_queues); 555 NL_SET_ERR_MSG_MOD(bpf->extack, 556 "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); 557 return -EINVAL; 558 } 559 560 return 0; 561} 562 563/* This is the main xdp callback, it's used by the kernel to set/unset the xdp 564 * program as well as to query the current xdp program id. 565 */ 566static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) 567{ 568 switch (bpf->command) { 569 case XDP_SETUP_PROG: 570 return ena_xdp_set(netdev, bpf); 571 default: 572 return -EINVAL; 573 } 574 return 0; 575} 576 577static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) 578{ 579#ifdef CONFIG_RFS_ACCEL 580 u32 i; 581 int rc; 582 583 adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues); 584 if (!adapter->netdev->rx_cpu_rmap) 585 return -ENOMEM; 586 for (i = 0; i < adapter->num_io_queues; i++) { 587 int irq_idx = ENA_IO_IRQ_IDX(i); 588 589 rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, 590 pci_irq_vector(adapter->pdev, irq_idx)); 591 if (rc) { 592 free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); 593 adapter->netdev->rx_cpu_rmap = NULL; 594 return rc; 595 } 596 } 597#endif /* CONFIG_RFS_ACCEL */ 598 return 0; 599} 600 601static void ena_init_io_rings_common(struct ena_adapter *adapter, 602 struct ena_ring *ring, u16 qid) 603{ 604 ring->qid = qid; 605 ring->pdev = adapter->pdev; 606 ring->dev = &adapter->pdev->dev; 607 ring->netdev = adapter->netdev; 608 ring->napi = &adapter->ena_napi[qid].napi; 609 ring->adapter = adapter; 610 ring->ena_dev = adapter->ena_dev; 611 ring->per_napi_packets = 0; 612 ring->cpu = 0; 613 ring->first_interrupt = false; 614 ring->no_interrupt_event_cnt = 0; 615 u64_stats_init(&ring->syncp); 616} 617 618static void ena_init_io_rings(struct ena_adapter *adapter, 619 int first_index, int count) 620{ 621 struct ena_com_dev *ena_dev; 622 struct ena_ring *txr, *rxr; 623 int i; 624 625 ena_dev = adapter->ena_dev; 626 627 for (i = first_index; i < first_index + count; i++) { 628 txr = &adapter->tx_ring[i]; 629 rxr = &adapter->rx_ring[i]; 630 631 /* TX common ring state */ 632 ena_init_io_rings_common(adapter, txr, i); 633 634 /* TX specific ring state */ 635 txr->ring_size = adapter->requested_tx_ring_size; 636 txr->tx_max_header_size = ena_dev->tx_max_header_size; 637 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; 638 txr->sgl_size = adapter->max_tx_sgl_size; 639 txr->smoothed_interval = 640 ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); 641 txr->disable_meta_caching = adapter->disable_meta_caching; 642 643 /* Don't init RX queues for xdp queues */ 644 if (!ENA_IS_XDP_INDEX(adapter, i)) { 645 /* RX common ring state */ 646 ena_init_io_rings_common(adapter, rxr, i); 647 648 /* RX specific ring state */ 649 rxr->ring_size = adapter->requested_rx_ring_size; 650 rxr->rx_copybreak = adapter->rx_copybreak; 651 rxr->sgl_size = adapter->max_rx_sgl_size; 652 rxr->smoothed_interval = 653 ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); 654 rxr->empty_rx_queue = 0; 655 adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; 656 } 657 } 658} 659 660/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors) 661 * @adapter: network interface device structure 662 * @qid: queue index 663 * 664 * Return 0 on success, negative on failure 665 */ 666static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) 667{ 668 struct 
ena_ring *tx_ring = &adapter->tx_ring[qid]; 669 struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; 670 int size, i, node; 671 672 if (tx_ring->tx_buffer_info) { 673 netif_err(adapter, ifup, 674 adapter->netdev, "tx_buffer_info info is not NULL"); 675 return -EEXIST; 676 } 677 678 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; 679 node = cpu_to_node(ena_irq->cpu); 680 681 tx_ring->tx_buffer_info = vzalloc_node(size, node); 682 if (!tx_ring->tx_buffer_info) { 683 tx_ring->tx_buffer_info = vzalloc(size); 684 if (!tx_ring->tx_buffer_info) 685 goto err_tx_buffer_info; 686 } 687 688 size = sizeof(u16) * tx_ring->ring_size; 689 tx_ring->free_ids = vzalloc_node(size, node); 690 if (!tx_ring->free_ids) { 691 tx_ring->free_ids = vzalloc(size); 692 if (!tx_ring->free_ids) 693 goto err_tx_free_ids; 694 } 695 696 size = tx_ring->tx_max_header_size; 697 tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node); 698 if (!tx_ring->push_buf_intermediate_buf) { 699 tx_ring->push_buf_intermediate_buf = vzalloc(size); 700 if (!tx_ring->push_buf_intermediate_buf) 701 goto err_push_buf_intermediate_buf; 702 } 703 704 /* Req id ring for TX out of order completions */ 705 for (i = 0; i < tx_ring->ring_size; i++) 706 tx_ring->free_ids[i] = i; 707 708 /* Reset tx statistics */ 709 memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); 710 711 tx_ring->next_to_use = 0; 712 tx_ring->next_to_clean = 0; 713 tx_ring->cpu = ena_irq->cpu; 714 return 0; 715 716err_push_buf_intermediate_buf: 717 vfree(tx_ring->free_ids); 718 tx_ring->free_ids = NULL; 719err_tx_free_ids: 720 vfree(tx_ring->tx_buffer_info); 721 tx_ring->tx_buffer_info = NULL; 722err_tx_buffer_info: 723 return -ENOMEM; 724} 725 726/* ena_free_tx_resources - Free I/O Tx Resources per Queue 727 * @adapter: network interface device structure 728 * @qid: queue index 729 * 730 * Free all transmit software resources 731 */ 732static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) 733{ 734 struct ena_ring *tx_ring = &adapter->tx_ring[qid]; 735 736 vfree(tx_ring->tx_buffer_info); 737 tx_ring->tx_buffer_info = NULL; 738 739 vfree(tx_ring->free_ids); 740 tx_ring->free_ids = NULL; 741 742 vfree(tx_ring->push_buf_intermediate_buf); 743 tx_ring->push_buf_intermediate_buf = NULL; 744} 745 746static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, 747 int first_index, 748 int count) 749{ 750 int i, rc = 0; 751 752 for (i = first_index; i < first_index + count; i++) { 753 rc = ena_setup_tx_resources(adapter, i); 754 if (rc) 755 goto err_setup_tx; 756 } 757 758 return 0; 759 760err_setup_tx: 761 762 netif_err(adapter, ifup, adapter->netdev, 763 "Tx queue %d: allocation failed\n", i); 764 765 /* rewind the index freeing the rings as we go */ 766 while (first_index < i--) 767 ena_free_tx_resources(adapter, i); 768 return rc; 769} 770 771static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, 772 int first_index, int count) 773{ 774 int i; 775 776 for (i = first_index; i < first_index + count; i++) 777 ena_free_tx_resources(adapter, i); 778} 779 780/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues 781 * @adapter: board private structure 782 * 783 * Free all transmit software resources 784 */ 785static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) 786{ 787 ena_free_all_io_tx_resources_in_range(adapter, 788 0, 789 adapter->xdp_num_queues + 790 adapter->num_io_queues); 791} 792 793/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) 794 * 
@adapter: network interface device structure 795 * @qid: queue index 796 * 797 * Returns 0 on success, negative on failure 798 */ 799static int ena_setup_rx_resources(struct ena_adapter *adapter, 800 u32 qid) 801{ 802 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 803 struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; 804 int size, node, i; 805 806 if (rx_ring->rx_buffer_info) { 807 netif_err(adapter, ifup, adapter->netdev, 808 "rx_buffer_info is not NULL"); 809 return -EEXIST; 810 } 811 812 /* alloc extra element so in rx path 813 * we can always prefetch rx_info + 1 814 */ 815 size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1); 816 node = cpu_to_node(ena_irq->cpu); 817 818 rx_ring->rx_buffer_info = vzalloc_node(size, node); 819 if (!rx_ring->rx_buffer_info) { 820 rx_ring->rx_buffer_info = vzalloc(size); 821 if (!rx_ring->rx_buffer_info) 822 return -ENOMEM; 823 } 824 825 size = sizeof(u16) * rx_ring->ring_size; 826 rx_ring->free_ids = vzalloc_node(size, node); 827 if (!rx_ring->free_ids) { 828 rx_ring->free_ids = vzalloc(size); 829 if (!rx_ring->free_ids) { 830 vfree(rx_ring->rx_buffer_info); 831 rx_ring->rx_buffer_info = NULL; 832 return -ENOMEM; 833 } 834 } 835 836 /* Req id ring for receiving RX pkts out of order */ 837 for (i = 0; i < rx_ring->ring_size; i++) 838 rx_ring->free_ids[i] = i; 839 840 /* Reset rx statistics */ 841 memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); 842 843 rx_ring->next_to_clean = 0; 844 rx_ring->next_to_use = 0; 845 rx_ring->cpu = ena_irq->cpu; 846 847 return 0; 848} 849 850/* ena_free_rx_resources - Free I/O Rx Resources 851 * @adapter: network interface device structure 852 * @qid: queue index 853 * 854 * Free all receive software resources 855 */ 856static void ena_free_rx_resources(struct ena_adapter *adapter, 857 u32 qid) 858{ 859 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 860 861 vfree(rx_ring->rx_buffer_info); 862 rx_ring->rx_buffer_info = NULL; 863 864 vfree(rx_ring->free_ids); 865 rx_ring->free_ids = NULL; 866} 867 868/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues 869 * @adapter: board private structure 870 * 871 * Return 0 on success, negative on failure 872 */ 873static int ena_setup_all_rx_resources(struct ena_adapter *adapter) 874{ 875 int i, rc = 0; 876 877 for (i = 0; i < adapter->num_io_queues; i++) { 878 rc = ena_setup_rx_resources(adapter, i); 879 if (rc) 880 goto err_setup_rx; 881 } 882 883 return 0; 884 885err_setup_rx: 886 887 netif_err(adapter, ifup, adapter->netdev, 888 "Rx queue %d: allocation failed\n", i); 889 890 /* rewind the index freeing the rings as we go */ 891 while (i--) 892 ena_free_rx_resources(adapter, i); 893 return rc; 894} 895 896/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues 897 * @adapter: board private structure 898 * 899 * Free all receive software resources 900 */ 901static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) 902{ 903 int i; 904 905 for (i = 0; i < adapter->num_io_queues; i++) 906 ena_free_rx_resources(adapter, i); 907} 908 909static int ena_alloc_rx_page(struct ena_ring *rx_ring, 910 struct ena_rx_buffer *rx_info, gfp_t gfp) 911{ 912 int headroom = rx_ring->rx_headroom; 913 struct ena_com_buf *ena_buf; 914 struct page *page; 915 dma_addr_t dma; 916 917 /* restore page offset value in case it has been changed by device */ 918 rx_info->page_offset = headroom; 919 920 /* if previous allocated page is not used */ 921 if (unlikely(rx_info->page)) 922 return 0; 923 924 page = 
alloc_page(gfp); 925 if (unlikely(!page)) { 926 u64_stats_update_begin(&rx_ring->syncp); 927 rx_ring->rx_stats.page_alloc_fail++; 928 u64_stats_update_end(&rx_ring->syncp); 929 return -ENOMEM; 930 } 931 932 /* To enable NIC-side port-mirroring, AKA SPAN port, 933 * we make the buffer readable from the nic as well 934 */ 935 dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, 936 DMA_BIDIRECTIONAL); 937 if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { 938 u64_stats_update_begin(&rx_ring->syncp); 939 rx_ring->rx_stats.dma_mapping_err++; 940 u64_stats_update_end(&rx_ring->syncp); 941 942 __free_page(page); 943 return -EIO; 944 } 945 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 946 "Allocate page %p, rx_info %p\n", page, rx_info); 947 948 rx_info->page = page; 949 ena_buf = &rx_info->ena_buf; 950 ena_buf->paddr = dma + headroom; 951 ena_buf->len = ENA_PAGE_SIZE - headroom; 952 953 return 0; 954} 955 956static void ena_free_rx_page(struct ena_ring *rx_ring, 957 struct ena_rx_buffer *rx_info) 958{ 959 struct page *page = rx_info->page; 960 struct ena_com_buf *ena_buf = &rx_info->ena_buf; 961 962 if (unlikely(!page)) { 963 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, 964 "Trying to free unallocated buffer\n"); 965 return; 966 } 967 968 dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom, 969 ENA_PAGE_SIZE, 970 DMA_BIDIRECTIONAL); 971 972 __free_page(page); 973 rx_info->page = NULL; 974} 975 976static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) 977{ 978 u16 next_to_use, req_id; 979 u32 i; 980 int rc; 981 982 next_to_use = rx_ring->next_to_use; 983 984 for (i = 0; i < num; i++) { 985 struct ena_rx_buffer *rx_info; 986 987 req_id = rx_ring->free_ids[next_to_use]; 988 989 rx_info = &rx_ring->rx_buffer_info[req_id]; 990 991 rc = ena_alloc_rx_page(rx_ring, rx_info, 992 GFP_ATOMIC | __GFP_COMP); 993 if (unlikely(rc < 0)) { 994 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, 995 "Failed to allocate buffer for rx queue %d\n", 996 rx_ring->qid); 997 break; 998 } 999 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, 1000 &rx_info->ena_buf, 1001 req_id); 1002 if (unlikely(rc)) { 1003 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, 1004 "Failed to add buffer for rx queue %d\n", 1005 rx_ring->qid); 1006 break; 1007 } 1008 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, 1009 rx_ring->ring_size); 1010 } 1011 1012 if (unlikely(i < num)) { 1013 u64_stats_update_begin(&rx_ring->syncp); 1014 rx_ring->rx_stats.refil_partial++; 1015 u64_stats_update_end(&rx_ring->syncp); 1016 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, 1017 "Refilled rx qid %d with only %d buffers (from %d)\n", 1018 rx_ring->qid, i, num); 1019 } 1020 1021 /* ena_com_write_sq_doorbell issues a wmb() */ 1022 if (likely(i)) 1023 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); 1024 1025 rx_ring->next_to_use = next_to_use; 1026 1027 return i; 1028} 1029 1030static void ena_free_rx_bufs(struct ena_adapter *adapter, 1031 u32 qid) 1032{ 1033 struct ena_ring *rx_ring = &adapter->rx_ring[qid]; 1034 u32 i; 1035 1036 for (i = 0; i < rx_ring->ring_size; i++) { 1037 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; 1038 1039 if (rx_info->page) 1040 ena_free_rx_page(rx_ring, rx_info); 1041 } 1042} 1043 1044/* ena_refill_all_rx_bufs - allocate all queues Rx buffers 1045 * @adapter: board private structure 1046 */ 1047static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) 1048{ 1049 struct ena_ring *rx_ring; 1050 int i, rc, bufs_num; 1051 1052 for (i = 0; i 
< adapter->num_io_queues; i++) { 1053 rx_ring = &adapter->rx_ring[i]; 1054 bufs_num = rx_ring->ring_size - 1; 1055 rc = ena_refill_rx_bufs(rx_ring, bufs_num); 1056 1057 if (unlikely(rc != bufs_num)) 1058 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, 1059 "Refilling Queue %d failed. allocated %d buffers from: %d\n", 1060 i, rc, bufs_num); 1061 } 1062} 1063 1064static void ena_free_all_rx_bufs(struct ena_adapter *adapter) 1065{ 1066 int i; 1067 1068 for (i = 0; i < adapter->num_io_queues; i++) 1069 ena_free_rx_bufs(adapter, i); 1070} 1071 1072static void ena_unmap_tx_buff(struct ena_ring *tx_ring, 1073 struct ena_tx_buffer *tx_info) 1074{ 1075 struct ena_com_buf *ena_buf; 1076 u32 cnt; 1077 int i; 1078 1079 ena_buf = tx_info->bufs; 1080 cnt = tx_info->num_of_bufs; 1081 1082 if (unlikely(!cnt)) 1083 return; 1084 1085 if (tx_info->map_linear_data) { 1086 dma_unmap_single(tx_ring->dev, 1087 dma_unmap_addr(ena_buf, paddr), 1088 dma_unmap_len(ena_buf, len), 1089 DMA_TO_DEVICE); 1090 ena_buf++; 1091 cnt--; 1092 } 1093 1094 /* unmap remaining mapped pages */ 1095 for (i = 0; i < cnt; i++) { 1096 dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), 1097 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); 1098 ena_buf++; 1099 } 1100} 1101 1102/* ena_free_tx_bufs - Free Tx Buffers per Queue 1103 * @tx_ring: TX ring for which buffers be freed 1104 */ 1105static void ena_free_tx_bufs(struct ena_ring *tx_ring) 1106{ 1107 bool print_once = true; 1108 u32 i; 1109 1110 for (i = 0; i < tx_ring->ring_size; i++) { 1111 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; 1112 1113 if (!tx_info->skb) 1114 continue; 1115 1116 if (print_once) { 1117 netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev, 1118 "Free uncompleted tx skb qid %d idx 0x%x\n", 1119 tx_ring->qid, i); 1120 print_once = false; 1121 } else { 1122 netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev, 1123 "Free uncompleted tx skb qid %d idx 0x%x\n", 1124 tx_ring->qid, i); 1125 } 1126 1127 ena_unmap_tx_buff(tx_ring, tx_info); 1128 1129 dev_kfree_skb_any(tx_info->skb); 1130 } 1131 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, 1132 tx_ring->qid)); 1133} 1134 1135static void ena_free_all_tx_bufs(struct ena_adapter *adapter) 1136{ 1137 struct ena_ring *tx_ring; 1138 int i; 1139 1140 for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { 1141 tx_ring = &adapter->tx_ring[i]; 1142 ena_free_tx_bufs(tx_ring); 1143 } 1144} 1145 1146static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) 1147{ 1148 u16 ena_qid; 1149 int i; 1150 1151 for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) { 1152 ena_qid = ENA_IO_TXQ_IDX(i); 1153 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1154 } 1155} 1156 1157static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) 1158{ 1159 u16 ena_qid; 1160 int i; 1161 1162 for (i = 0; i < adapter->num_io_queues; i++) { 1163 ena_qid = ENA_IO_RXQ_IDX(i); 1164 cancel_work_sync(&adapter->ena_napi[i].dim.work); 1165 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); 1166 } 1167} 1168 1169static void ena_destroy_all_io_queues(struct ena_adapter *adapter) 1170{ 1171 ena_destroy_all_tx_queues(adapter); 1172 ena_destroy_all_rx_queues(adapter); 1173} 1174 1175static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, 1176 struct ena_tx_buffer *tx_info, bool is_xdp) 1177{ 1178 if (tx_info) 1179 netif_err(ring->adapter, 1180 tx_done, 1181 ring->netdev, 1182 "tx_info doesn't have valid %s", 1183 is_xdp ? 
"xdp frame" : "skb"); 1184 else 1185 netif_err(ring->adapter, 1186 tx_done, 1187 ring->netdev, 1188 "Invalid req_id: %hu\n", 1189 req_id); 1190 1191 u64_stats_update_begin(&ring->syncp); 1192 ring->tx_stats.bad_req_id++; 1193 u64_stats_update_end(&ring->syncp); 1194 1195 /* Trigger device reset */ 1196 ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID; 1197 set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags); 1198 return -EFAULT; 1199} 1200 1201static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) 1202{ 1203 struct ena_tx_buffer *tx_info; 1204 1205 tx_info = &tx_ring->tx_buffer_info[req_id]; 1206 if (likely(tx_info->skb)) 1207 return 0; 1208 1209 return handle_invalid_req_id(tx_ring, req_id, tx_info, false); 1210} 1211 1212static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) 1213{ 1214 struct ena_tx_buffer *tx_info; 1215 1216 tx_info = &xdp_ring->tx_buffer_info[req_id]; 1217 if (likely(tx_info->xdpf)) 1218 return 0; 1219 1220 return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); 1221} 1222 1223static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) 1224{ 1225 struct netdev_queue *txq; 1226 bool above_thresh; 1227 u32 tx_bytes = 0; 1228 u32 total_done = 0; 1229 u16 next_to_clean; 1230 u16 req_id; 1231 int tx_pkts = 0; 1232 int rc; 1233 1234 next_to_clean = tx_ring->next_to_clean; 1235 txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid); 1236 1237 while (tx_pkts < budget) { 1238 struct ena_tx_buffer *tx_info; 1239 struct sk_buff *skb; 1240 1241 rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, 1242 &req_id); 1243 if (rc) { 1244 if (unlikely(rc == -EINVAL)) 1245 handle_invalid_req_id(tx_ring, req_id, NULL, 1246 false); 1247 break; 1248 } 1249 1250 /* validate that the request id points to a valid skb */ 1251 rc = validate_tx_req_id(tx_ring, req_id); 1252 if (rc) 1253 break; 1254 1255 tx_info = &tx_ring->tx_buffer_info[req_id]; 1256 skb = tx_info->skb; 1257 1258 /* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */ 1259 prefetch(&skb->end); 1260 1261 tx_info->skb = NULL; 1262 tx_info->last_jiffies = 0; 1263 1264 ena_unmap_tx_buff(tx_ring, tx_info); 1265 1266 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, 1267 "tx_poll: q %d skb %p completed\n", tx_ring->qid, 1268 skb); 1269 1270 tx_bytes += skb->len; 1271 dev_kfree_skb(skb); 1272 tx_pkts++; 1273 total_done += tx_info->tx_descs; 1274 1275 tx_ring->free_ids[next_to_clean] = req_id; 1276 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 1277 tx_ring->ring_size); 1278 } 1279 1280 tx_ring->next_to_clean = next_to_clean; 1281 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); 1282 ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); 1283 1284 netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); 1285 1286 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, 1287 "tx_poll: q %d done. total pkts: %d\n", 1288 tx_ring->qid, tx_pkts); 1289 1290 /* need to make the rings circular update visible to 1291 * ena_start_xmit() before checking for netif_queue_stopped(). 
1292 */ 1293 smp_mb(); 1294 1295 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1296 ENA_TX_WAKEUP_THRESH); 1297 if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { 1298 __netif_tx_lock(txq, smp_processor_id()); 1299 above_thresh = 1300 ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 1301 ENA_TX_WAKEUP_THRESH); 1302 if (netif_tx_queue_stopped(txq) && above_thresh && 1303 test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { 1304 netif_tx_wake_queue(txq); 1305 u64_stats_update_begin(&tx_ring->syncp); 1306 tx_ring->tx_stats.queue_wakeup++; 1307 u64_stats_update_end(&tx_ring->syncp); 1308 } 1309 __netif_tx_unlock(txq); 1310 } 1311 1312 return tx_pkts; 1313} 1314 1315static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags) 1316{ 1317 struct sk_buff *skb; 1318 1319 if (frags) 1320 skb = napi_get_frags(rx_ring->napi); 1321 else 1322 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 1323 rx_ring->rx_copybreak); 1324 1325 if (unlikely(!skb)) { 1326 u64_stats_update_begin(&rx_ring->syncp); 1327 rx_ring->rx_stats.skb_alloc_fail++; 1328 u64_stats_update_end(&rx_ring->syncp); 1329 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, 1330 "Failed to allocate skb. frags: %d\n", frags); 1331 return NULL; 1332 } 1333 1334 return skb; 1335} 1336 1337static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, 1338 struct ena_com_rx_buf_info *ena_bufs, 1339 u32 descs, 1340 u16 *next_to_clean) 1341{ 1342 struct sk_buff *skb; 1343 struct ena_rx_buffer *rx_info; 1344 u16 len, req_id, buf = 0; 1345 void *va; 1346 1347 len = ena_bufs[buf].len; 1348 req_id = ena_bufs[buf].req_id; 1349 1350 rx_info = &rx_ring->rx_buffer_info[req_id]; 1351 1352 if (unlikely(!rx_info->page)) { 1353 netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, 1354 "Page is NULL\n"); 1355 return NULL; 1356 } 1357 1358 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1359 "rx_info %p page %p\n", 1360 rx_info, rx_info->page); 1361 1362 /* save virt address of first buffer */ 1363 va = page_address(rx_info->page) + rx_info->page_offset; 1364 1365 prefetch(va); 1366 1367 if (len <= rx_ring->rx_copybreak) { 1368 skb = ena_alloc_skb(rx_ring, false); 1369 if (unlikely(!skb)) 1370 return NULL; 1371 1372 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1373 "RX allocated small packet. len %d. data_len %d\n", 1374 skb->len, skb->data_len); 1375 1376 /* sync this buffer for CPU use */ 1377 dma_sync_single_for_cpu(rx_ring->dev, 1378 dma_unmap_addr(&rx_info->ena_buf, paddr), 1379 len, 1380 DMA_FROM_DEVICE); 1381 skb_copy_to_linear_data(skb, va, len); 1382 dma_sync_single_for_device(rx_ring->dev, 1383 dma_unmap_addr(&rx_info->ena_buf, paddr), 1384 len, 1385 DMA_FROM_DEVICE); 1386 1387 skb_put(skb, len); 1388 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1389 rx_ring->free_ids[*next_to_clean] = req_id; 1390 *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, 1391 rx_ring->ring_size); 1392 return skb; 1393 } 1394 1395 skb = ena_alloc_skb(rx_ring, true); 1396 if (unlikely(!skb)) 1397 return NULL; 1398 1399 do { 1400 dma_unmap_page(rx_ring->dev, 1401 dma_unmap_addr(&rx_info->ena_buf, paddr), 1402 ENA_PAGE_SIZE, DMA_BIDIRECTIONAL); 1403 1404 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, 1405 rx_info->page_offset, len, ENA_PAGE_SIZE); 1406 1407 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1408 "RX skb updated. len %d. 
data_len %d\n", 1409 skb->len, skb->data_len); 1410 1411 rx_info->page = NULL; 1412 1413 rx_ring->free_ids[*next_to_clean] = req_id; 1414 *next_to_clean = 1415 ENA_RX_RING_IDX_NEXT(*next_to_clean, 1416 rx_ring->ring_size); 1417 if (likely(--descs == 0)) 1418 break; 1419 1420 buf++; 1421 len = ena_bufs[buf].len; 1422 req_id = ena_bufs[buf].req_id; 1423 1424 rx_info = &rx_ring->rx_buffer_info[req_id]; 1425 } while (1); 1426 1427 return skb; 1428} 1429 1430/* ena_rx_checksum - indicate in skb if hw indicated a good cksum 1431 * @adapter: structure containing adapter specific data 1432 * @ena_rx_ctx: received packet context/metadata 1433 * @skb: skb currently being received and modified 1434 */ 1435static void ena_rx_checksum(struct ena_ring *rx_ring, 1436 struct ena_com_rx_ctx *ena_rx_ctx, 1437 struct sk_buff *skb) 1438{ 1439 /* Rx csum disabled */ 1440 if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) { 1441 skb->ip_summed = CHECKSUM_NONE; 1442 return; 1443 } 1444 1445 /* For fragmented packets the checksum isn't valid */ 1446 if (ena_rx_ctx->frag) { 1447 skb->ip_summed = CHECKSUM_NONE; 1448 return; 1449 } 1450 1451 /* if IP and error */ 1452 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && 1453 (ena_rx_ctx->l3_csum_err))) { 1454 /* ipv4 checksum error */ 1455 skb->ip_summed = CHECKSUM_NONE; 1456 u64_stats_update_begin(&rx_ring->syncp); 1457 rx_ring->rx_stats.bad_csum++; 1458 u64_stats_update_end(&rx_ring->syncp); 1459 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, 1460 "RX IPv4 header checksum error\n"); 1461 return; 1462 } 1463 1464 /* if TCP/UDP */ 1465 if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 1466 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) { 1467 if (unlikely(ena_rx_ctx->l4_csum_err)) { 1468 /* TCP/UDP checksum error */ 1469 u64_stats_update_begin(&rx_ring->syncp); 1470 rx_ring->rx_stats.bad_csum++; 1471 u64_stats_update_end(&rx_ring->syncp); 1472 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, 1473 "RX L4 checksum error\n"); 1474 skb->ip_summed = CHECKSUM_NONE; 1475 return; 1476 } 1477 1478 if (likely(ena_rx_ctx->l4_csum_checked)) { 1479 skb->ip_summed = CHECKSUM_UNNECESSARY; 1480 u64_stats_update_begin(&rx_ring->syncp); 1481 rx_ring->rx_stats.csum_good++; 1482 u64_stats_update_end(&rx_ring->syncp); 1483 } else { 1484 u64_stats_update_begin(&rx_ring->syncp); 1485 rx_ring->rx_stats.csum_unchecked++; 1486 u64_stats_update_end(&rx_ring->syncp); 1487 skb->ip_summed = CHECKSUM_NONE; 1488 } 1489 } else { 1490 skb->ip_summed = CHECKSUM_NONE; 1491 return; 1492 } 1493 1494} 1495 1496static void ena_set_rx_hash(struct ena_ring *rx_ring, 1497 struct ena_com_rx_ctx *ena_rx_ctx, 1498 struct sk_buff *skb) 1499{ 1500 enum pkt_hash_types hash_type; 1501 1502 if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) { 1503 if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || 1504 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) 1505 1506 hash_type = PKT_HASH_TYPE_L4; 1507 else 1508 hash_type = PKT_HASH_TYPE_NONE; 1509 1510 /* Override hash type if the packet is fragmented */ 1511 if (ena_rx_ctx->frag) 1512 hash_type = PKT_HASH_TYPE_NONE; 1513 1514 skb_set_hash(skb, ena_rx_ctx->hash, hash_type); 1515 } 1516} 1517 1518static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) 1519{ 1520 struct ena_rx_buffer *rx_info; 1521 int ret; 1522 1523 rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; 1524 xdp->data = page_address(rx_info->page) + rx_info->page_offset; 1525 
xdp_set_data_meta_invalid(xdp); 1526 xdp->data_hard_start = page_address(rx_info->page); 1527 xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; 1528 /* If for some reason we received a bigger packet than 1529 * we expect, then we simply drop it 1530 */ 1531 if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) 1532 return XDP_DROP; 1533 1534 ret = ena_xdp_execute(rx_ring, xdp, rx_info); 1535 1536 /* The xdp program might expand the headers */ 1537 if (ret == XDP_PASS) { 1538 rx_info->page_offset = xdp->data - xdp->data_hard_start; 1539 rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; 1540 } 1541 1542 return ret; 1543} 1544/* ena_clean_rx_irq - Cleanup RX irq 1545 * @rx_ring: RX ring to clean 1546 * @napi: napi handler 1547 * @budget: how many packets driver is allowed to clean 1548 * 1549 * Returns the number of cleaned buffers. 1550 */ 1551static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, 1552 u32 budget) 1553{ 1554 u16 next_to_clean = rx_ring->next_to_clean; 1555 struct ena_com_rx_ctx ena_rx_ctx; 1556 struct ena_rx_buffer *rx_info; 1557 struct ena_adapter *adapter; 1558 u32 res_budget, work_done; 1559 int rx_copybreak_pkt = 0; 1560 int refill_threshold; 1561 struct sk_buff *skb; 1562 int refill_required; 1563 struct xdp_buff xdp; 1564 int total_len = 0; 1565 int xdp_verdict; 1566 int rc = 0; 1567 int i; 1568 1569 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1570 "%s qid %d\n", __func__, rx_ring->qid); 1571 res_budget = budget; 1572 xdp.rxq = &rx_ring->xdp_rxq; 1573 xdp.frame_sz = ENA_PAGE_SIZE; 1574 1575 do { 1576 xdp_verdict = XDP_PASS; 1577 skb = NULL; 1578 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; 1579 ena_rx_ctx.max_bufs = rx_ring->sgl_size; 1580 ena_rx_ctx.descs = 0; 1581 ena_rx_ctx.pkt_offset = 0; 1582 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, 1583 rx_ring->ena_com_io_sq, 1584 &ena_rx_ctx); 1585 if (unlikely(rc)) 1586 goto error; 1587 1588 if (unlikely(ena_rx_ctx.descs == 0)) 1589 break; 1590 1591 /* First descriptor might have an offset set by the device */ 1592 rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; 1593 rx_info->page_offset += ena_rx_ctx.pkt_offset; 1594 1595 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, 1596 "rx_poll: q %d got packet from ena. 
descs #: %d l3 proto %d l4 proto %d hash: %x\n", 1597 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, 1598 ena_rx_ctx.l4_proto, ena_rx_ctx.hash); 1599 1600 if (ena_xdp_present_ring(rx_ring)) 1601 xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); 1602 1603 /* allocate skb and fill it */ 1604 if (xdp_verdict == XDP_PASS) 1605 skb = ena_rx_skb(rx_ring, 1606 rx_ring->ena_bufs, 1607 ena_rx_ctx.descs, 1608 &next_to_clean); 1609 1610 if (unlikely(!skb)) { 1611 /* The page might not actually be freed here since the 1612 * page reference count is incremented in 1613 * ena_xdp_xmit_buff(), and it will be decreased only 1614 * when send completion was received from the device 1615 */ 1616 if (xdp_verdict == XDP_TX) 1617 ena_free_rx_page(rx_ring, 1618 &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]); 1619 for (i = 0; i < ena_rx_ctx.descs; i++) { 1620 rx_ring->free_ids[next_to_clean] = 1621 rx_ring->ena_bufs[i].req_id; 1622 next_to_clean = 1623 ENA_RX_RING_IDX_NEXT(next_to_clean, 1624 rx_ring->ring_size); 1625 } 1626 if (xdp_verdict != XDP_PASS) { 1627 res_budget--; 1628 continue; 1629 } 1630 break; 1631 } 1632 1633 ena_rx_checksum(rx_ring, &ena_rx_ctx, skb); 1634 1635 ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb); 1636 1637 skb_record_rx_queue(skb, rx_ring->qid); 1638 1639 if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { 1640 total_len += rx_ring->ena_bufs[0].len; 1641 rx_copybreak_pkt++; 1642 napi_gro_receive(napi, skb); 1643 } else { 1644 total_len += skb->len; 1645 napi_gro_frags(napi); 1646 } 1647 1648 res_budget--; 1649 } while (likely(res_budget)); 1650 1651 work_done = budget - res_budget; 1652 rx_ring->per_napi_packets += work_done; 1653 u64_stats_update_begin(&rx_ring->syncp); 1654 rx_ring->rx_stats.bytes += total_len; 1655 rx_ring->rx_stats.cnt += work_done; 1656 rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt; 1657 u64_stats_update_end(&rx_ring->syncp); 1658 1659 rx_ring->next_to_clean = next_to_clean; 1660 1661 refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 1662 refill_threshold = 1663 min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, 1664 ENA_RX_REFILL_THRESH_PACKET); 1665 1666 /* Optimization, try to batch new rx buffers */ 1667 if (refill_required > refill_threshold) { 1668 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); 1669 ena_refill_rx_bufs(rx_ring, refill_required); 1670 } 1671 1672 return work_done; 1673 1674error: 1675 adapter = netdev_priv(rx_ring->netdev); 1676 1677 if (rc == -ENOSPC) { 1678 u64_stats_update_begin(&rx_ring->syncp); 1679 rx_ring->rx_stats.bad_desc_num++; 1680 u64_stats_update_end(&rx_ring->syncp); 1681 adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; 1682 } else { 1683 u64_stats_update_begin(&rx_ring->syncp); 1684 rx_ring->rx_stats.bad_req_id++; 1685 u64_stats_update_end(&rx_ring->syncp); 1686 adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; 1687 } 1688 1689 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 1690 1691 return 0; 1692} 1693 1694static void ena_dim_work(struct work_struct *w) 1695{ 1696 struct dim *dim = container_of(w, struct dim, work); 1697 struct dim_cq_moder cur_moder = 1698 net_dim_get_rx_moderation(dim->mode, dim->profile_ix); 1699 struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim); 1700 1701 ena_napi->rx_ring->smoothed_interval = cur_moder.usec; 1702 dim->state = DIM_START_MEASURE; 1703} 1704 1705static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) 1706{ 1707 struct dim_sample dim_sample; 1708 struct ena_ring *rx_ring = 
ena_napi->rx_ring; 1709 1710 if (!rx_ring->per_napi_packets) 1711 return; 1712 1713 rx_ring->non_empty_napi_events++; 1714 1715 dim_update_sample(rx_ring->non_empty_napi_events, 1716 rx_ring->rx_stats.cnt, 1717 rx_ring->rx_stats.bytes, 1718 &dim_sample); 1719 1720 net_dim(&ena_napi->dim, dim_sample); 1721 1722 rx_ring->per_napi_packets = 0; 1723} 1724 1725static void ena_unmask_interrupt(struct ena_ring *tx_ring, 1726 struct ena_ring *rx_ring) 1727{ 1728 struct ena_eth_io_intr_reg intr_reg; 1729 u32 rx_interval = 0; 1730 /* Rx ring can be NULL when for XDP tx queues which don't have an 1731 * accompanying rx_ring pair. 1732 */ 1733 if (rx_ring) 1734 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ? 1735 rx_ring->smoothed_interval : 1736 ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev); 1737 1738 /* Update intr register: rx intr delay, 1739 * tx intr delay and interrupt unmask 1740 */ 1741 ena_com_update_intr_reg(&intr_reg, 1742 rx_interval, 1743 tx_ring->smoothed_interval, 1744 true); 1745 1746 u64_stats_update_begin(&tx_ring->syncp); 1747 tx_ring->tx_stats.unmask_interrupt++; 1748 u64_stats_update_end(&tx_ring->syncp); 1749 1750 /* It is a shared MSI-X. 1751 * Tx and Rx CQ have pointer to it. 1752 * So we use one of them to reach the intr reg 1753 * The Tx ring is used because the rx_ring is NULL for XDP queues 1754 */ 1755 ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); 1756} 1757 1758static void ena_update_ring_numa_node(struct ena_ring *tx_ring, 1759 struct ena_ring *rx_ring) 1760{ 1761 int cpu = get_cpu(); 1762 int numa_node; 1763 1764 /* Check only one ring since the 2 rings are running on the same cpu */ 1765 if (likely(tx_ring->cpu == cpu)) 1766 goto out; 1767 1768 numa_node = cpu_to_node(cpu); 1769 put_cpu(); 1770 1771 if (numa_node != NUMA_NO_NODE) { 1772 ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); 1773 if (rx_ring) 1774 ena_com_update_numa_node(rx_ring->ena_com_io_cq, 1775 numa_node); 1776 } 1777 1778 tx_ring->cpu = cpu; 1779 if (rx_ring) 1780 rx_ring->cpu = cpu; 1781 1782 return; 1783out: 1784 put_cpu(); 1785} 1786 1787static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) 1788{ 1789 u32 total_done = 0; 1790 u16 next_to_clean; 1791 u32 tx_bytes = 0; 1792 int tx_pkts = 0; 1793 u16 req_id; 1794 int rc; 1795 1796 if (unlikely(!xdp_ring)) 1797 return 0; 1798 next_to_clean = xdp_ring->next_to_clean; 1799 1800 while (tx_pkts < budget) { 1801 struct ena_tx_buffer *tx_info; 1802 struct xdp_frame *xdpf; 1803 1804 rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, 1805 &req_id); 1806 if (rc) { 1807 if (unlikely(rc == -EINVAL)) 1808 handle_invalid_req_id(xdp_ring, req_id, NULL, 1809 true); 1810 break; 1811 } 1812 1813 /* validate that the request id points to a valid xdp_frame */ 1814 rc = validate_xdp_req_id(xdp_ring, req_id); 1815 if (rc) 1816 break; 1817 1818 tx_info = &xdp_ring->tx_buffer_info[req_id]; 1819 xdpf = tx_info->xdpf; 1820 1821 tx_info->xdpf = NULL; 1822 tx_info->last_jiffies = 0; 1823 ena_unmap_tx_buff(xdp_ring, tx_info); 1824 1825 netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, 1826 "tx_poll: q %d skb %p completed\n", xdp_ring->qid, 1827 xdpf); 1828 1829 tx_bytes += xdpf->len; 1830 tx_pkts++; 1831 total_done += tx_info->tx_descs; 1832 1833 __free_page(tx_info->xdp_rx_page); 1834 xdp_ring->free_ids[next_to_clean] = req_id; 1835 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, 1836 xdp_ring->ring_size); 1837 } 1838 1839 xdp_ring->next_to_clean = next_to_clean; 1840 
ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); 1841 ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); 1842 1843 netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, 1844 "tx_poll: q %d done. total pkts: %d\n", 1845 xdp_ring->qid, tx_pkts); 1846 1847 return tx_pkts; 1848} 1849 1850static int ena_io_poll(struct napi_struct *napi, int budget) 1851{ 1852 struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); 1853 struct ena_ring *tx_ring, *rx_ring; 1854 int tx_work_done; 1855 int rx_work_done = 0; 1856 int tx_budget; 1857 int napi_comp_call = 0; 1858 int ret; 1859 1860 tx_ring = ena_napi->tx_ring; 1861 rx_ring = ena_napi->rx_ring; 1862 1863 tx_ring->first_interrupt = ena_napi->first_interrupt; 1864 rx_ring->first_interrupt = ena_napi->first_interrupt; 1865 1866 tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; 1867 1868 if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || 1869 test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) { 1870 napi_complete_done(napi, 0); 1871 return 0; 1872 } 1873 1874 tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); 1875 /* On netpoll the budget is zero and the handler should only clean the 1876 * tx completions. 1877 */ 1878 if (likely(budget)) 1879 rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); 1880 1881 /* If the device is about to reset or down, avoid unmask 1882 * the interrupt and return 0 so NAPI won't reschedule 1883 */ 1884 if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || 1885 test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) { 1886 napi_complete_done(napi, 0); 1887 ret = 0; 1888 1889 } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) { 1890 napi_comp_call = 1; 1891 1892 /* Update numa and unmask the interrupt only when schedule 1893 * from the interrupt context (vs from sk_busy_loop) 1894 */ 1895 if (napi_complete_done(napi, rx_work_done) && 1896 READ_ONCE(ena_napi->interrupts_masked)) { 1897 smp_rmb(); /* make sure interrupts_masked is read */ 1898 WRITE_ONCE(ena_napi->interrupts_masked, false); 1899 /* We apply adaptive moderation on Rx path only. 1900 * Tx uses static interrupt moderation. 
1901 */ 1902 if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) 1903 ena_adjust_adaptive_rx_intr_moderation(ena_napi); 1904 1905 ena_unmask_interrupt(tx_ring, rx_ring); 1906 } 1907 1908 ena_update_ring_numa_node(tx_ring, rx_ring); 1909 1910 ret = rx_work_done; 1911 } else { 1912 ret = budget; 1913 } 1914 1915 u64_stats_update_begin(&tx_ring->syncp); 1916 tx_ring->tx_stats.napi_comp += napi_comp_call; 1917 tx_ring->tx_stats.tx_poll++; 1918 u64_stats_update_end(&tx_ring->syncp); 1919 1920 return ret; 1921} 1922 1923static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data) 1924{ 1925 struct ena_adapter *adapter = (struct ena_adapter *)data; 1926 1927 ena_com_admin_q_comp_intr_handler(adapter->ena_dev); 1928 1929 /* Don't call the aenq handler before probe is done */ 1930 if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))) 1931 ena_com_aenq_intr_handler(adapter->ena_dev, data); 1932 1933 return IRQ_HANDLED; 1934} 1935 1936/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx 1937 * @irq: interrupt number 1938 * @data: pointer to a network interface private napi device structure 1939 */ 1940static irqreturn_t ena_intr_msix_io(int irq, void *data) 1941{ 1942 struct ena_napi *ena_napi = data; 1943 1944 ena_napi->first_interrupt = true; 1945 1946 WRITE_ONCE(ena_napi->interrupts_masked, true); 1947 smp_wmb(); /* write interrupts_masked before calling napi */ 1948 1949 napi_schedule_irqoff(&ena_napi->napi); 1950 1951 return IRQ_HANDLED; 1952} 1953 1954/* Reserve a single MSI-X vector for management (admin + aenq). 1955 * plus reserve one vector for each potential io queue. 1956 * the number of potential io queues is the minimum of what the device 1957 * supports and the number of vCPUs. 1958 */ 1959static int ena_enable_msix(struct ena_adapter *adapter) 1960{ 1961 int msix_vecs, irq_cnt; 1962 1963 if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { 1964 netif_err(adapter, probe, adapter->netdev, 1965 "Error, MSI-X is already enabled\n"); 1966 return -EPERM; 1967 } 1968 1969 /* Reserved the max msix vectors we might need */ 1970 msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues); 1971 netif_dbg(adapter, probe, adapter->netdev, 1972 "Trying to enable MSI-X, vectors %d\n", msix_vecs); 1973 1974 irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC, 1975 msix_vecs, PCI_IRQ_MSIX); 1976 1977 if (irq_cnt < 0) { 1978 netif_err(adapter, probe, adapter->netdev, 1979 "Failed to enable MSI-X. 
irq_cnt %d\n", irq_cnt); 1980 return -ENOSPC; 1981 } 1982 1983 if (irq_cnt != msix_vecs) { 1984 netif_notice(adapter, probe, adapter->netdev, 1985 "Enable only %d MSI-X (out of %d), reduce the number of queues\n", 1986 irq_cnt, msix_vecs); 1987 adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC; 1988 } 1989 1990 if (ena_init_rx_cpu_rmap(adapter)) 1991 netif_warn(adapter, probe, adapter->netdev, 1992 "Failed to map IRQs to CPUs\n"); 1993 1994 adapter->msix_vecs = irq_cnt; 1995 set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags); 1996 1997 return 0; 1998} 1999 2000static void ena_setup_mgmnt_intr(struct ena_adapter *adapter) 2001{ 2002 u32 cpu; 2003 2004 snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, 2005 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s", 2006 pci_name(adapter->pdev)); 2007 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = 2008 ena_intr_msix_mgmnt; 2009 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; 2010 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = 2011 pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX); 2012 cpu = cpumask_first(cpu_online_mask); 2013 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu; 2014 cpumask_set_cpu(cpu, 2015 &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask); 2016} 2017 2018static void ena_setup_io_intr(struct ena_adapter *adapter) 2019{ 2020 struct net_device *netdev; 2021 int irq_idx, i, cpu; 2022 int io_queue_count; 2023 2024 netdev = adapter->netdev; 2025 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2026 2027 for (i = 0; i < io_queue_count; i++) { 2028 irq_idx = ENA_IO_IRQ_IDX(i); 2029 cpu = i % num_online_cpus(); 2030 2031 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, 2032 "%s-Tx-Rx-%d", netdev->name, i); 2033 adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io; 2034 adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i]; 2035 adapter->irq_tbl[irq_idx].vector = 2036 pci_irq_vector(adapter->pdev, irq_idx); 2037 adapter->irq_tbl[irq_idx].cpu = cpu; 2038 2039 cpumask_set_cpu(cpu, 2040 &adapter->irq_tbl[irq_idx].affinity_hint_mask); 2041 } 2042} 2043 2044static int ena_request_mgmnt_irq(struct ena_adapter *adapter) 2045{ 2046 unsigned long flags = 0; 2047 struct ena_irq *irq; 2048 int rc; 2049 2050 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 2051 rc = request_irq(irq->vector, irq->handler, flags, irq->name, 2052 irq->data); 2053 if (rc) { 2054 netif_err(adapter, probe, adapter->netdev, 2055 "Failed to request admin irq\n"); 2056 return rc; 2057 } 2058 2059 netif_dbg(adapter, probe, adapter->netdev, 2060 "Set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", 2061 irq->affinity_hint_mask.bits[0], irq->vector); 2062 2063 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); 2064 2065 return rc; 2066} 2067 2068static int ena_request_io_irq(struct ena_adapter *adapter) 2069{ 2070 u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2071 unsigned long flags = 0; 2072 struct ena_irq *irq; 2073 int rc = 0, i, k; 2074 2075 if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { 2076 netif_err(adapter, ifup, adapter->netdev, 2077 "Failed to request I/O IRQ: MSI-X is not enabled\n"); 2078 return -EINVAL; 2079 } 2080 2081 for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { 2082 irq = &adapter->irq_tbl[i]; 2083 rc = request_irq(irq->vector, irq->handler, flags, irq->name, 2084 irq->data); 2085 if (rc) { 2086 netif_err(adapter, ifup, adapter->netdev, 2087 "Failed to request I/O IRQ. 
index %d rc %d\n", 2088 i, rc); 2089 goto err; 2090 } 2091 2092 netif_dbg(adapter, ifup, adapter->netdev, 2093 "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", 2094 i, irq->affinity_hint_mask.bits[0], irq->vector); 2095 2096 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); 2097 } 2098 2099 return rc; 2100 2101err: 2102 for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) { 2103 irq = &adapter->irq_tbl[k]; 2104 free_irq(irq->vector, irq->data); 2105 } 2106 2107 return rc; 2108} 2109 2110static void ena_free_mgmnt_irq(struct ena_adapter *adapter) 2111{ 2112 struct ena_irq *irq; 2113 2114 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; 2115 synchronize_irq(irq->vector); 2116 irq_set_affinity_hint(irq->vector, NULL); 2117 free_irq(irq->vector, irq->data); 2118} 2119 2120static void ena_free_io_irq(struct ena_adapter *adapter) 2121{ 2122 u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2123 struct ena_irq *irq; 2124 int i; 2125 2126#ifdef CONFIG_RFS_ACCEL 2127 if (adapter->msix_vecs >= 1) { 2128 free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); 2129 adapter->netdev->rx_cpu_rmap = NULL; 2130 } 2131#endif /* CONFIG_RFS_ACCEL */ 2132 2133 for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { 2134 irq = &adapter->irq_tbl[i]; 2135 irq_set_affinity_hint(irq->vector, NULL); 2136 free_irq(irq->vector, irq->data); 2137 } 2138} 2139 2140static void ena_disable_msix(struct ena_adapter *adapter) 2141{ 2142 if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) 2143 pci_free_irq_vectors(adapter->pdev); 2144} 2145 2146static void ena_disable_io_intr_sync(struct ena_adapter *adapter) 2147{ 2148 u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2149 int i; 2150 2151 if (!netif_running(adapter->netdev)) 2152 return; 2153 2154 for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) 2155 synchronize_irq(adapter->irq_tbl[i].vector); 2156} 2157 2158static void ena_del_napi_in_range(struct ena_adapter *adapter, 2159 int first_index, 2160 int count) 2161{ 2162 int i; 2163 2164 for (i = first_index; i < first_index + count; i++) { 2165 netif_napi_del(&adapter->ena_napi[i].napi); 2166 2167 WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) && 2168 adapter->ena_napi[i].xdp_ring); 2169 } 2170} 2171 2172static void ena_init_napi_in_range(struct ena_adapter *adapter, 2173 int first_index, int count) 2174{ 2175 int i; 2176 2177 for (i = first_index; i < first_index + count; i++) { 2178 struct ena_napi *napi = &adapter->ena_napi[i]; 2179 2180 netif_napi_add(adapter->netdev, 2181 &napi->napi, 2182 ENA_IS_XDP_INDEX(adapter, i) ? 
ena_xdp_io_poll : ena_io_poll, 2183 ENA_NAPI_BUDGET); 2184 2185 if (!ENA_IS_XDP_INDEX(adapter, i)) { 2186 napi->rx_ring = &adapter->rx_ring[i]; 2187 napi->tx_ring = &adapter->tx_ring[i]; 2188 } else { 2189 napi->xdp_ring = &adapter->tx_ring[i]; 2190 } 2191 napi->qid = i; 2192 } 2193} 2194 2195static void ena_napi_disable_in_range(struct ena_adapter *adapter, 2196 int first_index, 2197 int count) 2198{ 2199 int i; 2200 2201 for (i = first_index; i < first_index + count; i++) 2202 napi_disable(&adapter->ena_napi[i].napi); 2203} 2204 2205static void ena_napi_enable_in_range(struct ena_adapter *adapter, 2206 int first_index, 2207 int count) 2208{ 2209 int i; 2210 2211 for (i = first_index; i < first_index + count; i++) 2212 napi_enable(&adapter->ena_napi[i].napi); 2213} 2214 2215/* Configure the Rx forwarding */ 2216static int ena_rss_configure(struct ena_adapter *adapter) 2217{ 2218 struct ena_com_dev *ena_dev = adapter->ena_dev; 2219 int rc; 2220 2221 /* In case the RSS table wasn't initialized by probe */ 2222 if (!ena_dev->rss.tbl_log_size) { 2223 rc = ena_rss_init_default(adapter); 2224 if (rc && (rc != -EOPNOTSUPP)) { 2225 netif_err(adapter, ifup, adapter->netdev, 2226 "Failed to init RSS rc: %d\n", rc); 2227 return rc; 2228 } 2229 } 2230 2231 /* Set indirect table */ 2232 rc = ena_com_indirect_table_set(ena_dev); 2233 if (unlikely(rc && rc != -EOPNOTSUPP)) 2234 return rc; 2235 2236 /* Configure hash function (if supported) */ 2237 rc = ena_com_set_hash_function(ena_dev); 2238 if (unlikely(rc && (rc != -EOPNOTSUPP))) 2239 return rc; 2240 2241 /* Configure hash inputs (if supported) */ 2242 rc = ena_com_set_hash_ctrl(ena_dev); 2243 if (unlikely(rc && (rc != -EOPNOTSUPP))) 2244 return rc; 2245 2246 return 0; 2247} 2248 2249static int ena_up_complete(struct ena_adapter *adapter) 2250{ 2251 int rc; 2252 2253 rc = ena_rss_configure(adapter); 2254 if (rc) 2255 return rc; 2256 2257 ena_change_mtu(adapter->netdev, adapter->netdev->mtu); 2258 2259 ena_refill_all_rx_bufs(adapter); 2260 2261 /* enable transmits */ 2262 netif_tx_start_all_queues(adapter->netdev); 2263 2264 ena_napi_enable_in_range(adapter, 2265 0, 2266 adapter->xdp_num_queues + adapter->num_io_queues); 2267 2268 return 0; 2269} 2270 2271static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) 2272{ 2273 struct ena_com_create_io_ctx ctx; 2274 struct ena_com_dev *ena_dev; 2275 struct ena_ring *tx_ring; 2276 u32 msix_vector; 2277 u16 ena_qid; 2278 int rc; 2279 2280 ena_dev = adapter->ena_dev; 2281 2282 tx_ring = &adapter->tx_ring[qid]; 2283 msix_vector = ENA_IO_IRQ_IDX(qid); 2284 ena_qid = ENA_IO_TXQ_IDX(qid); 2285 2286 memset(&ctx, 0x0, sizeof(ctx)); 2287 2288 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; 2289 ctx.qid = ena_qid; 2290 ctx.mem_queue_type = ena_dev->tx_mem_queue_type; 2291 ctx.msix_vector = msix_vector; 2292 ctx.queue_size = tx_ring->ring_size; 2293 ctx.numa_node = cpu_to_node(tx_ring->cpu); 2294 2295 rc = ena_com_create_io_queue(ena_dev, &ctx); 2296 if (rc) { 2297 netif_err(adapter, ifup, adapter->netdev, 2298 "Failed to create I/O TX queue num %d rc: %d\n", 2299 qid, rc); 2300 return rc; 2301 } 2302 2303 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 2304 &tx_ring->ena_com_io_sq, 2305 &tx_ring->ena_com_io_cq); 2306 if (rc) { 2307 netif_err(adapter, ifup, adapter->netdev, 2308 "Failed to get TX queue handlers. 
TX queue num %d rc: %d\n", 2309 qid, rc); 2310 ena_com_destroy_io_queue(ena_dev, ena_qid); 2311 return rc; 2312 } 2313 2314 ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); 2315 return rc; 2316} 2317 2318static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, 2319 int first_index, int count) 2320{ 2321 struct ena_com_dev *ena_dev = adapter->ena_dev; 2322 int rc, i; 2323 2324 for (i = first_index; i < first_index + count; i++) { 2325 rc = ena_create_io_tx_queue(adapter, i); 2326 if (rc) 2327 goto create_err; 2328 } 2329 2330 return 0; 2331 2332create_err: 2333 while (i-- > first_index) 2334 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); 2335 2336 return rc; 2337} 2338 2339static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) 2340{ 2341 struct ena_com_dev *ena_dev; 2342 struct ena_com_create_io_ctx ctx; 2343 struct ena_ring *rx_ring; 2344 u32 msix_vector; 2345 u16 ena_qid; 2346 int rc; 2347 2348 ena_dev = adapter->ena_dev; 2349 2350 rx_ring = &adapter->rx_ring[qid]; 2351 msix_vector = ENA_IO_IRQ_IDX(qid); 2352 ena_qid = ENA_IO_RXQ_IDX(qid); 2353 2354 memset(&ctx, 0x0, sizeof(ctx)); 2355 2356 ctx.qid = ena_qid; 2357 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; 2358 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 2359 ctx.msix_vector = msix_vector; 2360 ctx.queue_size = rx_ring->ring_size; 2361 ctx.numa_node = cpu_to_node(rx_ring->cpu); 2362 2363 rc = ena_com_create_io_queue(ena_dev, &ctx); 2364 if (rc) { 2365 netif_err(adapter, ifup, adapter->netdev, 2366 "Failed to create I/O RX queue num %d rc: %d\n", 2367 qid, rc); 2368 return rc; 2369 } 2370 2371 rc = ena_com_get_io_handlers(ena_dev, ena_qid, 2372 &rx_ring->ena_com_io_sq, 2373 &rx_ring->ena_com_io_cq); 2374 if (rc) { 2375 netif_err(adapter, ifup, adapter->netdev, 2376 "Failed to get RX queue handlers. RX queue num %d rc: %d\n", 2377 qid, rc); 2378 goto err; 2379 } 2380 2381 ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); 2382 2383 return rc; 2384err: 2385 ena_com_destroy_io_queue(ena_dev, ena_qid); 2386 return rc; 2387} 2388 2389static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) 2390{ 2391 struct ena_com_dev *ena_dev = adapter->ena_dev; 2392 int rc, i; 2393 2394 for (i = 0; i < adapter->num_io_queues; i++) { 2395 rc = ena_create_io_rx_queue(adapter, i); 2396 if (rc) 2397 goto create_err; 2398 INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work); 2399 } 2400 2401 return 0; 2402 2403create_err: 2404 while (i--) { 2405 cancel_work_sync(&adapter->ena_napi[i].dim.work); 2406 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); 2407 } 2408 2409 return rc; 2410} 2411 2412static void set_io_rings_size(struct ena_adapter *adapter, 2413 int new_tx_size, 2414 int new_rx_size) 2415{ 2416 int i; 2417 2418 for (i = 0; i < adapter->num_io_queues; i++) { 2419 adapter->tx_ring[i].ring_size = new_tx_size; 2420 adapter->rx_ring[i].ring_size = new_rx_size; 2421 } 2422} 2423 2424/* This function allows queue allocation to backoff when the system is 2425 * low on memory. If there is not enough memory to allocate io queues 2426 * the driver will try to allocate smaller queues. 2427 * 2428 * The backoff algorithm is as follows: 2429 * 1. Try to allocate TX and RX and if successful. 2430 * 1.1. return success 2431 * 2432 * 2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). 2433 * 2434 * 3. If TX or RX is smaller than 256 2435 * 3.1. return failure. 2436 * 4. else 2437 * 4.1. go back to 1. 
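 *
 * Illustrative example of the backoff (hypothetical sizes, with the 256
 * minimum from step 3 being ENA_MIN_RING_SIZE): starting from TX=1024 and
 * RX=1024, a first -ENOMEM retries with 512/512, then 256/256; the next
 * halving would go below the minimum, so the attempt is aborted instead.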
2438 */ 2439static int create_queues_with_size_backoff(struct ena_adapter *adapter) 2440{ 2441 int rc, cur_rx_ring_size, cur_tx_ring_size; 2442 int new_rx_ring_size, new_tx_ring_size; 2443 2444 /* current queue sizes might be set to smaller than the requested 2445 * ones due to past queue allocation failures. 2446 */ 2447 set_io_rings_size(adapter, adapter->requested_tx_ring_size, 2448 adapter->requested_rx_ring_size); 2449 2450 while (1) { 2451 if (ena_xdp_present(adapter)) { 2452 rc = ena_setup_and_create_all_xdp_queues(adapter); 2453 2454 if (rc) 2455 goto err_setup_tx; 2456 } 2457 rc = ena_setup_tx_resources_in_range(adapter, 2458 0, 2459 adapter->num_io_queues); 2460 if (rc) 2461 goto err_setup_tx; 2462 2463 rc = ena_create_io_tx_queues_in_range(adapter, 2464 0, 2465 adapter->num_io_queues); 2466 if (rc) 2467 goto err_create_tx_queues; 2468 2469 rc = ena_setup_all_rx_resources(adapter); 2470 if (rc) 2471 goto err_setup_rx; 2472 2473 rc = ena_create_all_io_rx_queues(adapter); 2474 if (rc) 2475 goto err_create_rx_queues; 2476 2477 return 0; 2478 2479err_create_rx_queues: 2480 ena_free_all_io_rx_resources(adapter); 2481err_setup_rx: 2482 ena_destroy_all_tx_queues(adapter); 2483err_create_tx_queues: 2484 ena_free_all_io_tx_resources(adapter); 2485err_setup_tx: 2486 if (rc != -ENOMEM) { 2487 netif_err(adapter, ifup, adapter->netdev, 2488 "Queue creation failed with error code %d\n", 2489 rc); 2490 return rc; 2491 } 2492 2493 cur_tx_ring_size = adapter->tx_ring[0].ring_size; 2494 cur_rx_ring_size = adapter->rx_ring[0].ring_size; 2495 2496 netif_err(adapter, ifup, adapter->netdev, 2497 "Not enough memory to create queues with sizes TX=%d, RX=%d\n", 2498 cur_tx_ring_size, cur_rx_ring_size); 2499 2500 new_tx_ring_size = cur_tx_ring_size; 2501 new_rx_ring_size = cur_rx_ring_size; 2502 2503 /* Decrease the size of the larger queue, or 2504 * decrease both if they are the same size. 2505 */ 2506 if (cur_rx_ring_size <= cur_tx_ring_size) 2507 new_tx_ring_size = cur_tx_ring_size / 2; 2508 if (cur_rx_ring_size >= cur_tx_ring_size) 2509 new_rx_ring_size = cur_rx_ring_size / 2; 2510 2511 if (new_tx_ring_size < ENA_MIN_RING_SIZE || 2512 new_rx_ring_size < ENA_MIN_RING_SIZE) { 2513 netif_err(adapter, ifup, adapter->netdev, 2514 "Queue creation failed with the smallest possible queue size of %d for both queues. 
Not retrying with smaller queues\n", 2515 ENA_MIN_RING_SIZE); 2516 return rc; 2517 } 2518 2519 netif_err(adapter, ifup, adapter->netdev, 2520 "Retrying queue creation with sizes TX=%d, RX=%d\n", 2521 new_tx_ring_size, 2522 new_rx_ring_size); 2523 2524 set_io_rings_size(adapter, new_tx_ring_size, 2525 new_rx_ring_size); 2526 } 2527} 2528 2529static int ena_up(struct ena_adapter *adapter) 2530{ 2531 int io_queue_count, rc, i; 2532 2533 netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__); 2534 2535 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2536 ena_setup_io_intr(adapter); 2537 2538 /* napi poll functions should be initialized before running 2539 * request_irq(), to handle a rare condition where there is a pending 2540 * interrupt, causing the ISR to fire immediately while the poll 2541 * function wasn't set yet, causing a null dereference 2542 */ 2543 ena_init_napi_in_range(adapter, 0, io_queue_count); 2544 2545 rc = ena_request_io_irq(adapter); 2546 if (rc) 2547 goto err_req_irq; 2548 2549 rc = create_queues_with_size_backoff(adapter); 2550 if (rc) 2551 goto err_create_queues_with_backoff; 2552 2553 rc = ena_up_complete(adapter); 2554 if (rc) 2555 goto err_up; 2556 2557 if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) 2558 netif_carrier_on(adapter->netdev); 2559 2560 u64_stats_update_begin(&adapter->syncp); 2561 adapter->dev_stats.interface_up++; 2562 u64_stats_update_end(&adapter->syncp); 2563 2564 set_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2565 2566 /* Enable completion queues interrupt */ 2567 for (i = 0; i < adapter->num_io_queues; i++) 2568 ena_unmask_interrupt(&adapter->tx_ring[i], 2569 &adapter->rx_ring[i]); 2570 2571 /* schedule napi in case we had pending packets 2572 * from the last time we disable napi 2573 */ 2574 for (i = 0; i < io_queue_count; i++) 2575 napi_schedule(&adapter->ena_napi[i].napi); 2576 2577 return rc; 2578 2579err_up: 2580 ena_destroy_all_tx_queues(adapter); 2581 ena_free_all_io_tx_resources(adapter); 2582 ena_destroy_all_rx_queues(adapter); 2583 ena_free_all_io_rx_resources(adapter); 2584err_create_queues_with_backoff: 2585 ena_free_io_irq(adapter); 2586err_req_irq: 2587 ena_del_napi_in_range(adapter, 0, io_queue_count); 2588 2589 return rc; 2590} 2591 2592static void ena_down(struct ena_adapter *adapter) 2593{ 2594 int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; 2595 2596 netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); 2597 2598 clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2599 2600 u64_stats_update_begin(&adapter->syncp); 2601 adapter->dev_stats.interface_down++; 2602 u64_stats_update_end(&adapter->syncp); 2603 2604 netif_carrier_off(adapter->netdev); 2605 netif_tx_disable(adapter->netdev); 2606 2607 /* After this point the napi handler won't enable the tx queue */ 2608 ena_napi_disable_in_range(adapter, 0, io_queue_count); 2609 2610 /* After destroy the queue there won't be any new interrupts */ 2611 2612 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) { 2613 int rc; 2614 2615 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 2616 if (rc) 2617 netif_err(adapter, ifdown, adapter->netdev, 2618 "Device reset failed\n"); 2619 /* stop submitting admin commands on a device that was reset */ 2620 ena_com_set_admin_running_state(adapter->ena_dev, false); 2621 } 2622 2623 ena_destroy_all_io_queues(adapter); 2624 2625 ena_disable_io_intr_sync(adapter); 2626 ena_free_io_irq(adapter); 2627 ena_del_napi_in_range(adapter, 0, io_queue_count); 2628 2629 
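	/* At this point the IRQs are released and the NAPI contexts are deleted,
	 * so the remaining buffers and ring resources below can be freed without
	 * racing against the datapath.
	 */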
ena_free_all_tx_bufs(adapter); 2630 ena_free_all_rx_bufs(adapter); 2631 ena_free_all_io_tx_resources(adapter); 2632 ena_free_all_io_rx_resources(adapter); 2633} 2634 2635/* ena_open - Called when a network interface is made active 2636 * @netdev: network interface device structure 2637 * 2638 * Returns 0 on success, negative value on failure 2639 * 2640 * The open entry point is called when a network interface is made 2641 * active by the system (IFF_UP). At this point all resources needed 2642 * for transmit and receive operations are allocated, the interrupt 2643 * handler is registered with the OS, the watchdog timer is started, 2644 * and the stack is notified that the interface is ready. 2645 */ 2646static int ena_open(struct net_device *netdev) 2647{ 2648 struct ena_adapter *adapter = netdev_priv(netdev); 2649 int rc; 2650 2651 /* Notify the stack of the actual queue counts. */ 2652 rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues); 2653 if (rc) { 2654 netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); 2655 return rc; 2656 } 2657 2658 rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues); 2659 if (rc) { 2660 netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); 2661 return rc; 2662 } 2663 2664 rc = ena_up(adapter); 2665 if (rc) 2666 return rc; 2667 2668 return rc; 2669} 2670 2671/* ena_close - Disables a network interface 2672 * @netdev: network interface device structure 2673 * 2674 * Returns 0, this is not allowed to fail 2675 * 2676 * The close entry point is called when an interface is de-activated 2677 * by the OS. The hardware is still under the drivers control, but 2678 * needs to be disabled. A global MAC reset is issued to stop the 2679 * hardware, and all transmit and receive resources are freed. 2680 */ 2681static int ena_close(struct net_device *netdev) 2682{ 2683 struct ena_adapter *adapter = netdev_priv(netdev); 2684 2685 netif_dbg(adapter, ifdown, netdev, "%s\n", __func__); 2686 2687 if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) 2688 return 0; 2689 2690 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 2691 ena_down(adapter); 2692 2693 /* Check for device status and issue reset if needed*/ 2694 check_for_admin_com_state(adapter); 2695 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { 2696 netif_err(adapter, ifdown, adapter->netdev, 2697 "Destroy failure, restarting device\n"); 2698 ena_dump_stats_to_dmesg(adapter); 2699 /* rtnl lock already obtained in dev_ioctl() layer */ 2700 ena_destroy_device(adapter, false); 2701 ena_restore_device(adapter); 2702 } 2703 2704 return 0; 2705} 2706 2707int ena_update_queue_sizes(struct ena_adapter *adapter, 2708 u32 new_tx_size, 2709 u32 new_rx_size) 2710{ 2711 bool dev_was_up; 2712 2713 dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2714 ena_close(adapter->netdev); 2715 adapter->requested_tx_ring_size = new_tx_size; 2716 adapter->requested_rx_ring_size = new_rx_size; 2717 ena_init_io_rings(adapter, 2718 0, 2719 adapter->xdp_num_queues + 2720 adapter->num_io_queues); 2721 return dev_was_up ? 
ena_up(adapter) : 0; 2722} 2723 2724int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) 2725{ 2726 struct ena_com_dev *ena_dev = adapter->ena_dev; 2727 int prev_channel_count; 2728 bool dev_was_up; 2729 2730 dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 2731 ena_close(adapter->netdev); 2732 prev_channel_count = adapter->num_io_queues; 2733 adapter->num_io_queues = new_channel_count; 2734 if (ena_xdp_present(adapter) && 2735 ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) { 2736 adapter->xdp_first_ring = new_channel_count; 2737 adapter->xdp_num_queues = new_channel_count; 2738 if (prev_channel_count > new_channel_count) 2739 ena_xdp_exchange_program_rx_in_range(adapter, 2740 NULL, 2741 new_channel_count, 2742 prev_channel_count); 2743 else 2744 ena_xdp_exchange_program_rx_in_range(adapter, 2745 adapter->xdp_bpf_prog, 2746 prev_channel_count, 2747 new_channel_count); 2748 } 2749 2750 /* We need to destroy the rss table so that the indirection 2751 * table will be reinitialized by ena_up() 2752 */ 2753 ena_com_rss_destroy(ena_dev); 2754 ena_init_io_rings(adapter, 2755 0, 2756 adapter->xdp_num_queues + 2757 adapter->num_io_queues); 2758 return dev_was_up ? ena_open(adapter->netdev) : 0; 2759} 2760 2761static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, 2762 struct sk_buff *skb, 2763 bool disable_meta_caching) 2764{ 2765 u32 mss = skb_shinfo(skb)->gso_size; 2766 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; 2767 u8 l4_protocol = 0; 2768 2769 if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) { 2770 ena_tx_ctx->l4_csum_enable = 1; 2771 if (mss) { 2772 ena_tx_ctx->tso_enable = 1; 2773 ena_meta->l4_hdr_len = tcp_hdr(skb)->doff; 2774 ena_tx_ctx->l4_csum_partial = 0; 2775 } else { 2776 ena_tx_ctx->tso_enable = 0; 2777 ena_meta->l4_hdr_len = 0; 2778 ena_tx_ctx->l4_csum_partial = 1; 2779 } 2780 2781 switch (ip_hdr(skb)->version) { 2782 case IPVERSION: 2783 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; 2784 if (ip_hdr(skb)->frag_off & htons(IP_DF)) 2785 ena_tx_ctx->df = 1; 2786 if (mss) 2787 ena_tx_ctx->l3_csum_enable = 1; 2788 l4_protocol = ip_hdr(skb)->protocol; 2789 break; 2790 case 6: 2791 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; 2792 l4_protocol = ipv6_hdr(skb)->nexthdr; 2793 break; 2794 default: 2795 break; 2796 } 2797 2798 if (l4_protocol == IPPROTO_TCP) 2799 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; 2800 else 2801 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; 2802 2803 ena_meta->mss = mss; 2804 ena_meta->l3_hdr_len = skb_network_header_len(skb); 2805 ena_meta->l3_hdr_offset = skb_network_offset(skb); 2806 ena_tx_ctx->meta_valid = 1; 2807 } else if (disable_meta_caching) { 2808 memset(ena_meta, 0, sizeof(*ena_meta)); 2809 ena_tx_ctx->meta_valid = 1; 2810 } else { 2811 ena_tx_ctx->meta_valid = 0; 2812 } 2813} 2814 2815static int ena_check_and_linearize_skb(struct ena_ring *tx_ring, 2816 struct sk_buff *skb) 2817{ 2818 int num_frags, header_len, rc; 2819 2820 num_frags = skb_shinfo(skb)->nr_frags; 2821 header_len = skb_headlen(skb); 2822 2823 if (num_frags < tx_ring->sgl_size) 2824 return 0; 2825 2826 if ((num_frags == tx_ring->sgl_size) && 2827 (header_len < tx_ring->tx_max_header_size)) 2828 return 0; 2829 2830 u64_stats_update_begin(&tx_ring->syncp); 2831 tx_ring->tx_stats.linearize++; 2832 u64_stats_update_end(&tx_ring->syncp); 2833 2834 rc = skb_linearize(skb); 2835 if (unlikely(rc)) { 2836 u64_stats_update_begin(&tx_ring->syncp); 2837 tx_ring->tx_stats.linearize_failed++; 2838 u64_stats_update_end(&tx_ring->syncp); 2839 } 
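	/* A non-zero rc here means skb_linearize() failed (typically -ENOMEM);
	 * the caller is expected to drop the packet in that case.
	 */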
2840 2841 return rc; 2842} 2843 2844static int ena_tx_map_skb(struct ena_ring *tx_ring, 2845 struct ena_tx_buffer *tx_info, 2846 struct sk_buff *skb, 2847 void **push_hdr, 2848 u16 *header_len) 2849{ 2850 struct ena_adapter *adapter = tx_ring->adapter; 2851 struct ena_com_buf *ena_buf; 2852 dma_addr_t dma; 2853 u32 skb_head_len, frag_len, last_frag; 2854 u16 push_len = 0; 2855 u16 delta = 0; 2856 int i = 0; 2857 2858 skb_head_len = skb_headlen(skb); 2859 tx_info->skb = skb; 2860 ena_buf = tx_info->bufs; 2861 2862 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 2863 /* When the device is LLQ mode, the driver will copy 2864 * the header into the device memory space. 2865 * the ena_com layer assume the header is in a linear 2866 * memory space. 2867 * This assumption might be wrong since part of the header 2868 * can be in the fragmented buffers. 2869 * Use skb_header_pointer to make sure the header is in a 2870 * linear memory space. 2871 */ 2872 2873 push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size); 2874 *push_hdr = skb_header_pointer(skb, 0, push_len, 2875 tx_ring->push_buf_intermediate_buf); 2876 *header_len = push_len; 2877 if (unlikely(skb->data != *push_hdr)) { 2878 u64_stats_update_begin(&tx_ring->syncp); 2879 tx_ring->tx_stats.llq_buffer_copy++; 2880 u64_stats_update_end(&tx_ring->syncp); 2881 2882 delta = push_len - skb_head_len; 2883 } 2884 } else { 2885 *push_hdr = NULL; 2886 *header_len = min_t(u32, skb_head_len, 2887 tx_ring->tx_max_header_size); 2888 } 2889 2890 netif_dbg(adapter, tx_queued, adapter->netdev, 2891 "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, 2892 *push_hdr, push_len); 2893 2894 if (skb_head_len > push_len) { 2895 dma = dma_map_single(tx_ring->dev, skb->data + push_len, 2896 skb_head_len - push_len, DMA_TO_DEVICE); 2897 if (unlikely(dma_mapping_error(tx_ring->dev, dma))) 2898 goto error_report_dma_error; 2899 2900 ena_buf->paddr = dma; 2901 ena_buf->len = skb_head_len - push_len; 2902 2903 ena_buf++; 2904 tx_info->num_of_bufs++; 2905 tx_info->map_linear_data = 1; 2906 } else { 2907 tx_info->map_linear_data = 0; 2908 } 2909 2910 last_frag = skb_shinfo(skb)->nr_frags; 2911 2912 for (i = 0; i < last_frag; i++) { 2913 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2914 2915 frag_len = skb_frag_size(frag); 2916 2917 if (unlikely(delta >= frag_len)) { 2918 delta -= frag_len; 2919 continue; 2920 } 2921 2922 dma = skb_frag_dma_map(tx_ring->dev, frag, delta, 2923 frag_len - delta, DMA_TO_DEVICE); 2924 if (unlikely(dma_mapping_error(tx_ring->dev, dma))) 2925 goto error_report_dma_error; 2926 2927 ena_buf->paddr = dma; 2928 ena_buf->len = frag_len - delta; 2929 ena_buf++; 2930 tx_info->num_of_bufs++; 2931 delta = 0; 2932 } 2933 2934 return 0; 2935 2936error_report_dma_error: 2937 u64_stats_update_begin(&tx_ring->syncp); 2938 tx_ring->tx_stats.dma_mapping_err++; 2939 u64_stats_update_end(&tx_ring->syncp); 2940 netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n"); 2941 2942 tx_info->skb = NULL; 2943 2944 tx_info->num_of_bufs += i; 2945 ena_unmap_tx_buff(tx_ring, tx_info); 2946 2947 return -EINVAL; 2948} 2949 2950/* Called with netif_tx_lock. 
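 * The stack holds the per-queue transmit lock here, so sends on a given ring
 * are serialized and the producer-side state (next_to_use, free_ids) is never
 * written by two senders at once.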
*/ 2951static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) 2952{ 2953 struct ena_adapter *adapter = netdev_priv(dev); 2954 struct ena_tx_buffer *tx_info; 2955 struct ena_com_tx_ctx ena_tx_ctx; 2956 struct ena_ring *tx_ring; 2957 struct netdev_queue *txq; 2958 void *push_hdr; 2959 u16 next_to_use, req_id, header_len; 2960 int qid, rc; 2961 2962 netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); 2963 /* Determine which tx ring we will be placed on */ 2964 qid = skb_get_queue_mapping(skb); 2965 tx_ring = &adapter->tx_ring[qid]; 2966 txq = netdev_get_tx_queue(dev, qid); 2967 2968 rc = ena_check_and_linearize_skb(tx_ring, skb); 2969 if (unlikely(rc)) 2970 goto error_drop_packet; 2971 2972 skb_tx_timestamp(skb); 2973 2974 next_to_use = tx_ring->next_to_use; 2975 req_id = tx_ring->free_ids[next_to_use]; 2976 tx_info = &tx_ring->tx_buffer_info[req_id]; 2977 tx_info->num_of_bufs = 0; 2978 2979 WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); 2980 2981 rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len); 2982 if (unlikely(rc)) 2983 goto error_drop_packet; 2984 2985 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); 2986 ena_tx_ctx.ena_bufs = tx_info->bufs; 2987 ena_tx_ctx.push_header = push_hdr; 2988 ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 2989 ena_tx_ctx.req_id = req_id; 2990 ena_tx_ctx.header_len = header_len; 2991 2992 /* set flags and meta data */ 2993 ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching); 2994 2995 rc = ena_xmit_common(dev, 2996 tx_ring, 2997 tx_info, 2998 &ena_tx_ctx, 2999 next_to_use, 3000 skb->len); 3001 if (rc) 3002 goto error_unmap_dma; 3003 3004 netdev_tx_sent_queue(txq, skb->len); 3005 3006 /* stop the queue when no more space available, the packet can have up 3007 * to sgl_size + 2. one for the meta descriptor and one for header 3008 * (if the header is larger than tx_max_header_size). 3009 */ 3010 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3011 tx_ring->sgl_size + 2))) { 3012 netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", 3013 __func__, qid); 3014 3015 netif_tx_stop_queue(txq); 3016 u64_stats_update_begin(&tx_ring->syncp); 3017 tx_ring->tx_stats.queue_stop++; 3018 u64_stats_update_end(&tx_ring->syncp); 3019 3020 /* There is a rare condition where this function decide to 3021 * stop the queue but meanwhile clean_tx_irq updates 3022 * next_to_completion and terminates. 3023 * The queue will remain stopped forever. 3024 * To solve this issue add a mb() to make sure that 3025 * netif_tx_stop_queue() write is vissible before checking if 3026 * there is additional space in the queue. 3027 */ 3028 smp_mb(); 3029 3030 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq, 3031 ENA_TX_WAKEUP_THRESH)) { 3032 netif_tx_wake_queue(txq); 3033 u64_stats_update_begin(&tx_ring->syncp); 3034 tx_ring->tx_stats.queue_wakeup++; 3035 u64_stats_update_end(&tx_ring->syncp); 3036 } 3037 } 3038 3039 if (netif_xmit_stopped(txq) || !netdev_xmit_more()) { 3040 /* trigger the dma engine. 
ena_com_write_sq_doorbell() 3041 * has a mb 3042 */ 3043 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); 3044 u64_stats_update_begin(&tx_ring->syncp); 3045 tx_ring->tx_stats.doorbells++; 3046 u64_stats_update_end(&tx_ring->syncp); 3047 } 3048 3049 return NETDEV_TX_OK; 3050 3051error_unmap_dma: 3052 ena_unmap_tx_buff(tx_ring, tx_info); 3053 tx_info->skb = NULL; 3054 3055error_drop_packet: 3056 dev_kfree_skb(skb); 3057 return NETDEV_TX_OK; 3058} 3059 3060static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, 3061 struct net_device *sb_dev) 3062{ 3063 u16 qid; 3064 /* we suspect that this is good for in--kernel network services that 3065 * want to loop incoming skb rx to tx in normal user generated traffic, 3066 * most probably we will not get to this 3067 */ 3068 if (skb_rx_queue_recorded(skb)) 3069 qid = skb_get_rx_queue(skb); 3070 else 3071 qid = netdev_pick_tx(dev, skb, NULL); 3072 3073 return qid; 3074} 3075 3076static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) 3077{ 3078 struct device *dev = &pdev->dev; 3079 struct ena_admin_host_info *host_info; 3080 int rc; 3081 3082 /* Allocate only the host info */ 3083 rc = ena_com_allocate_host_info(ena_dev); 3084 if (rc) { 3085 dev_err(dev, "Cannot allocate host info\n"); 3086 return; 3087 } 3088 3089 host_info = ena_dev->host_attr.host_info; 3090 3091 host_info->bdf = (pdev->bus->number << 8) | pdev->devfn; 3092 host_info->os_type = ENA_ADMIN_OS_LINUX; 3093 host_info->kernel_ver = LINUX_VERSION_CODE; 3094 strlcpy(host_info->kernel_ver_str, utsname()->version, 3095 sizeof(host_info->kernel_ver_str) - 1); 3096 host_info->os_dist = 0; 3097 strncpy(host_info->os_dist_str, utsname()->release, 3098 sizeof(host_info->os_dist_str) - 1); 3099 host_info->driver_version = 3100 (DRV_MODULE_GEN_MAJOR) | 3101 (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | 3102 (DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) | 3103 ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT); 3104 host_info->num_cpus = num_online_cpus(); 3105 3106 host_info->driver_supported_features = 3107 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | 3108 ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK | 3109 ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK | 3110 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; 3111 3112 rc = ena_com_set_host_attributes(ena_dev); 3113 if (rc) { 3114 if (rc == -EOPNOTSUPP) 3115 dev_warn(dev, "Cannot set host attributes\n"); 3116 else 3117 dev_err(dev, "Cannot set host attributes\n"); 3118 3119 goto err; 3120 } 3121 3122 return; 3123 3124err: 3125 ena_com_delete_host_info(ena_dev); 3126} 3127 3128static void ena_config_debug_area(struct ena_adapter *adapter) 3129{ 3130 u32 debug_area_size; 3131 int rc, ss_count; 3132 3133 ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS); 3134 if (ss_count <= 0) { 3135 netif_err(adapter, drv, adapter->netdev, 3136 "SS count is negative\n"); 3137 return; 3138 } 3139 3140 /* allocate 32 bytes for each string and 64bit for the value */ 3141 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; 3142 3143 rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size); 3144 if (rc) { 3145 netif_err(adapter, drv, adapter->netdev, 3146 "Cannot allocate debug area\n"); 3147 return; 3148 } 3149 3150 rc = ena_com_set_host_attributes(adapter->ena_dev); 3151 if (rc) { 3152 if (rc == -EOPNOTSUPP) 3153 netif_warn(adapter, drv, adapter->netdev, 3154 "Cannot set host attributes\n"); 3155 else 3156 netif_err(adapter, drv, adapter->netdev, 3157 
"Cannot set host attributes\n"); 3158 goto err; 3159 } 3160 3161 return; 3162err: 3163 ena_com_delete_debug_area(adapter->ena_dev); 3164} 3165 3166int ena_update_hw_stats(struct ena_adapter *adapter) 3167{ 3168 int rc = 0; 3169 3170 rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats); 3171 if (rc) { 3172 dev_info_once(&adapter->pdev->dev, "Failed to get ENI stats\n"); 3173 return rc; 3174 } 3175 3176 return 0; 3177} 3178 3179static void ena_get_stats64(struct net_device *netdev, 3180 struct rtnl_link_stats64 *stats) 3181{ 3182 struct ena_adapter *adapter = netdev_priv(netdev); 3183 struct ena_ring *rx_ring, *tx_ring; 3184 unsigned int start; 3185 u64 rx_drops; 3186 u64 tx_drops; 3187 int i; 3188 3189 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3190 return; 3191 3192 for (i = 0; i < adapter->num_io_queues; i++) { 3193 u64 bytes, packets; 3194 3195 tx_ring = &adapter->tx_ring[i]; 3196 3197 do { 3198 start = u64_stats_fetch_begin_irq(&tx_ring->syncp); 3199 packets = tx_ring->tx_stats.cnt; 3200 bytes = tx_ring->tx_stats.bytes; 3201 } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); 3202 3203 stats->tx_packets += packets; 3204 stats->tx_bytes += bytes; 3205 3206 rx_ring = &adapter->rx_ring[i]; 3207 3208 do { 3209 start = u64_stats_fetch_begin_irq(&rx_ring->syncp); 3210 packets = rx_ring->rx_stats.cnt; 3211 bytes = rx_ring->rx_stats.bytes; 3212 } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); 3213 3214 stats->rx_packets += packets; 3215 stats->rx_bytes += bytes; 3216 } 3217 3218 do { 3219 start = u64_stats_fetch_begin_irq(&adapter->syncp); 3220 rx_drops = adapter->dev_stats.rx_drops; 3221 tx_drops = adapter->dev_stats.tx_drops; 3222 } while (u64_stats_fetch_retry_irq(&adapter->syncp, start)); 3223 3224 stats->rx_dropped = rx_drops; 3225 stats->tx_dropped = tx_drops; 3226 3227 stats->multicast = 0; 3228 stats->collisions = 0; 3229 3230 stats->rx_length_errors = 0; 3231 stats->rx_crc_errors = 0; 3232 stats->rx_frame_errors = 0; 3233 stats->rx_fifo_errors = 0; 3234 stats->rx_missed_errors = 0; 3235 stats->tx_window_errors = 0; 3236 3237 stats->rx_errors = 0; 3238 stats->tx_errors = 0; 3239} 3240 3241static const struct net_device_ops ena_netdev_ops = { 3242 .ndo_open = ena_open, 3243 .ndo_stop = ena_close, 3244 .ndo_start_xmit = ena_start_xmit, 3245 .ndo_select_queue = ena_select_queue, 3246 .ndo_get_stats64 = ena_get_stats64, 3247 .ndo_tx_timeout = ena_tx_timeout, 3248 .ndo_change_mtu = ena_change_mtu, 3249 .ndo_set_mac_address = NULL, 3250 .ndo_validate_addr = eth_validate_addr, 3251 .ndo_bpf = ena_xdp, 3252}; 3253 3254static int ena_device_validate_params(struct ena_adapter *adapter, 3255 struct ena_com_dev_get_features_ctx *get_feat_ctx) 3256{ 3257 struct net_device *netdev = adapter->netdev; 3258 int rc; 3259 3260 rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr, 3261 adapter->mac_addr); 3262 if (!rc) { 3263 netif_err(adapter, drv, netdev, 3264 "Error, mac address are different\n"); 3265 return -EINVAL; 3266 } 3267 3268 if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { 3269 netif_err(adapter, drv, netdev, 3270 "Error, device max mtu is smaller than netdev MTU\n"); 3271 return -EINVAL; 3272 } 3273 3274 return 0; 3275} 3276 3277static void set_default_llq_configurations(struct ena_llq_configurations *llq_config) 3278{ 3279 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; 3280 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY; 3281 llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2; 3282 
llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; 3283 llq_config->llq_ring_entry_size_value = 128; 3284} 3285 3286static int ena_set_queues_placement_policy(struct pci_dev *pdev, 3287 struct ena_com_dev *ena_dev, 3288 struct ena_admin_feature_llq_desc *llq, 3289 struct ena_llq_configurations *llq_default_configurations) 3290{ 3291 int rc; 3292 u32 llq_feature_mask; 3293 3294 llq_feature_mask = 1 << ENA_ADMIN_LLQ; 3295 if (!(ena_dev->supported_features & llq_feature_mask)) { 3296 dev_err(&pdev->dev, 3297 "LLQ is not supported Fallback to host mode policy.\n"); 3298 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 3299 return 0; 3300 } 3301 3302 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations); 3303 if (unlikely(rc)) { 3304 dev_err(&pdev->dev, 3305 "Failed to configure the device mode. Fallback to host mode policy.\n"); 3306 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 3307 } 3308 3309 return 0; 3310} 3311 3312static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev, 3313 int bars) 3314{ 3315 bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR)); 3316 3317 if (!has_mem_bar) { 3318 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 3319 dev_err(&pdev->dev, 3320 "ENA device does not expose LLQ bar. Fallback to host mode policy.\n"); 3321 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; 3322 } 3323 3324 return 0; 3325 } 3326 3327 ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev, 3328 pci_resource_start(pdev, ENA_MEM_BAR), 3329 pci_resource_len(pdev, ENA_MEM_BAR)); 3330 3331 if (!ena_dev->mem_bar) 3332 return -EFAULT; 3333 3334 return 0; 3335} 3336 3337static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, 3338 struct ena_com_dev_get_features_ctx *get_feat_ctx, 3339 bool *wd_state) 3340{ 3341 struct ena_llq_configurations llq_config; 3342 struct device *dev = &pdev->dev; 3343 bool readless_supported; 3344 u32 aenq_groups; 3345 int dma_width; 3346 int rc; 3347 3348 rc = ena_com_mmio_reg_read_request_init(ena_dev); 3349 if (rc) { 3350 dev_err(dev, "Failed to init mmio read less\n"); 3351 return rc; 3352 } 3353 3354 /* The PCIe configuration space revision id indicate if mmio reg 3355 * read is disabled 3356 */ 3357 readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ); 3358 ena_com_set_mmio_read_mode(ena_dev, readless_supported); 3359 3360 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL); 3361 if (rc) { 3362 dev_err(dev, "Can not reset device\n"); 3363 goto err_mmio_read_less; 3364 } 3365 3366 rc = ena_com_validate_version(ena_dev); 3367 if (rc) { 3368 dev_err(dev, "Device version is too low\n"); 3369 goto err_mmio_read_less; 3370 } 3371 3372 dma_width = ena_com_get_dma_width(ena_dev); 3373 if (dma_width < 0) { 3374 dev_err(dev, "Invalid dma width value %d", dma_width); 3375 rc = dma_width; 3376 goto err_mmio_read_less; 3377 } 3378 3379 rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width)); 3380 if (rc) { 3381 dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc); 3382 goto err_mmio_read_less; 3383 } 3384 3385 /* ENA admin level init */ 3386 rc = ena_com_admin_init(ena_dev, &aenq_handlers); 3387 if (rc) { 3388 dev_err(dev, 3389 "Can not initialize ena admin queue with device\n"); 3390 goto err_mmio_read_less; 3391 } 3392 3393 /* To enable the msix interrupts the driver needs to know the number 3394 * of queues. 
So the driver uses polling mode to retrieve this 3395 * information 3396 */ 3397 ena_com_set_admin_polling_mode(ena_dev, true); 3398 3399 ena_config_host_info(ena_dev, pdev); 3400 3401 /* Get Device Attributes*/ 3402 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); 3403 if (rc) { 3404 dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc); 3405 goto err_admin_init; 3406 } 3407 3408 /* Try to turn all the available aenq groups */ 3409 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | 3410 BIT(ENA_ADMIN_FATAL_ERROR) | 3411 BIT(ENA_ADMIN_WARNING) | 3412 BIT(ENA_ADMIN_NOTIFICATION) | 3413 BIT(ENA_ADMIN_KEEP_ALIVE); 3414 3415 aenq_groups &= get_feat_ctx->aenq.supported_groups; 3416 3417 rc = ena_com_set_aenq_config(ena_dev, aenq_groups); 3418 if (rc) { 3419 dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc); 3420 goto err_admin_init; 3421 } 3422 3423 *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); 3424 3425 set_default_llq_configurations(&llq_config); 3426 3427 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq, 3428 &llq_config); 3429 if (rc) { 3430 dev_err(dev, "ENA device init failed\n"); 3431 goto err_admin_init; 3432 } 3433 3434 return 0; 3435 3436err_admin_init: 3437 ena_com_delete_host_info(ena_dev); 3438 ena_com_admin_destroy(ena_dev); 3439err_mmio_read_less: 3440 ena_com_mmio_reg_read_request_destroy(ena_dev); 3441 3442 return rc; 3443} 3444 3445static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) 3446{ 3447 struct ena_com_dev *ena_dev = adapter->ena_dev; 3448 struct device *dev = &adapter->pdev->dev; 3449 int rc; 3450 3451 rc = ena_enable_msix(adapter); 3452 if (rc) { 3453 dev_err(dev, "Can not reserve msix vectors\n"); 3454 return rc; 3455 } 3456 3457 ena_setup_mgmnt_intr(adapter); 3458 3459 rc = ena_request_mgmnt_irq(adapter); 3460 if (rc) { 3461 dev_err(dev, "Can not setup management interrupts\n"); 3462 goto err_disable_msix; 3463 } 3464 3465 ena_com_set_admin_polling_mode(ena_dev, false); 3466 3467 ena_com_admin_aenq_enable(ena_dev); 3468 3469 return 0; 3470 3471err_disable_msix: 3472 ena_disable_msix(adapter); 3473 3474 return rc; 3475} 3476 3477static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) 3478{ 3479 struct net_device *netdev = adapter->netdev; 3480 struct ena_com_dev *ena_dev = adapter->ena_dev; 3481 bool dev_up; 3482 3483 if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) 3484 return; 3485 3486 netif_carrier_off(netdev); 3487 3488 del_timer_sync(&adapter->timer_service); 3489 3490 dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); 3491 adapter->dev_up_before_reset = dev_up; 3492 if (!graceful) 3493 ena_com_set_admin_running_state(ena_dev, false); 3494 3495 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3496 ena_down(adapter); 3497 3498 /* Stop the device from sending AENQ events (in case reset flag is set 3499 * and device is up, ena_down() already reset the device. 
3500 */ 3501 if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) 3502 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); 3503 3504 ena_free_mgmnt_irq(adapter); 3505 3506 ena_disable_msix(adapter); 3507 3508 ena_com_abort_admin_commands(ena_dev); 3509 3510 ena_com_wait_for_abort_completion(ena_dev); 3511 3512 ena_com_admin_destroy(ena_dev); 3513 3514 ena_com_mmio_reg_read_request_destroy(ena_dev); 3515 3516 /* return reset reason to default value */ 3517 adapter->reset_reason = ENA_REGS_RESET_NORMAL; 3518 3519 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3520 clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); 3521} 3522 3523static int ena_restore_device(struct ena_adapter *adapter) 3524{ 3525 struct ena_com_dev_get_features_ctx get_feat_ctx; 3526 struct ena_com_dev *ena_dev = adapter->ena_dev; 3527 struct pci_dev *pdev = adapter->pdev; 3528 bool wd_state; 3529 int rc; 3530 3531 set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); 3532 rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); 3533 if (rc) { 3534 dev_err(&pdev->dev, "Can not initialize device\n"); 3535 goto err; 3536 } 3537 adapter->wd_state = wd_state; 3538 3539 rc = ena_device_validate_params(adapter, &get_feat_ctx); 3540 if (rc) { 3541 dev_err(&pdev->dev, "Validation of device parameters failed\n"); 3542 goto err_device_destroy; 3543 } 3544 3545 rc = ena_enable_msix_and_set_admin_interrupts(adapter); 3546 if (rc) { 3547 dev_err(&pdev->dev, "Enable MSI-X failed\n"); 3548 goto err_device_destroy; 3549 } 3550 /* If the interface was up before the reset bring it up */ 3551 if (adapter->dev_up_before_reset) { 3552 rc = ena_up(adapter); 3553 if (rc) { 3554 dev_err(&pdev->dev, "Failed to create I/O queues\n"); 3555 goto err_disable_msix; 3556 } 3557 } 3558 3559 set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); 3560 3561 clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); 3562 if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) 3563 netif_carrier_on(adapter->netdev); 3564 3565 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); 3566 adapter->last_keep_alive_jiffies = jiffies; 3567 3568 dev_err(&pdev->dev, "Device reset completed successfully\n"); 3569 3570 return rc; 3571err_disable_msix: 3572 ena_free_mgmnt_irq(adapter); 3573 ena_disable_msix(adapter); 3574err_device_destroy: 3575 ena_com_abort_admin_commands(ena_dev); 3576 ena_com_wait_for_abort_completion(ena_dev); 3577 ena_com_admin_destroy(ena_dev); 3578 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE); 3579 ena_com_mmio_reg_read_request_destroy(ena_dev); 3580err: 3581 clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); 3582 clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); 3583 dev_err(&pdev->dev, 3584 "Reset attempt failed. 
Can not reset the device\n"); 3585 3586 return rc; 3587} 3588 3589static void ena_fw_reset_device(struct work_struct *work) 3590{ 3591 struct ena_adapter *adapter = 3592 container_of(work, struct ena_adapter, reset_task); 3593 3594 rtnl_lock(); 3595 3596 if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { 3597 ena_destroy_device(adapter, false); 3598 ena_restore_device(adapter); 3599 } 3600 3601 rtnl_unlock(); 3602} 3603 3604static int check_for_rx_interrupt_queue(struct ena_adapter *adapter, 3605 struct ena_ring *rx_ring) 3606{ 3607 if (likely(rx_ring->first_interrupt)) 3608 return 0; 3609 3610 if (ena_com_cq_empty(rx_ring->ena_com_io_cq)) 3611 return 0; 3612 3613 rx_ring->no_interrupt_event_cnt++; 3614 3615 if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) { 3616 netif_err(adapter, rx_err, adapter->netdev, 3617 "Potential MSIX issue on Rx side Queue = %d. Reset the device\n", 3618 rx_ring->qid); 3619 adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT; 3620 smp_mb__before_atomic(); 3621 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3622 return -EIO; 3623 } 3624 3625 return 0; 3626} 3627 3628static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, 3629 struct ena_ring *tx_ring) 3630{ 3631 struct ena_tx_buffer *tx_buf; 3632 unsigned long last_jiffies; 3633 u32 missed_tx = 0; 3634 int i, rc = 0; 3635 3636 for (i = 0; i < tx_ring->ring_size; i++) { 3637 tx_buf = &tx_ring->tx_buffer_info[i]; 3638 last_jiffies = tx_buf->last_jiffies; 3639 3640 if (last_jiffies == 0) 3641 /* no pending Tx at this location */ 3642 continue; 3643 3644 if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies + 3645 2 * adapter->missing_tx_completion_to))) { 3646 /* If after graceful period interrupt is still not 3647 * received, we schedule a reset 3648 */ 3649 netif_err(adapter, tx_err, adapter->netdev, 3650 "Potential MSIX issue on Tx side Queue = %d. Reset the device\n", 3651 tx_ring->qid); 3652 adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT; 3653 smp_mb__before_atomic(); 3654 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3655 return -EIO; 3656 } 3657 3658 if (unlikely(time_is_before_jiffies(last_jiffies + 3659 adapter->missing_tx_completion_to))) { 3660 if (!tx_buf->print_once) 3661 netif_notice(adapter, tx_err, adapter->netdev, 3662 "Found a Tx that wasn't completed on time, qid %d, index %d.\n", 3663 tx_ring->qid, i); 3664 3665 tx_buf->print_once = 1; 3666 missed_tx++; 3667 } 3668 } 3669 3670 if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) { 3671 netif_err(adapter, tx_err, adapter->netdev, 3672 "The number of lost tx completions is above the threshold (%d > %d). 
Reset the device\n", 3673 missed_tx, 3674 adapter->missing_tx_completion_threshold); 3675 adapter->reset_reason = 3676 ENA_REGS_RESET_MISS_TX_CMPL; 3677 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3678 rc = -EIO; 3679 } 3680 3681 u64_stats_update_begin(&tx_ring->syncp); 3682 tx_ring->tx_stats.missed_tx += missed_tx; 3683 u64_stats_update_end(&tx_ring->syncp); 3684 3685 return rc; 3686} 3687 3688static void check_for_missing_completions(struct ena_adapter *adapter) 3689{ 3690 struct ena_ring *tx_ring; 3691 struct ena_ring *rx_ring; 3692 int i, budget, rc; 3693 int io_queue_count; 3694 3695 io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues; 3696 /* Make sure the driver doesn't turn the device off in another process */ 3697 smp_rmb(); 3698 3699 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3700 return; 3701 3702 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) 3703 return; 3704 3705 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT) 3706 return; 3707 3708 budget = ENA_MONITORED_TX_QUEUES; 3709 3710 for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) { 3711 tx_ring = &adapter->tx_ring[i]; 3712 rx_ring = &adapter->rx_ring[i]; 3713 3714 rc = check_missing_comp_in_tx_queue(adapter, tx_ring); 3715 if (unlikely(rc)) 3716 return; 3717 3718 rc = !ENA_IS_XDP_INDEX(adapter, i) ? 3719 check_for_rx_interrupt_queue(adapter, rx_ring) : 0; 3720 if (unlikely(rc)) 3721 return; 3722 3723 budget--; 3724 if (!budget) 3725 break; 3726 } 3727 3728 adapter->last_monitored_tx_qid = i % io_queue_count; 3729} 3730 3731/* trigger napi schedule after 2 consecutive detections */ 3732#define EMPTY_RX_REFILL 2 3733/* For the rare case where the device runs out of Rx descriptors and the 3734 * napi handler failed to refill new Rx descriptors (due to a lack of memory 3735 * for example). 3736 * This case will lead to a deadlock: 3737 * The device won't send interrupts since all the new Rx packets will be dropped 3738 * The napi handler won't allocate new Rx descriptors so the device won't be 3739 * able to send new packets. 3740 * 3741 * This scenario can happen when the kernel's vm.min_free_kbytes is too small. 3742 * It is recommended to have at least 512MB, with a minimum of 128MB for 3743 * constrained environments.
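 * (For example, the current value can be inspected with
 * "sysctl vm.min_free_kbytes" and raised persistently through an entry in
 * /etc/sysctl.d/.)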
3744 * 3745 * When such a situation is detected - Reschedule napi 3746 */ 3747static void check_for_empty_rx_ring(struct ena_adapter *adapter) 3748{ 3749 struct ena_ring *rx_ring; 3750 int i, refill_required; 3751 3752 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) 3753 return; 3754 3755 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) 3756 return; 3757 3758 for (i = 0; i < adapter->num_io_queues; i++) { 3759 rx_ring = &adapter->rx_ring[i]; 3760 3761 refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq); 3762 if (unlikely(refill_required == (rx_ring->ring_size - 1))) { 3763 rx_ring->empty_rx_queue++; 3764 3765 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { 3766 u64_stats_update_begin(&rx_ring->syncp); 3767 rx_ring->rx_stats.empty_rx_ring++; 3768 u64_stats_update_end(&rx_ring->syncp); 3769 3770 netif_err(adapter, drv, adapter->netdev, 3771 "Trigger refill for ring %d\n", i); 3772 3773 napi_schedule(rx_ring->napi); 3774 rx_ring->empty_rx_queue = 0; 3775 } 3776 } else { 3777 rx_ring->empty_rx_queue = 0; 3778 } 3779 } 3780} 3781 3782/* Check for keep alive expiration */ 3783static void check_for_missing_keep_alive(struct ena_adapter *adapter) 3784{ 3785 unsigned long keep_alive_expired; 3786 3787 if (!adapter->wd_state) 3788 return; 3789 3790 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3791 return; 3792 3793 keep_alive_expired = adapter->last_keep_alive_jiffies + 3794 adapter->keep_alive_timeout; 3795 if (unlikely(time_is_before_jiffies(keep_alive_expired))) { 3796 netif_err(adapter, drv, adapter->netdev, 3797 "Keep alive watchdog timeout.\n"); 3798 u64_stats_update_begin(&adapter->syncp); 3799 adapter->dev_stats.wd_expired++; 3800 u64_stats_update_end(&adapter->syncp); 3801 adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO; 3802 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3803 } 3804} 3805 3806static void check_for_admin_com_state(struct ena_adapter *adapter) 3807{ 3808 if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) { 3809 netif_err(adapter, drv, adapter->netdev, 3810 "ENA admin queue is not in running state!\n"); 3811 u64_stats_update_begin(&adapter->syncp); 3812 adapter->dev_stats.admin_q_pause++; 3813 u64_stats_update_end(&adapter->syncp); 3814 adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO; 3815 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); 3816 } 3817} 3818 3819static void ena_update_hints(struct ena_adapter *adapter, 3820 struct ena_admin_ena_hw_hints *hints) 3821{ 3822 struct net_device *netdev = adapter->netdev; 3823 3824 if (hints->admin_completion_tx_timeout) 3825 adapter->ena_dev->admin_queue.completion_timeout = 3826 hints->admin_completion_tx_timeout * 1000; 3827 3828 if (hints->mmio_read_timeout) 3829 /* convert to usec */ 3830 adapter->ena_dev->mmio_read.reg_read_to = 3831 hints->mmio_read_timeout * 1000; 3832 3833 if (hints->missed_tx_completion_count_threshold_to_reset) 3834 adapter->missing_tx_completion_threshold = 3835 hints->missed_tx_completion_count_threshold_to_reset; 3836 3837 if (hints->missing_tx_completion_timeout) { 3838 if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT) 3839 adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT; 3840 else 3841 adapter->missing_tx_completion_to = 3842 msecs_to_jiffies(hints->missing_tx_completion_timeout); 3843 } 3844 3845 if (hints->netdev_wd_timeout) 3846 netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout); 3847 3848 if (hints->driver_watchdog_timeout) { 3849 if (hints->driver_watchdog_timeout == 
ENA_HW_HINTS_NO_TIMEOUT) 3850 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT; 3851 else 3852 adapter->keep_alive_timeout = 3853 msecs_to_jiffies(hints->driver_watchdog_timeout); 3854 } 3855} 3856 3857static void ena_update_host_info(struct ena_admin_host_info *host_info, 3858 struct net_device *netdev) 3859{ 3860 host_info->supported_network_features[0] = 3861 netdev->features & GENMASK_ULL(31, 0); 3862 host_info->supported_network_features[1] = 3863 (netdev->features & GENMASK_ULL(63, 32)) >> 32; 3864} 3865 3866static void ena_timer_service(struct timer_list *t) 3867{ 3868 struct ena_adapter *adapter = from_timer(adapter, t, timer_service); 3869 u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr; 3870 struct ena_admin_host_info *host_info = 3871 adapter->ena_dev->host_attr.host_info; 3872 3873 check_for_missing_keep_alive(adapter); 3874 3875 check_for_admin_com_state(adapter); 3876 3877 check_for_missing_completions(adapter); 3878 3879 check_for_empty_rx_ring(adapter); 3880 3881 if (debug_area) 3882 ena_dump_stats_to_buf(adapter, debug_area); 3883 3884 if (host_info) 3885 ena_update_host_info(host_info, adapter->netdev); 3886 3887 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { 3888 netif_err(adapter, drv, adapter->netdev, 3889 "Trigger reset is on\n"); 3890 ena_dump_stats_to_dmesg(adapter); 3891 queue_work(ena_wq, &adapter->reset_task); 3892 return; 3893 } 3894 3895 /* Reset the timer */ 3896 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); 3897} 3898 3899static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, 3900 struct ena_com_dev *ena_dev, 3901 struct ena_com_dev_get_features_ctx *get_feat_ctx) 3902{ 3903 u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; 3904 3905 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { 3906 struct ena_admin_queue_ext_feature_fields *max_queue_ext = 3907 &get_feat_ctx->max_queue_ext.max_queue_ext; 3908 io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num, 3909 max_queue_ext->max_rx_cq_num); 3910 3911 io_tx_sq_num = max_queue_ext->max_tx_sq_num; 3912 io_tx_cq_num = max_queue_ext->max_tx_cq_num; 3913 } else { 3914 struct ena_admin_queue_feature_desc *max_queues = 3915 &get_feat_ctx->max_queues; 3916 io_tx_sq_num = max_queues->max_sq_num; 3917 io_tx_cq_num = max_queues->max_cq_num; 3918 io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num); 3919 } 3920 3921 /* In case of LLQ use the llq fields for the tx SQ/CQ */ 3922 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) 3923 io_tx_sq_num = get_feat_ctx->llq.max_llq_num; 3924 3925 max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); 3926 max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num); 3927 max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num); 3928 max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); 3929 /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ 3930 max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); 3931 3932 return max_num_io_queues; 3933} 3934 3935static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, 3936 struct net_device *netdev) 3937{ 3938 netdev_features_t dev_features = 0; 3939 3940 /* Set offload features */ 3941 if (feat->offload.tx & 3942 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) 3943 dev_features |= NETIF_F_IP_CSUM; 3944 3945 if (feat->offload.tx & 3946 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) 3947 dev_features |= NETIF_F_IPV6_CSUM; 3948 3949 if 
static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
				 struct net_device *netdev)
{
	netdev_features_t dev_features = 0;

	/* Set offload features */
	if (feat->offload.tx &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
		dev_features |= NETIF_F_IP_CSUM;

	if (feat->offload.tx &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
		dev_features |= NETIF_F_IPV6_CSUM;

	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
		dev_features |= NETIF_F_TSO;

	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
		dev_features |= NETIF_F_TSO6;

	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
		dev_features |= NETIF_F_TSO_ECN;

	if (feat->offload.rx_supported &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
		dev_features |= NETIF_F_RXCSUM;

	if (feat->offload.rx_supported &
	    ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
		dev_features |= NETIF_F_RXCSUM;

	netdev->features =
		dev_features |
		NETIF_F_SG |
		NETIF_F_RXHASH |
		NETIF_F_HIGHDMA;

	netdev->hw_features |= netdev->features;
	netdev->vlan_features |= netdev->features;
}

static void ena_set_conf_feat_params(struct ena_adapter *adapter,
				     struct ena_com_dev_get_features_ctx *feat)
{
	struct net_device *netdev = adapter->netdev;

	/* Copy mac address */
	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
		eth_hw_addr_random(netdev);
		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
	} else {
		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
		ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
	}

	/* Set offload features */
	ena_set_dev_offloads(feat, netdev);

	adapter->max_mtu = feat->dev_attr.max_mtu;
	netdev->max_mtu = adapter->max_mtu;
	netdev->min_mtu = ENA_MIN_MTU;
}

static int ena_rss_init_default(struct ena_adapter *adapter)
{
	struct ena_com_dev *ena_dev = adapter->ena_dev;
	struct device *dev = &adapter->pdev->dev;
	int rc, i;
	u32 val;

	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
	if (unlikely(rc)) {
		dev_err(dev, "Cannot init indirect table\n");
		goto err_rss_init;
	}

	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
		val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
						       ENA_IO_RXQ_IDX(val));
		if (unlikely(rc && (rc != -EOPNOTSUPP))) {
			dev_err(dev, "Cannot fill indirect table\n");
			goto err_fill_indir;
		}
	}

	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
		dev_err(dev, "Cannot fill hash function\n");
		goto err_fill_indir;
	}

	rc = ena_com_set_default_hash_ctrl(ena_dev);
	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
		dev_err(dev, "Cannot fill hash control\n");
		goto err_fill_indir;
	}

	return 0;

err_fill_indir:
	ena_com_rss_destroy(ena_dev);
err_rss_init:

	return rc;
}

static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
{
	int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;

	pci_release_selected_regions(pdev, release_bars);
}

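/* Derive the default and maximum TX/RX ring sizes from the device's queue
 * feature limits (and from the LLQ depth when LLQ placement is used),
 * rounded down to a power of two.
 */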
static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
{
	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
	struct ena_com_dev *ena_dev = ctx->ena_dev;
	u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
	u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
	u32 max_tx_queue_size;
	u32 max_rx_queue_size;

	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
					  max_queue_ext->max_rx_sq_depth);
		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;

		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
			max_tx_queue_size = min_t(u32, max_tx_queue_size,
						  llq->max_llq_depth);
		else
			max_tx_queue_size = min_t(u32, max_tx_queue_size,
						  max_queue_ext->max_tx_sq_depth);

		ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
					     max_queue_ext->max_per_packet_tx_descs);
		ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
					     max_queue_ext->max_per_packet_rx_descs);
	} else {
		struct ena_admin_queue_feature_desc *max_queues =
			&ctx->get_feat_ctx->max_queues;
		max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
					  max_queues->max_sq_depth);
		max_tx_queue_size = max_queues->max_cq_depth;

		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
			max_tx_queue_size = min_t(u32, max_tx_queue_size,
						  llq->max_llq_depth);
		else
			max_tx_queue_size = min_t(u32, max_tx_queue_size,
						  max_queues->max_sq_depth);

		ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
					     max_queues->max_packet_tx_descs);
		ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
					     max_queues->max_packet_rx_descs);
	}

	max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
	max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);

	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
				  max_tx_queue_size);
	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
				  max_rx_queue_size);

	tx_queue_size = rounddown_pow_of_two(tx_queue_size);
	rx_queue_size = rounddown_pow_of_two(rx_queue_size);

	ctx->max_tx_queue_size = max_tx_queue_size;
	ctx->max_rx_queue_size = max_rx_queue_size;
	ctx->tx_queue_size = tx_queue_size;
	ctx->rx_queue_size = rx_queue_size;

	return 0;
}

/* ena_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in ena_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * ena_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
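 * It also allocates the net_device, sets up the admin interrupts and the
 * default RSS configuration, and registers the network device.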
 */
static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct ena_calc_queue_size_ctx calc_queue_ctx = {};
	struct ena_com_dev_get_features_ctx get_feat_ctx;
	struct ena_com_dev *ena_dev = NULL;
	struct ena_adapter *adapter;
	struct net_device *netdev;
	static int adapters_found;
	u32 max_num_io_queues;
	bool wd_state;
	int bars, rc;

	dev_dbg(&pdev->dev, "%s\n", __func__);

	rc = pci_enable_device_mem(pdev);
	if (rc) {
		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
		return rc;
	}

	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
	if (rc) {
		dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
		goto err_disable_device;
	}

	pci_set_master(pdev);

	ena_dev = vzalloc(sizeof(*ena_dev));
	if (!ena_dev) {
		rc = -ENOMEM;
		goto err_disable_device;
	}

	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
	if (rc) {
		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
			rc);
		goto err_free_ena_dev;
	}

	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
					pci_resource_start(pdev, ENA_REG_BAR),
					pci_resource_len(pdev, ENA_REG_BAR));
	if (!ena_dev->reg_bar) {
		dev_err(&pdev->dev, "Failed to remap regs bar\n");
		rc = -EFAULT;
		goto err_free_region;
	}

	ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;

	ena_dev->dmadev = &pdev->dev;

	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
	if (rc) {
		dev_err(&pdev->dev, "ENA device init failed\n");
		if (rc == -ETIME)
			rc = -EPROBE_DEFER;
		goto err_free_region;
	}

	rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
	if (rc) {
		dev_err(&pdev->dev, "ENA llq bar mapping failed\n");
		goto err_free_ena_dev;
	}

	calc_queue_ctx.ena_dev = ena_dev;
	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
	calc_queue_ctx.pdev = pdev;

	/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
	 * Updated during device initialization with the real granularity
	 */
	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
	rc = ena_calc_io_queue_size(&calc_queue_ctx);
	if (rc || !max_num_io_queues) {
		rc = -EFAULT;
		goto err_device_destroy;
	}

	/* dev zeroed in init_etherdev */
	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues);
	if (!netdev) {
		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
		rc = -ENOMEM;
		goto err_device_destroy;
	}

	SET_NETDEV_DEV(netdev, &pdev->dev);

	adapter = netdev_priv(netdev);
	pci_set_drvdata(pdev, adapter);

	adapter->ena_dev = ena_dev;
	adapter->netdev = netdev;
	adapter->pdev = pdev;

	ena_set_conf_feat_params(adapter, &get_feat_ctx);

	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
	adapter->reset_reason = ENA_REGS_RESET_NORMAL;

	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;

	adapter->num_io_queues = max_num_io_queues;
	adapter->max_num_io_queues = max_num_io_queues;
	adapter->last_monitored_tx_qid = 0;

	adapter->xdp_first_ring = 0;
	adapter->xdp_num_queues = 0;

	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
		adapter->disable_meta_caching =
			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
			   BIT(ENA_ADMIN_DISABLE_META_CACHING));

	adapter->wd_state = wd_state;

	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);

	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
	if (rc) {
		dev_err(&pdev->dev,
			"Failed to query interrupt moderation feature\n");
		goto err_netdev_destroy;
	}
	ena_init_io_rings(adapter,
			  0,
			  adapter->xdp_num_queues +
			  adapter->num_io_queues);

	netdev->netdev_ops = &ena_netdev_ops;
	netdev->watchdog_timeo = TX_TIMEOUT;
	ena_set_ethtool_ops(netdev);

	netdev->priv_flags |= IFF_UNICAST_FLT;

	u64_stats_init(&adapter->syncp);

	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
	if (rc) {
		dev_err(&pdev->dev,
			"Failed to enable and set the admin interrupts\n");
		goto err_worker_destroy;
	}
	rc = ena_rss_init_default(adapter);
	if (rc && (rc != -EOPNOTSUPP)) {
		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
		goto err_free_msix;
	}

	ena_config_debug_area(adapter);

	if (!ena_update_hw_stats(adapter))
		adapter->eni_stats_supported = true;
	else
		adapter->eni_stats_supported = false;

	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);

	netif_carrier_off(netdev);

	rc = register_netdev(netdev);
	if (rc) {
		dev_err(&pdev->dev, "Cannot register net device\n");
		goto err_rss;
	}

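	/* Arm the FW reset worker and the keep-alive watchdog, then start the
	 * periodic timer service that drives the health checks.
	 */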
	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);

	adapter->last_keep_alive_jiffies = jiffies;
	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
	adapter->missing_tx_completion_to = TX_TIMEOUT;
	adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;

	ena_update_hints(adapter, &get_feat_ctx.hw_hints);

	timer_setup(&adapter->timer_service, ena_timer_service, 0);
	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));

	dev_info(&pdev->dev,
		 "%s found at mem %lx, mac addr %pM\n",
		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
		 netdev->dev_addr);

	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);

	adapters_found++;

	return 0;

err_rss:
	ena_com_delete_debug_area(ena_dev);
	ena_com_rss_destroy(ena_dev);
err_free_msix:
	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
	/* stop submitting admin commands on a device that was reset */
	ena_com_set_admin_running_state(ena_dev, false);
	ena_free_mgmnt_irq(adapter);
	ena_disable_msix(adapter);
err_worker_destroy:
	del_timer(&adapter->timer_service);
err_netdev_destroy:
	free_netdev(netdev);
err_device_destroy:
	ena_com_delete_host_info(ena_dev);
	ena_com_admin_destroy(ena_dev);
err_free_region:
	ena_release_bars(ena_dev, pdev);
err_free_ena_dev:
	vfree(ena_dev);
err_disable_device:
	pci_disable_device(pdev);
	return rc;
}

/*****************************************************************************/

/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
 * @pdev: PCI device information struct
 * @shutdown: Is it a shutdown operation? If false, it is a removal
 *
 * __ena_shutoff is a helper routine that does the real work on the shutdown
 * and removal paths; the only difference between those paths is whether the
 * netdevice is detached or unregistered.
 */
static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
{
	struct ena_adapter *adapter = pci_get_drvdata(pdev);
	struct ena_com_dev *ena_dev;
	struct net_device *netdev;

	ena_dev = adapter->ena_dev;
	netdev = adapter->netdev;

#ifdef CONFIG_RFS_ACCEL
	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
		netdev->rx_cpu_rmap = NULL;
	}
#endif /* CONFIG_RFS_ACCEL */

	/* Make sure timer and reset routine won't be called after
	 * freeing device resources.
	 */
	del_timer_sync(&adapter->timer_service);
	cancel_work_sync(&adapter->reset_task);

	rtnl_lock(); /* lock released inside the below if-else block */
	adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
	ena_destroy_device(adapter, true);
	if (shutdown) {
		netif_device_detach(netdev);
		dev_close(netdev);
		rtnl_unlock();
	} else {
		rtnl_unlock();
		unregister_netdev(netdev);
		free_netdev(netdev);
	}

	ena_com_rss_destroy(ena_dev);

	ena_com_delete_debug_area(ena_dev);

	ena_com_delete_host_info(ena_dev);

	ena_release_bars(ena_dev, pdev);

	pci_disable_device(pdev);

	vfree(ena_dev);
}

/* ena_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * ena_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.
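 * All driver resources are released and the PCI device is disabled.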
 */
static void ena_remove(struct pci_dev *pdev)
{
	__ena_shutoff(pdev, false);
}

/* ena_shutdown - Device Shutdown Routine
 * @pdev: PCI device information struct
 *
 * ena_shutdown is called by the PCI subsystem to alert the driver that
 * a shutdown/reboot (or kexec) is happening and the device must be disabled.
 */
static void ena_shutdown(struct pci_dev *pdev)
{
	__ena_shutoff(pdev, true);
}

/* ena_suspend - PM suspend callback
 * @dev_d: Device information struct
 */
static int __maybe_unused ena_suspend(struct device *dev_d)
{
	struct pci_dev *pdev = to_pci_dev(dev_d);
	struct ena_adapter *adapter = pci_get_drvdata(pdev);

	u64_stats_update_begin(&adapter->syncp);
	adapter->dev_stats.suspend++;
	u64_stats_update_end(&adapter->syncp);

	rtnl_lock();
	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
		dev_err(&pdev->dev,
			"Ignoring device reset request as the device is being suspended\n");
		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
	}
	ena_destroy_device(adapter, true);
	rtnl_unlock();
	return 0;
}

/* ena_resume - PM resume callback
 * @dev_d: Device information struct
 */
static int __maybe_unused ena_resume(struct device *dev_d)
{
	struct ena_adapter *adapter = dev_get_drvdata(dev_d);
	int rc;

	u64_stats_update_begin(&adapter->syncp);
	adapter->dev_stats.resume++;
	u64_stats_update_end(&adapter->syncp);

	rtnl_lock();
	rc = ena_restore_device(adapter);
	rtnl_unlock();
	return rc;
}

static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);

static struct pci_driver ena_pci_driver = {
	.name = DRV_MODULE_NAME,
	.id_table = ena_pci_tbl,
	.probe = ena_probe,
	.remove = ena_remove,
	.shutdown = ena_shutdown,
	.driver.pm = &ena_pm_ops,
	.sriov_configure = pci_sriov_configure_simple,
};

static int __init ena_init(void)
{
	int ret;

	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
	if (!ena_wq) {
		pr_err("Failed to create workqueue\n");
		return -ENOMEM;
	}

	ret = pci_register_driver(&ena_pci_driver);
	if (ret)
		destroy_workqueue(ena_wq);

	return ret;
}

static void __exit ena_cleanup(void)
{
	pci_unregister_driver(&ena_pci_driver);

	if (ena_wq) {
		destroy_workqueue(ena_wq);
		ena_wq = NULL;
	}
}

/******************************************************************************
 ******************************** AENQ Handlers *******************************
 *****************************************************************************/

/* ena_update_on_link_change:
 * Notify the network interface about the change in link status
 */
static void ena_update_on_link_change(void *adapter_data,
				      struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_aenq_link_change_desc *aenq_desc =
		(struct ena_admin_aenq_link_change_desc *)aenq_e;
	int status = aenq_desc->flags &
		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;

	if (status) {
		netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
		if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
			netif_carrier_on(adapter->netdev);
	} else {
		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
		netif_carrier_off(adapter->netdev);
	}
}

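/* The device posts a keep-alive AENQ event periodically; refresh the
 * watchdog timestamp and record the accumulated RX/TX drop counters.
 */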
static void ena_keep_alive_wd(void *adapter_data,
			      struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_aenq_keep_alive_desc *desc;
	u64 rx_drops;
	u64 tx_drops;

	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
	adapter->last_keep_alive_jiffies = jiffies;

	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
	tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;

	u64_stats_update_begin(&adapter->syncp);
	/* These stats are accumulated by the device, so the counters indicate
	 * all drops since last reset.
	 */
	adapter->dev_stats.rx_drops = rx_drops;
	adapter->dev_stats.tx_drops = tx_drops;
	u64_stats_update_end(&adapter->syncp);
}

static void ena_notification(void *adapter_data,
			     struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
	struct ena_admin_ena_hw_hints *hints;

	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
	     "Invalid group(%x) expected %x\n",
	     aenq_e->aenq_common_desc.group,
	     ENA_ADMIN_NOTIFICATION);

	switch (aenq_e->aenq_common_desc.syndrome) {
	case ENA_ADMIN_UPDATE_HINTS:
		hints = (struct ena_admin_ena_hw_hints *)
			(&aenq_e->inline_data_w4);
		ena_update_hints(adapter, hints);
		break;
	default:
		netif_err(adapter, drv, adapter->netdev,
			  "Invalid aenq notification link state %d\n",
			  aenq_e->aenq_common_desc.syndrome);
	}
}

/* This handler will be called for unknown event groups or unimplemented
 * handlers.
 */
static void unimplemented_aenq_handler(void *data,
				       struct ena_admin_aenq_entry *aenq_e)
{
	struct ena_adapter *adapter = (struct ena_adapter *)data;

	netif_err(adapter, drv, adapter->netdev,
		  "Unknown event was received or event with unimplemented handler\n");
}

static struct ena_aenq_handlers aenq_handlers = {
	.handlers = {
		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
		[ENA_ADMIN_NOTIFICATION] = ena_notification,
		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
	},
	.unimplemented_handler = unimplemented_aenq_handler
};

module_init(ena_init);
module_exit(ena_cleanup);