1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
4 */
5
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8#ifdef CONFIG_RFS_ACCEL
9#include <linux/cpu_rmap.h>
10#endif /* CONFIG_RFS_ACCEL */
11#include <linux/ethtool.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/numa.h>
15#include <linux/pci.h>
16#include <linux/utsname.h>
17#include <linux/version.h>
18#include <linux/vmalloc.h>
19#include <net/ip.h>
20
21#include "ena_netdev.h"
22#include <linux/bpf_trace.h>
23#include "ena_pci_id_tbl.h"
24
25MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
26MODULE_DESCRIPTION(DEVICE_NAME);
27MODULE_LICENSE("GPL");
28
29/* Time in jiffies before concluding the transmitter is hung. */
30#define TX_TIMEOUT  (5 * HZ)
31
32#define ENA_NAPI_BUDGET 64
33
34#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
35		NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
36static int debug = -1;
37module_param(debug, int, 0);
38MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
39
40static struct ena_aenq_handlers aenq_handlers;
41
42static struct workqueue_struct *ena_wq;
43
44MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
45
46static int ena_rss_init_default(struct ena_adapter *adapter);
47static void check_for_admin_com_state(struct ena_adapter *adapter);
48static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
49static int ena_restore_device(struct ena_adapter *adapter);
50
51static void ena_init_io_rings(struct ena_adapter *adapter,
52			      int first_index, int count);
53static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
54				   int count);
55static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
56				  int count);
57static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
58static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
59					   int first_index,
60					   int count);
61static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
62static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
63static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
64static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
65static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
66static void ena_napi_disable_in_range(struct ena_adapter *adapter,
67				      int first_index, int count);
68static void ena_napi_enable_in_range(struct ena_adapter *adapter,
69				     int first_index, int count);
70static int ena_up(struct ena_adapter *adapter);
71static void ena_down(struct ena_adapter *adapter);
72static void ena_unmask_interrupt(struct ena_ring *tx_ring,
73				 struct ena_ring *rx_ring);
74static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
75				      struct ena_ring *rx_ring);
76static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
77			      struct ena_tx_buffer *tx_info);
78static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
79					    int first_index, int count);
80static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
81						  int first_index, int count);
82
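/* ena_tx_timeout - transmit-timeout (watchdog) handler
 * @dev: network interface device structure
 * @txqueue: index of the stalled Tx queue
 *
 * Schedules a device reset by setting ENA_FLAG_TRIGGER_RESET, unless a
 * reset has already been triggered.
 */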
83static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
84{
85	struct ena_adapter *adapter = netdev_priv(dev);
86
	/* Change the state of the device to trigger a reset.
	 * Bail out if a reset has already been triggered.
	 */
90
91	if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
92		return;
93
94	adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
95	u64_stats_update_begin(&adapter->syncp);
96	adapter->dev_stats.tx_timeout++;
97	u64_stats_update_end(&adapter->syncp);
98
99	netif_err(adapter, tx_err, dev, "Transmit time out\n");
100}
101
102static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
103{
104	int i;
105
106	for (i = 0; i < adapter->num_io_queues; i++)
107		adapter->rx_ring[i].mtu = mtu;
108}
109
110static int ena_change_mtu(struct net_device *dev, int new_mtu)
111{
112	struct ena_adapter *adapter = netdev_priv(dev);
113	int ret;
114
115	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
116	if (!ret) {
117		netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
118		update_rx_ring_mtu(adapter, new_mtu);
119		dev->mtu = new_mtu;
120	} else {
121		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
122			  new_mtu);
123	}
124
125	return ret;
126}
127
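/* ena_xmit_common - common Tx path shared by the skb and XDP transmit paths
 * @dev: network interface device structure
 * @ring: Tx ring (regular or XDP) the packet is sent on
 * @tx_info: driver bookkeeping entry for this packet
 * @ena_tx_ctx: device Tx descriptor context
 * @next_to_use: producer index used for this packet
 * @bytes: packet length, used for statistics
 *
 * Writes a doorbell first if the LLQ burst limit was reached, hands the
 * descriptors to the device and advances next_to_use. Any failure of
 * ena_com_prepare_tx() other than -ENOMEM triggers a device reset.
 */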
128static int ena_xmit_common(struct net_device *dev,
129			   struct ena_ring *ring,
130			   struct ena_tx_buffer *tx_info,
131			   struct ena_com_tx_ctx *ena_tx_ctx,
132			   u16 next_to_use,
133			   u32 bytes)
134{
135	struct ena_adapter *adapter = netdev_priv(dev);
136	int rc, nb_hw_desc;
137
138	if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
139						ena_tx_ctx))) {
140		netif_dbg(adapter, tx_queued, dev,
141			  "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
142			  ring->qid);
143		ena_com_write_sq_doorbell(ring->ena_com_io_sq);
144	}
145
	/* prepare the packet's descriptors for the DMA engine */
147	rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
148				&nb_hw_desc);
149
150	/* In case there isn't enough space in the queue for the packet,
151	 * we simply drop it. All other failure reasons of
152	 * ena_com_prepare_tx() are fatal and therefore require a device reset.
153	 */
154	if (unlikely(rc)) {
155		netif_err(adapter, tx_queued, dev,
156			  "Failed to prepare tx bufs\n");
157		u64_stats_update_begin(&ring->syncp);
158		ring->tx_stats.prepare_ctx_err++;
159		u64_stats_update_end(&ring->syncp);
160		if (rc != -ENOMEM) {
161			adapter->reset_reason =
162				ENA_REGS_RESET_DRIVER_INVALID_STATE;
163			set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
164		}
165		return rc;
166	}
167
168	u64_stats_update_begin(&ring->syncp);
169	ring->tx_stats.cnt++;
170	ring->tx_stats.bytes += bytes;
171	u64_stats_update_end(&ring->syncp);
172
173	tx_info->tx_descs = nb_hw_desc;
174	tx_info->last_jiffies = jiffies;
175	tx_info->print_once = 0;
176
177	ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
178						 ring->ring_size);
179	return 0;
180}
181
/* This is the XDP napi callback. XDP queues use a napi callback
 * separate from the one used by the Rx/Tx queues.
 */
185static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
186{
187	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
188	u32 xdp_work_done, xdp_budget;
189	struct ena_ring *xdp_ring;
190	int napi_comp_call = 0;
191	int ret;
192
193	xdp_ring = ena_napi->xdp_ring;
194	xdp_ring->first_interrupt = ena_napi->first_interrupt;
195
196	xdp_budget = budget;
197
198	if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
199	    test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
200		napi_complete_done(napi, 0);
201		return 0;
202	}
203
204	xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
205
	/* If the device is about to reset or is down, avoid unmasking
	 * the interrupt and return 0 so NAPI won't reschedule
	 */
209	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
210		napi_complete_done(napi, 0);
211		ret = 0;
212	} else if (xdp_budget > xdp_work_done) {
213		napi_comp_call = 1;
214		if (napi_complete_done(napi, xdp_work_done))
215			ena_unmask_interrupt(xdp_ring, NULL);
216		ena_update_ring_numa_node(xdp_ring, NULL);
217		ret = xdp_work_done;
218	} else {
219		ret = xdp_budget;
220	}
221
222	u64_stats_update_begin(&xdp_ring->syncp);
223	xdp_ring->tx_stats.napi_comp += napi_comp_call;
224	xdp_ring->tx_stats.tx_poll++;
225	u64_stats_update_end(&xdp_ring->syncp);
226
227	return ret;
228}
229
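/* ena_xdp_tx_map_buff - DMA-map an XDP buffer for transmission
 * @xdp_ring: XDP Tx ring the frame will be sent on
 * @tx_info: driver bookkeeping entry for this frame
 * @xdp: the XDP buffer to transmit
 * @push_hdr: out parameter, start of the LLQ push header
 * @push_len: out parameter, number of bytes pushed through the LLQ header
 *
 * The first tx_max_header_size bytes are pushed through the LLQ header;
 * any remainder is DMA-mapped as a single buffer.
 */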
230static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring,
231			       struct ena_tx_buffer *tx_info,
232			       struct xdp_buff *xdp,
233			       void **push_hdr,
234			       u32 *push_len)
235{
236	struct ena_adapter *adapter = xdp_ring->adapter;
237	struct ena_com_buf *ena_buf;
238	dma_addr_t dma = 0;
239	u32 size;
240
241	tx_info->xdpf = xdp_convert_buff_to_frame(xdp);
242	size = tx_info->xdpf->len;
243	ena_buf = tx_info->bufs;
244
245	/* llq push buffer */
246	*push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
247	*push_hdr = tx_info->xdpf->data;
248
249	if (size - *push_len > 0) {
250		dma = dma_map_single(xdp_ring->dev,
251				     *push_hdr + *push_len,
252				     size - *push_len,
253				     DMA_TO_DEVICE);
254		if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
255			goto error_report_dma_error;
256
257		tx_info->map_linear_data = 1;
258		tx_info->num_of_bufs = 1;
259	}
260
261	ena_buf->paddr = dma;
262	ena_buf->len = size;
263
264	return 0;
265
266error_report_dma_error:
267	u64_stats_update_begin(&xdp_ring->syncp);
268	xdp_ring->tx_stats.dma_mapping_err++;
269	u64_stats_update_end(&xdp_ring->syncp);
270	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
271
272	xdp_return_frame_rx_napi(tx_info->xdpf);
273	tx_info->xdpf = NULL;
274	tx_info->num_of_bufs = 0;
275
276	return -EINVAL;
277}
278
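/* ena_xdp_xmit_buff - transmit a single XDP buffer on an XDP Tx queue
 * @dev: network interface device structure
 * @xdp: the XDP buffer to transmit
 * @qid: index of the XDP Tx ring to use
 * @rx_info: Rx buffer the frame originated from; its page refcount is
 *	     bumped so the page survives until the Tx completion arrives
 */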
279static int ena_xdp_xmit_buff(struct net_device *dev,
280			     struct xdp_buff *xdp,
281			     int qid,
282			     struct ena_rx_buffer *rx_info)
283{
284	struct ena_adapter *adapter = netdev_priv(dev);
285	struct ena_com_tx_ctx ena_tx_ctx = {};
286	struct ena_tx_buffer *tx_info;
287	struct ena_ring *xdp_ring;
288	u16 next_to_use, req_id;
289	int rc;
290	void *push_hdr;
291	u32 push_len;
292
293	xdp_ring = &adapter->tx_ring[qid];
294	next_to_use = xdp_ring->next_to_use;
295	req_id = xdp_ring->free_ids[next_to_use];
296	tx_info = &xdp_ring->tx_buffer_info[req_id];
297	tx_info->num_of_bufs = 0;
298	page_ref_inc(rx_info->page);
299	tx_info->xdp_rx_page = rx_info->page;
300
301	rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len);
302	if (unlikely(rc))
303		goto error_drop_packet;
304
305	ena_tx_ctx.ena_bufs = tx_info->bufs;
306	ena_tx_ctx.push_header = push_hdr;
307	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
308	ena_tx_ctx.req_id = req_id;
309	ena_tx_ctx.header_len = push_len;
310
311	rc = ena_xmit_common(dev,
312			     xdp_ring,
313			     tx_info,
314			     &ena_tx_ctx,
315			     next_to_use,
316			     xdp->data_end - xdp->data);
317	if (rc)
318		goto error_unmap_dma;
	/* trigger the dma engine. ena_com_write_sq_doorbell()
	 * includes a memory barrier
	 */
322	ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
323	u64_stats_update_begin(&xdp_ring->syncp);
324	xdp_ring->tx_stats.doorbells++;
325	u64_stats_update_end(&xdp_ring->syncp);
326
327	return NETDEV_TX_OK;
328
329error_unmap_dma:
330	ena_unmap_tx_buff(xdp_ring, tx_info);
331	tx_info->xdpf = NULL;
332error_drop_packet:
333	__free_page(tx_info->xdp_rx_page);
334	return NETDEV_TX_OK;
335}
336
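/* ena_xdp_execute - run the attached XDP program on a received buffer
 * @rx_ring: Rx ring the buffer arrived on
 * @xdp: XDP buffer describing the packet
 * @rx_info: Rx buffer info, needed in case the verdict is XDP_TX
 *
 * Returns the XDP verdict and updates the per-ring XDP statistics.
 */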
337static int ena_xdp_execute(struct ena_ring *rx_ring,
338			   struct xdp_buff *xdp,
339			   struct ena_rx_buffer *rx_info)
340{
341	struct bpf_prog *xdp_prog;
342	u32 verdict = XDP_PASS;
343	u64 *xdp_stat;
344
345	rcu_read_lock();
346	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
347
348	if (!xdp_prog)
349		goto out;
350
351	verdict = bpf_prog_run_xdp(xdp_prog, xdp);
352
353	if (verdict == XDP_TX) {
354		ena_xdp_xmit_buff(rx_ring->netdev,
355				  xdp,
356				  rx_ring->qid + rx_ring->adapter->num_io_queues,
357				  rx_info);
358
359		xdp_stat = &rx_ring->rx_stats.xdp_tx;
360	} else if (unlikely(verdict == XDP_ABORTED)) {
361		trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
362		xdp_stat = &rx_ring->rx_stats.xdp_aborted;
363	} else if (unlikely(verdict == XDP_DROP)) {
364		xdp_stat = &rx_ring->rx_stats.xdp_drop;
365	} else if (unlikely(verdict == XDP_PASS)) {
366		xdp_stat = &rx_ring->rx_stats.xdp_pass;
367	} else {
368		bpf_warn_invalid_xdp_action(verdict);
369		xdp_stat = &rx_ring->rx_stats.xdp_invalid;
370	}
371
372	u64_stats_update_begin(&rx_ring->syncp);
373	(*xdp_stat)++;
374	u64_stats_update_end(&rx_ring->syncp);
375out:
376	rcu_read_unlock();
377
378	return verdict;
379}
380
381static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
382{
383	adapter->xdp_first_ring = adapter->num_io_queues;
384	adapter->xdp_num_queues = adapter->num_io_queues;
385
386	ena_init_io_rings(adapter,
387			  adapter->xdp_first_ring,
388			  adapter->xdp_num_queues);
389}
390
391static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
392{
393	u32 xdp_first_ring = adapter->xdp_first_ring;
394	u32 xdp_num_queues = adapter->xdp_num_queues;
395	int rc = 0;
396
397	rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
398	if (rc)
399		goto setup_err;
400
401	rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues);
402	if (rc)
403		goto create_err;
404
405	return 0;
406
407create_err:
408	ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues);
409setup_err:
410	return rc;
411}
412
/* Provides a way for both the kernel and the bpf-prog to know
 * more about the RX-queue a given XDP frame arrived on.
 */
416static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
417{
418	int rc;
419
420	rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid);
421
422	if (rc) {
423		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
424			  "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
425			  rx_ring->qid, rc);
426		goto err;
427	}
428
429	rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
430					NULL);
431
432	if (rc) {
433		netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
434			  "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
435			  rx_ring->qid, rc);
436		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
437	}
438
439err:
440	return rc;
441}
442
443static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
444{
445	xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
446	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
447}
448
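/* Attach or detach the XDP program on the Rx rings in the given range,
 * registering/unregistering their xdp_rxq info and adjusting the Rx
 * headroom accordingly.
 */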
449static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
450						 struct bpf_prog *prog,
451						 int first, int count)
452{
453	struct ena_ring *rx_ring;
454	int i = 0;
455
456	for (i = first; i < count; i++) {
457		rx_ring = &adapter->rx_ring[i];
458		xchg(&rx_ring->xdp_bpf_prog, prog);
459		if (prog) {
460			ena_xdp_register_rxq_info(rx_ring);
461			rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
462		} else {
463			ena_xdp_unregister_rxq_info(rx_ring);
464			rx_ring->rx_headroom = 0;
465		}
466	}
467}
468
469static void ena_xdp_exchange_program(struct ena_adapter *adapter,
470				     struct bpf_prog *prog)
471{
472	struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
473
474	ena_xdp_exchange_program_rx_in_range(adapter,
475					     prog,
476					     0,
477					     adapter->num_io_queues);
478
479	if (old_bpf_prog)
480		bpf_prog_put(old_bpf_prog);
481}
482
483static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
484{
485	bool was_up;
486	int rc;
487
488	was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
489
490	if (was_up)
491		ena_down(adapter);
492
493	adapter->xdp_first_ring = 0;
494	adapter->xdp_num_queues = 0;
495	ena_xdp_exchange_program(adapter, NULL);
496	if (was_up) {
497		rc = ena_up(adapter);
498		if (rc)
499			return rc;
500	}
501	return 0;
502}
503
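/* ena_xdp_set - install or remove an XDP program (XDP_SETUP_PROG)
 * @netdev: network interface device structure
 * @bpf: netdev_bpf command from the stack
 *
 * Installing a program sets up dedicated XDP Tx queues and caps max_mtu
 * at ENA_XDP_MAX_MTU; removing it frees those queues and restores max_mtu.
 */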
504static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
505{
506	struct ena_adapter *adapter = netdev_priv(netdev);
507	struct bpf_prog *prog = bpf->prog;
508	struct bpf_prog *old_bpf_prog;
509	int rc, prev_mtu;
510	bool is_up;
511
512	is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
513	rc = ena_xdp_allowed(adapter);
514	if (rc == ENA_XDP_ALLOWED) {
515		old_bpf_prog = adapter->xdp_bpf_prog;
516		if (prog) {
517			if (!is_up) {
518				ena_init_all_xdp_queues(adapter);
519			} else if (!old_bpf_prog) {
520				ena_down(adapter);
521				ena_init_all_xdp_queues(adapter);
522			}
523			ena_xdp_exchange_program(adapter, prog);
524
525			if (is_up && !old_bpf_prog) {
526				rc = ena_up(adapter);
527				if (rc)
528					return rc;
529			}
530		} else if (old_bpf_prog) {
531			rc = ena_destroy_and_free_all_xdp_queues(adapter);
532			if (rc)
533				return rc;
534		}
535
536		prev_mtu = netdev->max_mtu;
537		netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
538
539		if (!old_bpf_prog)
540			netif_info(adapter, drv, adapter->netdev,
541				   "XDP program is set, changing the max_mtu from %d to %d",
542				   prev_mtu, netdev->max_mtu);
543
544	} else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
545		netif_err(adapter, drv, adapter->netdev,
546			  "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
547			  netdev->mtu, ENA_XDP_MAX_MTU);
548		NL_SET_ERR_MSG_MOD(bpf->extack,
549				   "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
550		return -EINVAL;
551	} else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
552		netif_err(adapter, drv, adapter->netdev,
			  "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. Current queue count: %d, maximum queue count: %d\n",
554			  adapter->num_io_queues, adapter->max_num_io_queues);
555		NL_SET_ERR_MSG_MOD(bpf->extack,
				   "Failed to set xdp program, there is not enough space for allocating XDP queues. Check the dmesg for more info");
557		return -EINVAL;
558	}
559
560	return 0;
561}
562
563/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
564 * program as well as to query the current xdp program id.
565 */
566static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
567{
568	switch (bpf->command) {
569	case XDP_SETUP_PROG:
570		return ena_xdp_set(netdev, bpf);
571	default:
572		return -EINVAL;
573	}
574	return 0;
575}
576
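/* Allocate the aRFS CPU reverse map (rx_cpu_rmap) and populate it with
 * the IRQ of each IO queue. Compiles to a no-op when CONFIG_RFS_ACCEL
 * is disabled.
 */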
577static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
578{
579#ifdef CONFIG_RFS_ACCEL
580	u32 i;
581	int rc;
582
583	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
584	if (!adapter->netdev->rx_cpu_rmap)
585		return -ENOMEM;
586	for (i = 0; i < adapter->num_io_queues; i++) {
587		int irq_idx = ENA_IO_IRQ_IDX(i);
588
589		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
590				      pci_irq_vector(adapter->pdev, irq_idx));
591		if (rc) {
592			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
593			adapter->netdev->rx_cpu_rmap = NULL;
594			return rc;
595		}
596	}
597#endif /* CONFIG_RFS_ACCEL */
598	return 0;
599}
600
601static void ena_init_io_rings_common(struct ena_adapter *adapter,
602				     struct ena_ring *ring, u16 qid)
603{
604	ring->qid = qid;
605	ring->pdev = adapter->pdev;
606	ring->dev = &adapter->pdev->dev;
607	ring->netdev = adapter->netdev;
608	ring->napi = &adapter->ena_napi[qid].napi;
609	ring->adapter = adapter;
610	ring->ena_dev = adapter->ena_dev;
611	ring->per_napi_packets = 0;
612	ring->cpu = 0;
613	ring->first_interrupt = false;
614	ring->no_interrupt_event_cnt = 0;
615	u64_stats_init(&ring->syncp);
616}
617
618static void ena_init_io_rings(struct ena_adapter *adapter,
619			      int first_index, int count)
620{
621	struct ena_com_dev *ena_dev;
622	struct ena_ring *txr, *rxr;
623	int i;
624
625	ena_dev = adapter->ena_dev;
626
627	for (i = first_index; i < first_index + count; i++) {
628		txr = &adapter->tx_ring[i];
629		rxr = &adapter->rx_ring[i];
630
631		/* TX common ring state */
632		ena_init_io_rings_common(adapter, txr, i);
633
634		/* TX specific ring state */
635		txr->ring_size = adapter->requested_tx_ring_size;
636		txr->tx_max_header_size = ena_dev->tx_max_header_size;
637		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
638		txr->sgl_size = adapter->max_tx_sgl_size;
639		txr->smoothed_interval =
640			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
641		txr->disable_meta_caching = adapter->disable_meta_caching;
642
643		/* Don't init RX queues for xdp queues */
644		if (!ENA_IS_XDP_INDEX(adapter, i)) {
645			/* RX common ring state */
646			ena_init_io_rings_common(adapter, rxr, i);
647
648			/* RX specific ring state */
649			rxr->ring_size = adapter->requested_rx_ring_size;
650			rxr->rx_copybreak = adapter->rx_copybreak;
651			rxr->sgl_size = adapter->max_rx_sgl_size;
652			rxr->smoothed_interval =
653				ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
654			rxr->empty_rx_queue = 0;
655			adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
656		}
657	}
658}
659
660/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
661 * @adapter: network interface device structure
662 * @qid: queue index
663 *
664 * Return 0 on success, negative on failure
665 */
666static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
667{
668	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
669	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
670	int size, i, node;
671
672	if (tx_ring->tx_buffer_info) {
		netif_err(adapter, ifup,
			  adapter->netdev, "tx_buffer_info is not NULL");
675		return -EEXIST;
676	}
677
678	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
679	node = cpu_to_node(ena_irq->cpu);
680
681	tx_ring->tx_buffer_info = vzalloc_node(size, node);
682	if (!tx_ring->tx_buffer_info) {
683		tx_ring->tx_buffer_info = vzalloc(size);
684		if (!tx_ring->tx_buffer_info)
685			goto err_tx_buffer_info;
686	}
687
688	size = sizeof(u16) * tx_ring->ring_size;
689	tx_ring->free_ids = vzalloc_node(size, node);
690	if (!tx_ring->free_ids) {
691		tx_ring->free_ids = vzalloc(size);
692		if (!tx_ring->free_ids)
693			goto err_tx_free_ids;
694	}
695
696	size = tx_ring->tx_max_header_size;
697	tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
698	if (!tx_ring->push_buf_intermediate_buf) {
699		tx_ring->push_buf_intermediate_buf = vzalloc(size);
700		if (!tx_ring->push_buf_intermediate_buf)
701			goto err_push_buf_intermediate_buf;
702	}
703
704	/* Req id ring for TX out of order completions */
705	for (i = 0; i < tx_ring->ring_size; i++)
706		tx_ring->free_ids[i] = i;
707
708	/* Reset tx statistics */
709	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
710
711	tx_ring->next_to_use = 0;
712	tx_ring->next_to_clean = 0;
713	tx_ring->cpu = ena_irq->cpu;
714	return 0;
715
716err_push_buf_intermediate_buf:
717	vfree(tx_ring->free_ids);
718	tx_ring->free_ids = NULL;
719err_tx_free_ids:
720	vfree(tx_ring->tx_buffer_info);
721	tx_ring->tx_buffer_info = NULL;
722err_tx_buffer_info:
723	return -ENOMEM;
724}
725
726/* ena_free_tx_resources - Free I/O Tx Resources per Queue
727 * @adapter: network interface device structure
728 * @qid: queue index
729 *
730 * Free all transmit software resources
731 */
732static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
733{
734	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
735
736	vfree(tx_ring->tx_buffer_info);
737	tx_ring->tx_buffer_info = NULL;
738
739	vfree(tx_ring->free_ids);
740	tx_ring->free_ids = NULL;
741
742	vfree(tx_ring->push_buf_intermediate_buf);
743	tx_ring->push_buf_intermediate_buf = NULL;
744}
745
746static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
747					   int first_index,
748					   int count)
749{
750	int i, rc = 0;
751
752	for (i = first_index; i < first_index + count; i++) {
753		rc = ena_setup_tx_resources(adapter, i);
754		if (rc)
755			goto err_setup_tx;
756	}
757
758	return 0;
759
760err_setup_tx:
761
762	netif_err(adapter, ifup, adapter->netdev,
763		  "Tx queue %d: allocation failed\n", i);
764
765	/* rewind the index freeing the rings as we go */
766	while (first_index < i--)
767		ena_free_tx_resources(adapter, i);
768	return rc;
769}
770
771static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
772						  int first_index, int count)
773{
774	int i;
775
776	for (i = first_index; i < first_index + count; i++)
777		ena_free_tx_resources(adapter, i);
778}
779
780/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
781 * @adapter: board private structure
782 *
783 * Free all transmit software resources
784 */
785static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
786{
787	ena_free_all_io_tx_resources_in_range(adapter,
788					      0,
789					      adapter->xdp_num_queues +
790					      adapter->num_io_queues);
791}
792
793/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
794 * @adapter: network interface device structure
795 * @qid: queue index
796 *
797 * Returns 0 on success, negative on failure
798 */
799static int ena_setup_rx_resources(struct ena_adapter *adapter,
800				  u32 qid)
801{
802	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
803	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
804	int size, node, i;
805
806	if (rx_ring->rx_buffer_info) {
807		netif_err(adapter, ifup, adapter->netdev,
808			  "rx_buffer_info is not NULL");
809		return -EEXIST;
810	}
811
	/* Allocate an extra element so that in the rx path
	 * we can always prefetch rx_info + 1
	 */
815	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
816	node = cpu_to_node(ena_irq->cpu);
817
818	rx_ring->rx_buffer_info = vzalloc_node(size, node);
819	if (!rx_ring->rx_buffer_info) {
820		rx_ring->rx_buffer_info = vzalloc(size);
821		if (!rx_ring->rx_buffer_info)
822			return -ENOMEM;
823	}
824
825	size = sizeof(u16) * rx_ring->ring_size;
826	rx_ring->free_ids = vzalloc_node(size, node);
827	if (!rx_ring->free_ids) {
828		rx_ring->free_ids = vzalloc(size);
829		if (!rx_ring->free_ids) {
830			vfree(rx_ring->rx_buffer_info);
831			rx_ring->rx_buffer_info = NULL;
832			return -ENOMEM;
833		}
834	}
835
836	/* Req id ring for receiving RX pkts out of order */
837	for (i = 0; i < rx_ring->ring_size; i++)
838		rx_ring->free_ids[i] = i;
839
840	/* Reset rx statistics */
841	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
842
843	rx_ring->next_to_clean = 0;
844	rx_ring->next_to_use = 0;
845	rx_ring->cpu = ena_irq->cpu;
846
847	return 0;
848}
849
850/* ena_free_rx_resources - Free I/O Rx Resources
851 * @adapter: network interface device structure
852 * @qid: queue index
853 *
854 * Free all receive software resources
855 */
856static void ena_free_rx_resources(struct ena_adapter *adapter,
857				  u32 qid)
858{
859	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
860
861	vfree(rx_ring->rx_buffer_info);
862	rx_ring->rx_buffer_info = NULL;
863
864	vfree(rx_ring->free_ids);
865	rx_ring->free_ids = NULL;
866}
867
/* ena_setup_all_rx_resources - allocate I/O Rx resources for all queues
869 * @adapter: board private structure
870 *
871 * Return 0 on success, negative on failure
872 */
873static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
874{
875	int i, rc = 0;
876
877	for (i = 0; i < adapter->num_io_queues; i++) {
878		rc = ena_setup_rx_resources(adapter, i);
879		if (rc)
880			goto err_setup_rx;
881	}
882
883	return 0;
884
885err_setup_rx:
886
887	netif_err(adapter, ifup, adapter->netdev,
888		  "Rx queue %d: allocation failed\n", i);
889
890	/* rewind the index freeing the rings as we go */
891	while (i--)
892		ena_free_rx_resources(adapter, i);
893	return rc;
894}
895
896/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
897 * @adapter: board private structure
898 *
899 * Free all receive software resources
900 */
901static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
902{
903	int i;
904
905	for (i = 0; i < adapter->num_io_queues; i++)
906		ena_free_rx_resources(adapter, i);
907}
908
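/* ena_alloc_rx_page - allocate and DMA-map a page for an Rx descriptor
 * @rx_ring: Rx ring the buffer belongs to
 * @rx_info: Rx buffer info to fill
 * @gfp: allocation flags
 *
 * Reuses the existing page if the previous allocation was not consumed yet.
 */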
909static int ena_alloc_rx_page(struct ena_ring *rx_ring,
910				    struct ena_rx_buffer *rx_info, gfp_t gfp)
911{
912	int headroom = rx_ring->rx_headroom;
913	struct ena_com_buf *ena_buf;
914	struct page *page;
915	dma_addr_t dma;
916
917	/* restore page offset value in case it has been changed by device */
918	rx_info->page_offset = headroom;
919
920	/* if previous allocated page is not used */
921	if (unlikely(rx_info->page))
922		return 0;
923
924	page = alloc_page(gfp);
925	if (unlikely(!page)) {
926		u64_stats_update_begin(&rx_ring->syncp);
927		rx_ring->rx_stats.page_alloc_fail++;
928		u64_stats_update_end(&rx_ring->syncp);
929		return -ENOMEM;
930	}
931
	/* To enable NIC-side port-mirroring, aka SPAN port,
	 * we make the buffer readable by the NIC as well
	 */
935	dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
936			   DMA_BIDIRECTIONAL);
937	if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
938		u64_stats_update_begin(&rx_ring->syncp);
939		rx_ring->rx_stats.dma_mapping_err++;
940		u64_stats_update_end(&rx_ring->syncp);
941
942		__free_page(page);
943		return -EIO;
944	}
945	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
946		  "Allocate page %p, rx_info %p\n", page, rx_info);
947
948	rx_info->page = page;
949	ena_buf = &rx_info->ena_buf;
950	ena_buf->paddr = dma + headroom;
951	ena_buf->len = ENA_PAGE_SIZE - headroom;
952
953	return 0;
954}
955
956static void ena_free_rx_page(struct ena_ring *rx_ring,
957			     struct ena_rx_buffer *rx_info)
958{
959	struct page *page = rx_info->page;
960	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
961
962	if (unlikely(!page)) {
963		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
964			   "Trying to free unallocated buffer\n");
965		return;
966	}
967
968	dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom,
969		       ENA_PAGE_SIZE,
970		       DMA_BIDIRECTIONAL);
971
972	__free_page(page);
973	rx_info->page = NULL;
974}
975
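/* ena_refill_rx_bufs - post fresh Rx buffers to the device
 * @rx_ring: Rx ring to refill
 * @num: number of buffers to post
 *
 * Returns the number of buffers actually posted and rings the SQ doorbell
 * if at least one buffer was added.
 */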
976static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
977{
978	u16 next_to_use, req_id;
979	u32 i;
980	int rc;
981
982	next_to_use = rx_ring->next_to_use;
983
984	for (i = 0; i < num; i++) {
985		struct ena_rx_buffer *rx_info;
986
987		req_id = rx_ring->free_ids[next_to_use];
988
989		rx_info = &rx_ring->rx_buffer_info[req_id];
990
991		rc = ena_alloc_rx_page(rx_ring, rx_info,
992				       GFP_ATOMIC | __GFP_COMP);
993		if (unlikely(rc < 0)) {
994			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
995				   "Failed to allocate buffer for rx queue %d\n",
996				   rx_ring->qid);
997			break;
998		}
999		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1000						&rx_info->ena_buf,
1001						req_id);
1002		if (unlikely(rc)) {
1003			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
1004				   "Failed to add buffer for rx queue %d\n",
1005				   rx_ring->qid);
1006			break;
1007		}
1008		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1009						   rx_ring->ring_size);
1010	}
1011
1012	if (unlikely(i < num)) {
1013		u64_stats_update_begin(&rx_ring->syncp);
1014		rx_ring->rx_stats.refil_partial++;
1015		u64_stats_update_end(&rx_ring->syncp);
1016		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
1017			   "Refilled rx qid %d with only %d buffers (from %d)\n",
1018			   rx_ring->qid, i, num);
1019	}
1020
1021	/* ena_com_write_sq_doorbell issues a wmb() */
1022	if (likely(i))
1023		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1024
1025	rx_ring->next_to_use = next_to_use;
1026
1027	return i;
1028}
1029
1030static void ena_free_rx_bufs(struct ena_adapter *adapter,
1031			     u32 qid)
1032{
1033	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1034	u32 i;
1035
1036	for (i = 0; i < rx_ring->ring_size; i++) {
1037		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1038
1039		if (rx_info->page)
1040			ena_free_rx_page(rx_ring, rx_info);
1041	}
1042}
1043
/* ena_refill_all_rx_bufs - allocate Rx buffers for all queues
 * @adapter: board private structure
 */
1047static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1048{
1049	struct ena_ring *rx_ring;
1050	int i, rc, bufs_num;
1051
1052	for (i = 0; i < adapter->num_io_queues; i++) {
1053		rx_ring = &adapter->rx_ring[i];
1054		bufs_num = rx_ring->ring_size - 1;
1055		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1056
1057		if (unlikely(rc != bufs_num))
1058			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
				   "Refilling Queue %d failed. allocated %d buffers out of %d\n",
1060				   i, rc, bufs_num);
1061	}
1062}
1063
1064static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
1065{
1066	int i;
1067
1068	for (i = 0; i < adapter->num_io_queues; i++)
1069		ena_free_rx_bufs(adapter, i);
1070}
1071
1072static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
1073			      struct ena_tx_buffer *tx_info)
1074{
1075	struct ena_com_buf *ena_buf;
1076	u32 cnt;
1077	int i;
1078
1079	ena_buf = tx_info->bufs;
1080	cnt = tx_info->num_of_bufs;
1081
1082	if (unlikely(!cnt))
1083		return;
1084
1085	if (tx_info->map_linear_data) {
1086		dma_unmap_single(tx_ring->dev,
1087				 dma_unmap_addr(ena_buf, paddr),
1088				 dma_unmap_len(ena_buf, len),
1089				 DMA_TO_DEVICE);
1090		ena_buf++;
1091		cnt--;
1092	}
1093
1094	/* unmap remaining mapped pages */
1095	for (i = 0; i < cnt; i++) {
1096		dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
1097			       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
1098		ena_buf++;
1099	}
1100}
1101
/* ena_free_tx_bufs - Free Tx Buffers per Queue
 * @tx_ring: TX ring whose buffers are to be freed
 */
1105static void ena_free_tx_bufs(struct ena_ring *tx_ring)
1106{
1107	bool print_once = true;
1108	u32 i;
1109
1110	for (i = 0; i < tx_ring->ring_size; i++) {
1111		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1112
1113		if (!tx_info->skb)
1114			continue;
1115
1116		if (print_once) {
1117			netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
1118				     "Free uncompleted tx skb qid %d idx 0x%x\n",
1119				     tx_ring->qid, i);
1120			print_once = false;
1121		} else {
1122			netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
1123				  "Free uncompleted tx skb qid %d idx 0x%x\n",
1124				  tx_ring->qid, i);
1125		}
1126
1127		ena_unmap_tx_buff(tx_ring, tx_info);
1128
1129		dev_kfree_skb_any(tx_info->skb);
1130	}
1131	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
1132						  tx_ring->qid));
1133}
1134
1135static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
1136{
1137	struct ena_ring *tx_ring;
1138	int i;
1139
1140	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
1141		tx_ring = &adapter->tx_ring[i];
1142		ena_free_tx_bufs(tx_ring);
1143	}
1144}
1145
1146static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1147{
1148	u16 ena_qid;
1149	int i;
1150
1151	for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
1152		ena_qid = ENA_IO_TXQ_IDX(i);
1153		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1154	}
1155}
1156
1157static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1158{
1159	u16 ena_qid;
1160	int i;
1161
1162	for (i = 0; i < adapter->num_io_queues; i++) {
1163		ena_qid = ENA_IO_RXQ_IDX(i);
1164		cancel_work_sync(&adapter->ena_napi[i].dim.work);
1165		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1166	}
1167}
1168
1169static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
1170{
1171	ena_destroy_all_tx_queues(adapter);
1172	ena_destroy_all_rx_queues(adapter);
1173}
1174
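/* Log an invalid Tx/XDP completion request id, bump the bad_req_id
 * statistic and schedule a device reset. Always returns -EFAULT.
 */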
1175static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
1176				 struct ena_tx_buffer *tx_info, bool is_xdp)
1177{
1178	if (tx_info)
1179		netif_err(ring->adapter,
1180			  tx_done,
1181			  ring->netdev,
			  "tx_info doesn't have a valid %s",
1183			   is_xdp ? "xdp frame" : "skb");
1184	else
1185		netif_err(ring->adapter,
1186			  tx_done,
1187			  ring->netdev,
1188			  "Invalid req_id: %hu\n",
1189			  req_id);
1190
1191	u64_stats_update_begin(&ring->syncp);
1192	ring->tx_stats.bad_req_id++;
1193	u64_stats_update_end(&ring->syncp);
1194
1195	/* Trigger device reset */
1196	ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
1197	set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags);
1198	return -EFAULT;
1199}
1200
1201static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
1202{
1203	struct ena_tx_buffer *tx_info;
1204
1205	tx_info = &tx_ring->tx_buffer_info[req_id];
1206	if (likely(tx_info->skb))
1207		return 0;
1208
1209	return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
1210}
1211
1212static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
1213{
1214	struct ena_tx_buffer *tx_info;
1215
1216	tx_info = &xdp_ring->tx_buffer_info[req_id];
1217	if (likely(tx_info->xdpf))
1218		return 0;
1219
1220	return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
1221}
1222
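/* ena_clean_tx_irq - reclaim completed Tx descriptors
 * @tx_ring: Tx ring to clean
 * @budget: maximum number of packets to reclaim
 *
 * Unmaps and frees completed skbs, returns their req_ids to the free list
 * and wakes the Tx queue if enough space became available.
 * Returns the number of reclaimed packets.
 */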
1223static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
1224{
1225	struct netdev_queue *txq;
1226	bool above_thresh;
1227	u32 tx_bytes = 0;
1228	u32 total_done = 0;
1229	u16 next_to_clean;
1230	u16 req_id;
1231	int tx_pkts = 0;
1232	int rc;
1233
1234	next_to_clean = tx_ring->next_to_clean;
1235	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
1236
1237	while (tx_pkts < budget) {
1238		struct ena_tx_buffer *tx_info;
1239		struct sk_buff *skb;
1240
1241		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
1242						&req_id);
1243		if (rc) {
1244			if (unlikely(rc == -EINVAL))
1245				handle_invalid_req_id(tx_ring, req_id, NULL,
1246						      false);
1247			break;
1248		}
1249
1250		/* validate that the request id points to a valid skb */
1251		rc = validate_tx_req_id(tx_ring, req_id);
1252		if (rc)
1253			break;
1254
1255		tx_info = &tx_ring->tx_buffer_info[req_id];
1256		skb = tx_info->skb;
1257
		/* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
1259		prefetch(&skb->end);
1260
1261		tx_info->skb = NULL;
1262		tx_info->last_jiffies = 0;
1263
1264		ena_unmap_tx_buff(tx_ring, tx_info);
1265
1266		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
1267			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
1268			  skb);
1269
1270		tx_bytes += skb->len;
1271		dev_kfree_skb(skb);
1272		tx_pkts++;
1273		total_done += tx_info->tx_descs;
1274
1275		tx_ring->free_ids[next_to_clean] = req_id;
1276		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1277						     tx_ring->ring_size);
1278	}
1279
1280	tx_ring->next_to_clean = next_to_clean;
1281	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
1282	ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
1283
1284	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
1285
1286	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
1287		  "tx_poll: q %d done. total pkts: %d\n",
1288		  tx_ring->qid, tx_pkts);
1289
	/* need to make the ring's circular buffer update visible to
	 * ena_start_xmit() before checking for netif_queue_stopped().
	 */
1293	smp_mb();
1294
1295	above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1296						    ENA_TX_WAKEUP_THRESH);
1297	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
1298		__netif_tx_lock(txq, smp_processor_id());
1299		above_thresh =
1300			ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1301						     ENA_TX_WAKEUP_THRESH);
1302		if (netif_tx_queue_stopped(txq) && above_thresh &&
1303		    test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
1304			netif_tx_wake_queue(txq);
1305			u64_stats_update_begin(&tx_ring->syncp);
1306			tx_ring->tx_stats.queue_wakeup++;
1307			u64_stats_update_end(&tx_ring->syncp);
1308		}
1309		__netif_tx_unlock(txq);
1310	}
1311
1312	return tx_pkts;
1313}
1314
1315static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
1316{
1317	struct sk_buff *skb;
1318
1319	if (frags)
1320		skb = napi_get_frags(rx_ring->napi);
1321	else
1322		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1323						rx_ring->rx_copybreak);
1324
1325	if (unlikely(!skb)) {
1326		u64_stats_update_begin(&rx_ring->syncp);
1327		rx_ring->rx_stats.skb_alloc_fail++;
1328		u64_stats_update_end(&rx_ring->syncp);
1329		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1330			  "Failed to allocate skb. frags: %d\n", frags);
1331		return NULL;
1332	}
1333
1334	return skb;
1335}
1336
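/* ena_rx_skb - build an skb from the received descriptors
 * @rx_ring: Rx ring the packet arrived on
 * @ena_bufs: array of completed Rx buffers
 * @descs: number of descriptors in the packet
 * @next_to_clean: consumer index, advanced as buffers are consumed
 *
 * Packets up to rx_copybreak bytes are copied into a small linear skb and
 * the page is left in place; larger packets attach their pages as frags.
 */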
1337static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
1338				  struct ena_com_rx_buf_info *ena_bufs,
1339				  u32 descs,
1340				  u16 *next_to_clean)
1341{
1342	struct sk_buff *skb;
1343	struct ena_rx_buffer *rx_info;
1344	u16 len, req_id, buf = 0;
1345	void *va;
1346
1347	len = ena_bufs[buf].len;
1348	req_id = ena_bufs[buf].req_id;
1349
1350	rx_info = &rx_ring->rx_buffer_info[req_id];
1351
1352	if (unlikely(!rx_info->page)) {
1353		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
1354			  "Page is NULL\n");
1355		return NULL;
1356	}
1357
1358	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1359		  "rx_info %p page %p\n",
1360		  rx_info, rx_info->page);
1361
1362	/* save virt address of first buffer */
1363	va = page_address(rx_info->page) + rx_info->page_offset;
1364
1365	prefetch(va);
1366
1367	if (len <= rx_ring->rx_copybreak) {
1368		skb = ena_alloc_skb(rx_ring, false);
1369		if (unlikely(!skb))
1370			return NULL;
1371
1372		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1373			  "RX allocated small packet. len %d. data_len %d\n",
1374			  skb->len, skb->data_len);
1375
1376		/* sync this buffer for CPU use */
1377		dma_sync_single_for_cpu(rx_ring->dev,
1378					dma_unmap_addr(&rx_info->ena_buf, paddr),
1379					len,
1380					DMA_FROM_DEVICE);
1381		skb_copy_to_linear_data(skb, va, len);
1382		dma_sync_single_for_device(rx_ring->dev,
1383					   dma_unmap_addr(&rx_info->ena_buf, paddr),
1384					   len,
1385					   DMA_FROM_DEVICE);
1386
1387		skb_put(skb, len);
1388		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1389		rx_ring->free_ids[*next_to_clean] = req_id;
1390		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
1391						     rx_ring->ring_size);
1392		return skb;
1393	}
1394
1395	skb = ena_alloc_skb(rx_ring, true);
1396	if (unlikely(!skb))
1397		return NULL;
1398
1399	do {
1400		dma_unmap_page(rx_ring->dev,
1401			       dma_unmap_addr(&rx_info->ena_buf, paddr),
1402			       ENA_PAGE_SIZE, DMA_BIDIRECTIONAL);
1403
1404		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
1405				rx_info->page_offset, len, ENA_PAGE_SIZE);
1406
1407		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1408			  "RX skb updated. len %d. data_len %d\n",
1409			  skb->len, skb->data_len);
1410
1411		rx_info->page = NULL;
1412
1413		rx_ring->free_ids[*next_to_clean] = req_id;
1414		*next_to_clean =
1415			ENA_RX_RING_IDX_NEXT(*next_to_clean,
1416					     rx_ring->ring_size);
1417		if (likely(--descs == 0))
1418			break;
1419
1420		buf++;
1421		len = ena_bufs[buf].len;
1422		req_id = ena_bufs[buf].req_id;
1423
1424		rx_info = &rx_ring->rx_buffer_info[req_id];
1425	} while (1);
1426
1427	return skb;
1428}
1429
/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
 * @rx_ring: the ring on which the packet was received
 * @ena_rx_ctx: received packet context/metadata
 * @skb: skb currently being received and modified
 */
1435static void ena_rx_checksum(struct ena_ring *rx_ring,
1436				   struct ena_com_rx_ctx *ena_rx_ctx,
1437				   struct sk_buff *skb)
1438{
1439	/* Rx csum disabled */
1440	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
1441		skb->ip_summed = CHECKSUM_NONE;
1442		return;
1443	}
1444
1445	/* For fragmented packets the checksum isn't valid */
1446	if (ena_rx_ctx->frag) {
1447		skb->ip_summed = CHECKSUM_NONE;
1448		return;
1449	}
1450
	/* if IPv4 and there is a checksum error */
1452	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1453		     (ena_rx_ctx->l3_csum_err))) {
1454		/* ipv4 checksum error */
1455		skb->ip_summed = CHECKSUM_NONE;
1456		u64_stats_update_begin(&rx_ring->syncp);
1457		rx_ring->rx_stats.bad_csum++;
1458		u64_stats_update_end(&rx_ring->syncp);
1459		netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1460			  "RX IPv4 header checksum error\n");
1461		return;
1462	}
1463
1464	/* if TCP/UDP */
1465	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1466		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
1467		if (unlikely(ena_rx_ctx->l4_csum_err)) {
1468			/* TCP/UDP checksum error */
1469			u64_stats_update_begin(&rx_ring->syncp);
1470			rx_ring->rx_stats.bad_csum++;
1471			u64_stats_update_end(&rx_ring->syncp);
1472			netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1473				  "RX L4 checksum error\n");
1474			skb->ip_summed = CHECKSUM_NONE;
1475			return;
1476		}
1477
1478		if (likely(ena_rx_ctx->l4_csum_checked)) {
1479			skb->ip_summed = CHECKSUM_UNNECESSARY;
1480			u64_stats_update_begin(&rx_ring->syncp);
1481			rx_ring->rx_stats.csum_good++;
1482			u64_stats_update_end(&rx_ring->syncp);
1483		} else {
1484			u64_stats_update_begin(&rx_ring->syncp);
1485			rx_ring->rx_stats.csum_unchecked++;
1486			u64_stats_update_end(&rx_ring->syncp);
1487			skb->ip_summed = CHECKSUM_NONE;
1488		}
	} else {
		skb->ip_summed = CHECKSUM_NONE;
	}
}
1495
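/* Set the skb hash from the device-provided hash value. The L4 hash type
 * is reported for unfragmented TCP/UDP packets, PKT_HASH_TYPE_NONE
 * otherwise.
 */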
1496static void ena_set_rx_hash(struct ena_ring *rx_ring,
1497			    struct ena_com_rx_ctx *ena_rx_ctx,
1498			    struct sk_buff *skb)
1499{
1500	enum pkt_hash_types hash_type;
1501
1502	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1503		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1504			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1505
1506			hash_type = PKT_HASH_TYPE_L4;
1507		else
1508			hash_type = PKT_HASH_TYPE_NONE;
1509
1510		/* Override hash type if the packet is fragmented */
1511		if (ena_rx_ctx->frag)
1512			hash_type = PKT_HASH_TYPE_NONE;
1513
1514		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1515	}
1516}
1517
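/* Build an xdp_buff around the first Rx buffer of the packet and run the
 * XDP program on it. Returns the XDP verdict (XDP_DROP for oversized
 * packets); on XDP_PASS the buffer offset and length are updated in case
 * the program moved the data pointers.
 */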
1518static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
1519{
1520	struct ena_rx_buffer *rx_info;
1521	int ret;
1522
1523	rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1524	xdp->data = page_address(rx_info->page) + rx_info->page_offset;
1525	xdp_set_data_meta_invalid(xdp);
1526	xdp->data_hard_start = page_address(rx_info->page);
1527	xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
	/* If for some reason we received a bigger packet than
	 * we expected, we simply drop it
	 */
1531	if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
1532		return XDP_DROP;
1533
1534	ret = ena_xdp_execute(rx_ring, xdp, rx_info);
1535
1536	/* The xdp program might expand the headers */
1537	if (ret == XDP_PASS) {
1538		rx_info->page_offset = xdp->data - xdp->data_hard_start;
1539		rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1540	}
1541
1542	return ret;
}

/* ena_clean_rx_irq - Cleanup RX irq
 * @rx_ring: RX ring to clean
 * @napi: napi handler
 * @budget: how many packets the driver is allowed to clean
 *
 * Returns the number of cleaned packets.
 */
1551static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1552			    u32 budget)
1553{
1554	u16 next_to_clean = rx_ring->next_to_clean;
1555	struct ena_com_rx_ctx ena_rx_ctx;
1556	struct ena_rx_buffer *rx_info;
1557	struct ena_adapter *adapter;
1558	u32 res_budget, work_done;
1559	int rx_copybreak_pkt = 0;
1560	int refill_threshold;
1561	struct sk_buff *skb;
1562	int refill_required;
1563	struct xdp_buff xdp;
1564	int total_len = 0;
1565	int xdp_verdict;
1566	int rc = 0;
1567	int i;
1568
1569	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1570		  "%s qid %d\n", __func__, rx_ring->qid);
1571	res_budget = budget;
1572	xdp.rxq = &rx_ring->xdp_rxq;
1573	xdp.frame_sz = ENA_PAGE_SIZE;
1574
1575	do {
1576		xdp_verdict = XDP_PASS;
1577		skb = NULL;
1578		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1579		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1580		ena_rx_ctx.descs = 0;
1581		ena_rx_ctx.pkt_offset = 0;
1582		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1583				    rx_ring->ena_com_io_sq,
1584				    &ena_rx_ctx);
1585		if (unlikely(rc))
1586			goto error;
1587
1588		if (unlikely(ena_rx_ctx.descs == 0))
1589			break;
1590
1591		/* First descriptor might have an offset set by the device */
1592		rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1593		rx_info->page_offset += ena_rx_ctx.pkt_offset;
1594
1595		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1596			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1597			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1598			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1599
1600		if (ena_xdp_present_ring(rx_ring))
1601			xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
1602
1603		/* allocate skb and fill it */
1604		if (xdp_verdict == XDP_PASS)
1605			skb = ena_rx_skb(rx_ring,
1606					 rx_ring->ena_bufs,
1607					 ena_rx_ctx.descs,
1608					 &next_to_clean);
1609
1610		if (unlikely(!skb)) {
			/* The page might not actually be freed here since the
			 * page reference count is incremented in
			 * ena_xdp_xmit_buff(), and it will be decreased only
			 * when the Tx completion is received from the device
			 */
1616			if (xdp_verdict == XDP_TX)
1617				ena_free_rx_page(rx_ring,
1618						 &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]);
1619			for (i = 0; i < ena_rx_ctx.descs; i++) {
1620				rx_ring->free_ids[next_to_clean] =
1621					rx_ring->ena_bufs[i].req_id;
1622				next_to_clean =
1623					ENA_RX_RING_IDX_NEXT(next_to_clean,
1624							     rx_ring->ring_size);
1625			}
1626			if (xdp_verdict != XDP_PASS) {
1627				res_budget--;
1628				continue;
1629			}
1630			break;
1631		}
1632
1633		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1634
1635		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1636
1637		skb_record_rx_queue(skb, rx_ring->qid);
1638
1639		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
1640			total_len += rx_ring->ena_bufs[0].len;
1641			rx_copybreak_pkt++;
1642			napi_gro_receive(napi, skb);
1643		} else {
1644			total_len += skb->len;
1645			napi_gro_frags(napi);
1646		}
1647
1648		res_budget--;
1649	} while (likely(res_budget));
1650
1651	work_done = budget - res_budget;
1652	rx_ring->per_napi_packets += work_done;
1653	u64_stats_update_begin(&rx_ring->syncp);
1654	rx_ring->rx_stats.bytes += total_len;
1655	rx_ring->rx_stats.cnt += work_done;
1656	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1657	u64_stats_update_end(&rx_ring->syncp);
1658
1659	rx_ring->next_to_clean = next_to_clean;
1660
1661	refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
1662	refill_threshold =
1663		min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1664		      ENA_RX_REFILL_THRESH_PACKET);
1665
1666	/* Optimization, try to batch new rx buffers */
1667	if (refill_required > refill_threshold) {
1668		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1669		ena_refill_rx_bufs(rx_ring, refill_required);
1670	}
1671
1672	return work_done;
1673
1674error:
1675	adapter = netdev_priv(rx_ring->netdev);
1676
1677	if (rc == -ENOSPC) {
1678		u64_stats_update_begin(&rx_ring->syncp);
1679		rx_ring->rx_stats.bad_desc_num++;
1680		u64_stats_update_end(&rx_ring->syncp);
1681		adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1682	} else {
1683		u64_stats_update_begin(&rx_ring->syncp);
1684		rx_ring->rx_stats.bad_req_id++;
1685		u64_stats_update_end(&rx_ring->syncp);
1686		adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
1687	}
1688
1689	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
1690
1691	return 0;
1692}
1693
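/* Deferred DIM (dynamic interrupt moderation) work: apply the Rx
 * moderation interval chosen by net_dim and restart the measurement.
 */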
1694static void ena_dim_work(struct work_struct *w)
1695{
1696	struct dim *dim = container_of(w, struct dim, work);
1697	struct dim_cq_moder cur_moder =
1698		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1699	struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
1700
1701	ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1702	dim->state = DIM_START_MEASURE;
1703}
1704
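/* Feed the per-ring Rx counters into net_dim so it can pick a new
 * adaptive interrupt moderation profile for this queue.
 */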
1705static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1706{
1707	struct dim_sample dim_sample;
1708	struct ena_ring *rx_ring = ena_napi->rx_ring;
1709
1710	if (!rx_ring->per_napi_packets)
1711		return;
1712
1713	rx_ring->non_empty_napi_events++;
1714
1715	dim_update_sample(rx_ring->non_empty_napi_events,
1716			  rx_ring->rx_stats.cnt,
1717			  rx_ring->rx_stats.bytes,
1718			  &dim_sample);
1719
1720	net_dim(&ena_napi->dim, dim_sample);
1721
1722	rx_ring->per_napi_packets = 0;
1723}
1724
1725static void ena_unmask_interrupt(struct ena_ring *tx_ring,
1726					struct ena_ring *rx_ring)
1727{
1728	struct ena_eth_io_intr_reg intr_reg;
1729	u32 rx_interval = 0;
	/* Rx ring can be NULL for XDP tx queues, which don't have an
	 * accompanying rx_ring pair.
	 */
1733	if (rx_ring)
1734		rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1735			rx_ring->smoothed_interval :
1736			ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
1737
1738	/* Update intr register: rx intr delay,
1739	 * tx intr delay and interrupt unmask
1740	 */
1741	ena_com_update_intr_reg(&intr_reg,
1742				rx_interval,
1743				tx_ring->smoothed_interval,
1744				true);
1745
1746	u64_stats_update_begin(&tx_ring->syncp);
1747	tx_ring->tx_stats.unmask_interrupt++;
1748	u64_stats_update_end(&tx_ring->syncp);
1749
	/* It is a shared MSI-X vector.
	 * The Tx and Rx CQs both hold a pointer to it,
	 * so we use one of them to reach the intr reg.
	 * The Tx ring is used because rx_ring is NULL for XDP queues.
	 */
1755	ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
1756}
1757
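/* Move the Tx/Rx completion queues to the NUMA node of the CPU that is
 * currently processing them, if the CPU changed since the last update.
 */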
1758static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1759					     struct ena_ring *rx_ring)
1760{
1761	int cpu = get_cpu();
1762	int numa_node;
1763
1764	/* Check only one ring since the 2 rings are running on the same cpu */
1765	if (likely(tx_ring->cpu == cpu))
1766		goto out;
1767
1768	numa_node = cpu_to_node(cpu);
1769	put_cpu();
1770
1771	if (numa_node != NUMA_NO_NODE) {
1772		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1773		if (rx_ring)
1774			ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1775						 numa_node);
1776	}
1777
1778	tx_ring->cpu = cpu;
1779	if (rx_ring)
1780		rx_ring->cpu = cpu;
1781
1782	return;
1783out:
1784	put_cpu();
1785}
1786
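/* ena_clean_xdp_irq - reclaim completed descriptors of an XDP Tx ring
 * @xdp_ring: XDP Tx ring to clean
 * @budget: maximum number of frames to reclaim
 *
 * Unmaps completed XDP frames, releases their Rx pages and returns the
 * number of reclaimed frames.
 */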
1787static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
1788{
1789	u32 total_done = 0;
1790	u16 next_to_clean;
1791	u32 tx_bytes = 0;
1792	int tx_pkts = 0;
1793	u16 req_id;
1794	int rc;
1795
1796	if (unlikely(!xdp_ring))
1797		return 0;
1798	next_to_clean = xdp_ring->next_to_clean;
1799
1800	while (tx_pkts < budget) {
1801		struct ena_tx_buffer *tx_info;
1802		struct xdp_frame *xdpf;
1803
1804		rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
1805						&req_id);
1806		if (rc) {
1807			if (unlikely(rc == -EINVAL))
1808				handle_invalid_req_id(xdp_ring, req_id, NULL,
1809						      true);
1810			break;
1811		}
1812
1813		/* validate that the request id points to a valid xdp_frame */
1814		rc = validate_xdp_req_id(xdp_ring, req_id);
1815		if (rc)
1816			break;
1817
1818		tx_info = &xdp_ring->tx_buffer_info[req_id];
1819		xdpf = tx_info->xdpf;
1820
1821		tx_info->xdpf = NULL;
1822		tx_info->last_jiffies = 0;
1823		ena_unmap_tx_buff(xdp_ring, tx_info);
1824
1825		netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1826			  "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
1827			  xdpf);
1828
1829		tx_bytes += xdpf->len;
1830		tx_pkts++;
1831		total_done += tx_info->tx_descs;
1832
1833		__free_page(tx_info->xdp_rx_page);
1834		xdp_ring->free_ids[next_to_clean] = req_id;
1835		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1836						     xdp_ring->ring_size);
1837	}
1838
1839	xdp_ring->next_to_clean = next_to_clean;
1840	ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
1841	ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
1842
1843	netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1844		  "tx_poll: q %d done. total pkts: %d\n",
1845		  xdp_ring->qid, tx_pkts);
1846
1847	return tx_pkts;
1848}
1849
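/* ena_io_poll - NAPI callback for a regular (non-XDP) Tx/Rx queue pair
 * @napi: napi handler
 * @budget: how many Rx packets the driver is allowed to clean
 *
 * Cleans Tx completions, processes Rx packets, and when all work is done
 * applies adaptive Rx moderation and unmasks the queue's interrupt.
 */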
1850static int ena_io_poll(struct napi_struct *napi, int budget)
1851{
1852	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1853	struct ena_ring *tx_ring, *rx_ring;
1854	int tx_work_done;
1855	int rx_work_done = 0;
1856	int tx_budget;
1857	int napi_comp_call = 0;
1858	int ret;
1859
1860	tx_ring = ena_napi->tx_ring;
1861	rx_ring = ena_napi->rx_ring;
1862
1863	tx_ring->first_interrupt = ena_napi->first_interrupt;
1864	rx_ring->first_interrupt = ena_napi->first_interrupt;
1865
1866	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1867
1868	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1869	    test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1870		napi_complete_done(napi, 0);
1871		return 0;
1872	}
1873
1874	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1875	/* On netpoll the budget is zero and the handler should only clean the
1876	 * tx completions.
1877	 */
1878	if (likely(budget))
1879		rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1880
	/* If the device is about to reset or is down, avoid unmasking
	 * the interrupt and return 0 so NAPI won't reschedule
	 */
1884	if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1885		     test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1886		napi_complete_done(napi, 0);
1887		ret = 0;
1888
1889	} else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1890		napi_comp_call = 1;
1891
		/* Update the NUMA node and unmask the interrupt only when
		 * scheduled from interrupt context (vs from sk_busy_loop)
		 */
1895		if (napi_complete_done(napi, rx_work_done) &&
1896		    READ_ONCE(ena_napi->interrupts_masked)) {
1897			smp_rmb(); /* make sure interrupts_masked is read */
1898			WRITE_ONCE(ena_napi->interrupts_masked, false);
1899			/* We apply adaptive moderation on Rx path only.
1900			 * Tx uses static interrupt moderation.
1901			 */
1902			if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1903				ena_adjust_adaptive_rx_intr_moderation(ena_napi);
1904
1905			ena_unmask_interrupt(tx_ring, rx_ring);
1906		}
1907
1908		ena_update_ring_numa_node(tx_ring, rx_ring);
1909
1910		ret = rx_work_done;
1911	} else {
1912		ret = budget;
1913	}
1914
1915	u64_stats_update_begin(&tx_ring->syncp);
1916	tx_ring->tx_stats.napi_comp += napi_comp_call;
1917	tx_ring->tx_stats.tx_poll++;
1918	u64_stats_update_end(&tx_ring->syncp);
1919
1920	return ret;
1921}
1922
1923static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1924{
1925	struct ena_adapter *adapter = (struct ena_adapter *)data;
1926
1927	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1928
1929	/* Don't call the aenq handler before probe is done */
1930	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1931		ena_com_aenq_intr_handler(adapter->ena_dev, data);
1932
1933	return IRQ_HANDLED;
1934}
1935
1936/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1937 * @irq: interrupt number
1938 * @data: pointer to a network interface private napi device structure
1939 */
1940static irqreturn_t ena_intr_msix_io(int irq, void *data)
1941{
1942	struct ena_napi *ena_napi = data;
1943
1944	ena_napi->first_interrupt = true;
1945
1946	WRITE_ONCE(ena_napi->interrupts_masked, true);
1947	smp_wmb(); /* write interrupts_masked before calling napi */
1948
1949	napi_schedule_irqoff(&ena_napi->napi);
1950
1951	return IRQ_HANDLED;
1952}
1953
/* Reserve a single MSI-X vector for management (admin + aenq),
 * plus one vector for each potential I/O queue.
 * The number of potential I/O queues is the minimum of what the device
 * supports and the number of vCPUs.
 */
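/* For example, on an 8-vCPU instance whose device supports 32 queues,
 * ENA_MAX_MSIX_VEC() would evaluate to 9 vectors (1 management + 8 I/O),
 * assuming ENA_ADMIN_MSIX_VEC covers only the management vector. If
 * pci_alloc_irq_vectors() grants fewer vectors than requested,
 * num_io_queues is reduced accordingly below.
 */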
1959static int ena_enable_msix(struct ena_adapter *adapter)
1960{
1961	int msix_vecs, irq_cnt;
1962
1963	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1964		netif_err(adapter, probe, adapter->netdev,
1965			  "Error, MSI-X is already enabled\n");
1966		return -EPERM;
1967	}
1968
	/* Reserve the max MSI-X vectors we might need */
1970	msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1971	netif_dbg(adapter, probe, adapter->netdev,
1972		  "Trying to enable MSI-X, vectors %d\n", msix_vecs);
1973
1974	irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1975					msix_vecs, PCI_IRQ_MSIX);
1976
1977	if (irq_cnt < 0) {
1978		netif_err(adapter, probe, adapter->netdev,
1979			  "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1980		return -ENOSPC;
1981	}
1982
1983	if (irq_cnt != msix_vecs) {
1984		netif_notice(adapter, probe, adapter->netdev,
1985			     "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
1986			     irq_cnt, msix_vecs);
1987		adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1988	}
1989
1990	if (ena_init_rx_cpu_rmap(adapter))
1991		netif_warn(adapter, probe, adapter->netdev,
1992			   "Failed to map IRQs to CPUs\n");
1993
1994	adapter->msix_vecs = irq_cnt;
1995	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
1996
1997	return 0;
1998}
1999
2000static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
2001{
2002	u32 cpu;
2003
2004	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
2005		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
2006		 pci_name(adapter->pdev));
2007	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
2008		ena_intr_msix_mgmnt;
2009	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
2010	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
2011		pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
2012	cpu = cpumask_first(cpu_online_mask);
2013	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
2014	cpumask_set_cpu(cpu,
2015			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
2016}
2017
2018static void ena_setup_io_intr(struct ena_adapter *adapter)
2019{
2020	struct net_device *netdev;
2021	int irq_idx, i, cpu;
2022	int io_queue_count;
2023
2024	netdev = adapter->netdev;
2025	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2026
2027	for (i = 0; i < io_queue_count; i++) {
2028		irq_idx = ENA_IO_IRQ_IDX(i);
2029		cpu = i % num_online_cpus();
2030
2031		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
2032			 "%s-Tx-Rx-%d", netdev->name, i);
2033		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
2034		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
2035		adapter->irq_tbl[irq_idx].vector =
2036			pci_irq_vector(adapter->pdev, irq_idx);
2037		adapter->irq_tbl[irq_idx].cpu = cpu;
2038
2039		cpumask_set_cpu(cpu,
2040				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
2041	}
2042}
2043
2044static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
2045{
2046	unsigned long flags = 0;
2047	struct ena_irq *irq;
2048	int rc;
2049
2050	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2051	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2052			 irq->data);
2053	if (rc) {
2054		netif_err(adapter, probe, adapter->netdev,
2055			  "Failed to request admin irq\n");
2056		return rc;
2057	}
2058
2059	netif_dbg(adapter, probe, adapter->netdev,
		  "Set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
2061		  irq->affinity_hint_mask.bits[0], irq->vector);
2062
2063	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2064
2065	return rc;
2066}
2067
2068static int ena_request_io_irq(struct ena_adapter *adapter)
2069{
2070	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2071	unsigned long flags = 0;
2072	struct ena_irq *irq;
2073	int rc = 0, i, k;
2074
2075	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
2076		netif_err(adapter, ifup, adapter->netdev,
2077			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
2078		return -EINVAL;
2079	}
2080
2081	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2082		irq = &adapter->irq_tbl[i];
2083		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2084				 irq->data);
2085		if (rc) {
2086			netif_err(adapter, ifup, adapter->netdev,
2087				  "Failed to request I/O IRQ. index %d rc %d\n",
2088				   i, rc);
2089			goto err;
2090		}
2091
2092		netif_dbg(adapter, ifup, adapter->netdev,
2093			  "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
2094			  i, irq->affinity_hint_mask.bits[0], irq->vector);
2095
2096		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2097	}
2098
2099	return rc;
2100
2101err:
2102	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
2103		irq = &adapter->irq_tbl[k];
2104		free_irq(irq->vector, irq->data);
2105	}
2106
2107	return rc;
2108}
2109
2110static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
2111{
2112	struct ena_irq *irq;
2113
2114	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2115	synchronize_irq(irq->vector);
2116	irq_set_affinity_hint(irq->vector, NULL);
2117	free_irq(irq->vector, irq->data);
2118}
2119
2120static void ena_free_io_irq(struct ena_adapter *adapter)
2121{
2122	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2123	struct ena_irq *irq;
2124	int i;
2125
2126#ifdef CONFIG_RFS_ACCEL
2127	if (adapter->msix_vecs >= 1) {
2128		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
2129		adapter->netdev->rx_cpu_rmap = NULL;
2130	}
2131#endif /* CONFIG_RFS_ACCEL */
2132
2133	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2134		irq = &adapter->irq_tbl[i];
2135		irq_set_affinity_hint(irq->vector, NULL);
2136		free_irq(irq->vector, irq->data);
2137	}
2138}
2139
2140static void ena_disable_msix(struct ena_adapter *adapter)
2141{
2142	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
2143		pci_free_irq_vectors(adapter->pdev);
2144}
2145
2146static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
2147{
2148	u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2149	int i;
2150
2151	if (!netif_running(adapter->netdev))
2152		return;
2153
2154	for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
2155		synchronize_irq(adapter->irq_tbl[i].vector);
2156}
2157
2158static void ena_del_napi_in_range(struct ena_adapter *adapter,
2159				  int first_index,
2160				  int count)
2161{
2162	int i;
2163
2164	for (i = first_index; i < first_index + count; i++) {
2165		netif_napi_del(&adapter->ena_napi[i].napi);
2166
2167		WARN_ON(!ENA_IS_XDP_INDEX(adapter, i) &&
2168			adapter->ena_napi[i].xdp_ring);
2169	}
2170}
2171
2172static void ena_init_napi_in_range(struct ena_adapter *adapter,
2173				   int first_index, int count)
2174{
2175	int i;
2176
2177	for (i = first_index; i < first_index + count; i++) {
2178		struct ena_napi *napi = &adapter->ena_napi[i];
2179
2180		netif_napi_add(adapter->netdev,
2181			       &napi->napi,
2182			       ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll,
2183			       ENA_NAPI_BUDGET);
2184
2185		if (!ENA_IS_XDP_INDEX(adapter, i)) {
2186			napi->rx_ring = &adapter->rx_ring[i];
2187			napi->tx_ring = &adapter->tx_ring[i];
2188		} else {
2189			napi->xdp_ring = &adapter->tx_ring[i];
2190		}
2191		napi->qid = i;
2192	}
2193}
2194
2195static void ena_napi_disable_in_range(struct ena_adapter *adapter,
2196				      int first_index,
2197				      int count)
2198{
2199	int i;
2200
2201	for (i = first_index; i < first_index + count; i++)
2202		napi_disable(&adapter->ena_napi[i].napi);
2203}
2204
2205static void ena_napi_enable_in_range(struct ena_adapter *adapter,
2206				     int first_index,
2207				     int count)
2208{
2209	int i;
2210
2211	for (i = first_index; i < first_index + count; i++)
2212		napi_enable(&adapter->ena_napi[i].napi);
2213}
2214
2215/* Configure the Rx forwarding */
2216static int ena_rss_configure(struct ena_adapter *adapter)
2217{
2218	struct ena_com_dev *ena_dev = adapter->ena_dev;
2219	int rc;
2220
2221	/* In case the RSS table wasn't initialized by probe */
2222	if (!ena_dev->rss.tbl_log_size) {
2223		rc = ena_rss_init_default(adapter);
2224		if (rc && (rc != -EOPNOTSUPP)) {
2225			netif_err(adapter, ifup, adapter->netdev,
2226				  "Failed to init RSS rc: %d\n", rc);
2227			return rc;
2228		}
2229	}
2230
2231	/* Set indirect table */
2232	rc = ena_com_indirect_table_set(ena_dev);
2233	if (unlikely(rc && rc != -EOPNOTSUPP))
2234		return rc;
2235
2236	/* Configure hash function (if supported) */
2237	rc = ena_com_set_hash_function(ena_dev);
2238	if (unlikely(rc && (rc != -EOPNOTSUPP)))
2239		return rc;
2240
2241	/* Configure hash inputs (if supported) */
2242	rc = ena_com_set_hash_ctrl(ena_dev);
2243	if (unlikely(rc && (rc != -EOPNOTSUPP)))
2244		return rc;
2245
2246	return 0;
2247}
2248
2249static int ena_up_complete(struct ena_adapter *adapter)
2250{
2251	int rc;
2252
2253	rc = ena_rss_configure(adapter);
2254	if (rc)
2255		return rc;
2256
2257	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
2258
2259	ena_refill_all_rx_bufs(adapter);
2260
2261	/* enable transmits */
2262	netif_tx_start_all_queues(adapter->netdev);
2263
2264	ena_napi_enable_in_range(adapter,
2265				 0,
2266				 adapter->xdp_num_queues + adapter->num_io_queues);
2267
2268	return 0;
2269}
2270
2271static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
2272{
2273	struct ena_com_create_io_ctx ctx;
2274	struct ena_com_dev *ena_dev;
2275	struct ena_ring *tx_ring;
2276	u32 msix_vector;
2277	u16 ena_qid;
2278	int rc;
2279
2280	ena_dev = adapter->ena_dev;
2281
2282	tx_ring = &adapter->tx_ring[qid];
2283	msix_vector = ENA_IO_IRQ_IDX(qid);
2284	ena_qid = ENA_IO_TXQ_IDX(qid);
2285
2286	memset(&ctx, 0x0, sizeof(ctx));
2287
2288	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
2289	ctx.qid = ena_qid;
2290	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
2291	ctx.msix_vector = msix_vector;
2292	ctx.queue_size = tx_ring->ring_size;
2293	ctx.numa_node = cpu_to_node(tx_ring->cpu);
2294
2295	rc = ena_com_create_io_queue(ena_dev, &ctx);
2296	if (rc) {
2297		netif_err(adapter, ifup, adapter->netdev,
2298			  "Failed to create I/O TX queue num %d rc: %d\n",
2299			  qid, rc);
2300		return rc;
2301	}
2302
2303	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2304				     &tx_ring->ena_com_io_sq,
2305				     &tx_ring->ena_com_io_cq);
2306	if (rc) {
2307		netif_err(adapter, ifup, adapter->netdev,
2308			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
2309			  qid, rc);
2310		ena_com_destroy_io_queue(ena_dev, ena_qid);
2311		return rc;
2312	}
2313
2314	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
2315	return rc;
2316}
2317
2318static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
2319					    int first_index, int count)
2320{
2321	struct ena_com_dev *ena_dev = adapter->ena_dev;
2322	int rc, i;
2323
2324	for (i = first_index; i < first_index + count; i++) {
2325		rc = ena_create_io_tx_queue(adapter, i);
2326		if (rc)
2327			goto create_err;
2328	}
2329
2330	return 0;
2331
2332create_err:
2333	while (i-- > first_index)
2334		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
2335
2336	return rc;
2337}
2338
2339static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
2340{
2341	struct ena_com_dev *ena_dev;
2342	struct ena_com_create_io_ctx ctx;
2343	struct ena_ring *rx_ring;
2344	u32 msix_vector;
2345	u16 ena_qid;
2346	int rc;
2347
2348	ena_dev = adapter->ena_dev;
2349
2350	rx_ring = &adapter->rx_ring[qid];
2351	msix_vector = ENA_IO_IRQ_IDX(qid);
2352	ena_qid = ENA_IO_RXQ_IDX(qid);
2353
2354	memset(&ctx, 0x0, sizeof(ctx));
2355
2356	ctx.qid = ena_qid;
2357	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
2358	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2359	ctx.msix_vector = msix_vector;
2360	ctx.queue_size = rx_ring->ring_size;
2361	ctx.numa_node = cpu_to_node(rx_ring->cpu);
2362
2363	rc = ena_com_create_io_queue(ena_dev, &ctx);
2364	if (rc) {
2365		netif_err(adapter, ifup, adapter->netdev,
2366			  "Failed to create I/O RX queue num %d rc: %d\n",
2367			  qid, rc);
2368		return rc;
2369	}
2370
2371	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2372				     &rx_ring->ena_com_io_sq,
2373				     &rx_ring->ena_com_io_cq);
2374	if (rc) {
2375		netif_err(adapter, ifup, adapter->netdev,
2376			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
2377			  qid, rc);
2378		goto err;
2379	}
2380
2381	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
2382
2383	return rc;
2384err:
2385	ena_com_destroy_io_queue(ena_dev, ena_qid);
2386	return rc;
2387}
2388
2389static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
2390{
2391	struct ena_com_dev *ena_dev = adapter->ena_dev;
2392	int rc, i;
2393
2394	for (i = 0; i < adapter->num_io_queues; i++) {
2395		rc = ena_create_io_rx_queue(adapter, i);
2396		if (rc)
2397			goto create_err;
2398		INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
2399	}
2400
2401	return 0;
2402
2403create_err:
2404	while (i--) {
2405		cancel_work_sync(&adapter->ena_napi[i].dim.work);
2406		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2407	}
2408
2409	return rc;
2410}
2411
2412static void set_io_rings_size(struct ena_adapter *adapter,
2413			      int new_tx_size,
2414			      int new_rx_size)
2415{
2416	int i;
2417
2418	for (i = 0; i < adapter->num_io_queues; i++) {
2419		adapter->tx_ring[i].ring_size = new_tx_size;
2420		adapter->rx_ring[i].ring_size = new_rx_size;
2421	}
2422}
2423
/* This function allows queue allocation to back off when the system is
 * low on memory. If there is not enough memory to allocate I/O queues,
 * the driver will try to allocate smaller queues.
 *
 * The backoff algorithm is as follows:
 *  1. Try to allocate TX and RX; if successful, return success.
 *
 *  2. Halve the size of the larger of the RX and TX queues (or both if
 *     their sizes are equal).
 *
 *  3. If the new TX or RX size is smaller than the minimum allowed
 *     (ENA_MIN_RING_SIZE), return failure.
 *
 *  4. Otherwise, go back to step 1.
 */
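/* Worked example (hypothetical sizes): with requested sizes TX=8192 and
 * RX=1024, an -ENOMEM failure retries with TX=4096, then TX=2048, then
 * TX=1024; once the sizes are equal both are halved together (512, 256,
 * ...) until allocation succeeds or a size would drop below
 * ENA_MIN_RING_SIZE.
 */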
2439static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2440{
2441	int rc, cur_rx_ring_size, cur_tx_ring_size;
2442	int new_rx_ring_size, new_tx_ring_size;
2443
	/* Current queue sizes might be smaller than the requested ones
	 * due to past queue allocation failures.
	 */
2447	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2448			  adapter->requested_rx_ring_size);
2449
2450	while (1) {
2451		if (ena_xdp_present(adapter)) {
2452			rc = ena_setup_and_create_all_xdp_queues(adapter);
2453
2454			if (rc)
2455				goto err_setup_tx;
2456		}
2457		rc = ena_setup_tx_resources_in_range(adapter,
2458						     0,
2459						     adapter->num_io_queues);
2460		if (rc)
2461			goto err_setup_tx;
2462
2463		rc = ena_create_io_tx_queues_in_range(adapter,
2464						      0,
2465						      adapter->num_io_queues);
2466		if (rc)
2467			goto err_create_tx_queues;
2468
2469		rc = ena_setup_all_rx_resources(adapter);
2470		if (rc)
2471			goto err_setup_rx;
2472
2473		rc = ena_create_all_io_rx_queues(adapter);
2474		if (rc)
2475			goto err_create_rx_queues;
2476
2477		return 0;
2478
2479err_create_rx_queues:
2480		ena_free_all_io_rx_resources(adapter);
2481err_setup_rx:
2482		ena_destroy_all_tx_queues(adapter);
2483err_create_tx_queues:
2484		ena_free_all_io_tx_resources(adapter);
2485err_setup_tx:
2486		if (rc != -ENOMEM) {
2487			netif_err(adapter, ifup, adapter->netdev,
2488				  "Queue creation failed with error code %d\n",
2489				  rc);
2490			return rc;
2491		}
2492
2493		cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2494		cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2495
2496		netif_err(adapter, ifup, adapter->netdev,
2497			  "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2498			  cur_tx_ring_size, cur_rx_ring_size);
2499
2500		new_tx_ring_size = cur_tx_ring_size;
2501		new_rx_ring_size = cur_rx_ring_size;
2502
2503		/* Decrease the size of the larger queue, or
2504		 * decrease both if they are the same size.
2505		 */
2506		if (cur_rx_ring_size <= cur_tx_ring_size)
2507			new_tx_ring_size = cur_tx_ring_size / 2;
2508		if (cur_rx_ring_size >= cur_tx_ring_size)
2509			new_rx_ring_size = cur_rx_ring_size / 2;
2510
2511		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2512		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
2513			netif_err(adapter, ifup, adapter->netdev,
2514				  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2515				  ENA_MIN_RING_SIZE);
2516			return rc;
2517		}
2518
2519		netif_err(adapter, ifup, adapter->netdev,
2520			  "Retrying queue creation with sizes TX=%d, RX=%d\n",
2521			  new_tx_ring_size,
2522			  new_rx_ring_size);
2523
2524		set_io_rings_size(adapter, new_tx_ring_size,
2525				  new_rx_ring_size);
2526	}
2527}
2528
2529static int ena_up(struct ena_adapter *adapter)
2530{
2531	int io_queue_count, rc, i;
2532
2533	netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
2534
2535	io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2536	ena_setup_io_intr(adapter);
2537
	/* NAPI poll functions should be initialized before running
	 * request_irq() to handle a rare condition where a pending
	 * interrupt causes the ISR to fire immediately while the poll
	 * function is not set yet, leading to a NULL dereference
	 */
2543	ena_init_napi_in_range(adapter, 0, io_queue_count);
2544
2545	rc = ena_request_io_irq(adapter);
2546	if (rc)
2547		goto err_req_irq;
2548
2549	rc = create_queues_with_size_backoff(adapter);
2550	if (rc)
2551		goto err_create_queues_with_backoff;
2552
2553	rc = ena_up_complete(adapter);
2554	if (rc)
2555		goto err_up;
2556
2557	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2558		netif_carrier_on(adapter->netdev);
2559
2560	u64_stats_update_begin(&adapter->syncp);
2561	adapter->dev_stats.interface_up++;
2562	u64_stats_update_end(&adapter->syncp);
2563
2564	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2565
2566	/* Enable completion queues interrupt */
2567	for (i = 0; i < adapter->num_io_queues; i++)
2568		ena_unmask_interrupt(&adapter->tx_ring[i],
2569				     &adapter->rx_ring[i]);
2570
	/* Schedule NAPI in case there are pending packets
	 * left over from the last time NAPI was disabled
	 */
2574	for (i = 0; i < io_queue_count; i++)
2575		napi_schedule(&adapter->ena_napi[i].napi);
2576
2577	return rc;
2578
2579err_up:
2580	ena_destroy_all_tx_queues(adapter);
2581	ena_free_all_io_tx_resources(adapter);
2582	ena_destroy_all_rx_queues(adapter);
2583	ena_free_all_io_rx_resources(adapter);
2584err_create_queues_with_backoff:
2585	ena_free_io_irq(adapter);
2586err_req_irq:
2587	ena_del_napi_in_range(adapter, 0, io_queue_count);
2588
2589	return rc;
2590}
2591
2592static void ena_down(struct ena_adapter *adapter)
2593{
2594	int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2595
2596	netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
2597
2598	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2599
2600	u64_stats_update_begin(&adapter->syncp);
2601	adapter->dev_stats.interface_down++;
2602	u64_stats_update_end(&adapter->syncp);
2603
2604	netif_carrier_off(adapter->netdev);
2605	netif_tx_disable(adapter->netdev);
2606
2607	/* After this point the napi handler won't enable the tx queue */
2608	ena_napi_disable_in_range(adapter, 0, io_queue_count);
2609
	/* After destroying the queues there won't be any new interrupts */
2611
2612	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
2613		int rc;
2614
2615		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2616		if (rc)
2617			netif_err(adapter, ifdown, adapter->netdev,
2618				  "Device reset failed\n");
2619		/* stop submitting admin commands on a device that was reset */
2620		ena_com_set_admin_running_state(adapter->ena_dev, false);
2621	}
2622
2623	ena_destroy_all_io_queues(adapter);
2624
2625	ena_disable_io_intr_sync(adapter);
2626	ena_free_io_irq(adapter);
2627	ena_del_napi_in_range(adapter, 0, io_queue_count);
2628
2629	ena_free_all_tx_bufs(adapter);
2630	ena_free_all_rx_bufs(adapter);
2631	ena_free_all_io_tx_resources(adapter);
2632	ena_free_all_io_rx_resources(adapter);
2633}
2634
2635/* ena_open - Called when a network interface is made active
2636 * @netdev: network interface device structure
2637 *
2638 * Returns 0 on success, negative value on failure
2639 *
2640 * The open entry point is called when a network interface is made
2641 * active by the system (IFF_UP).  At this point all resources needed
2642 * for transmit and receive operations are allocated, the interrupt
2643 * handler is registered with the OS, the watchdog timer is started,
2644 * and the stack is notified that the interface is ready.
2645 */
2646static int ena_open(struct net_device *netdev)
2647{
2648	struct ena_adapter *adapter = netdev_priv(netdev);
2649	int rc;
2650
2651	/* Notify the stack of the actual queue counts. */
2652	rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
2653	if (rc) {
2654		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
2655		return rc;
2656	}
2657
2658	rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
2659	if (rc) {
2660		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
2661		return rc;
2662	}
2663
2664	rc = ena_up(adapter);
2665	if (rc)
2666		return rc;
2667
2668	return rc;
2669}
2670
2671/* ena_close - Disables a network interface
2672 * @netdev: network interface device structure
2673 *
2674 * Returns 0, this is not allowed to fail
2675 *
2676 * The close entry point is called when an interface is de-activated
2677 * by the OS.  The hardware is still under the drivers control, but
2678 * needs to be disabled.  A global MAC reset is issued to stop the
2679 * hardware, and all transmit and receive resources are freed.
2680 */
2681static int ena_close(struct net_device *netdev)
2682{
2683	struct ena_adapter *adapter = netdev_priv(netdev);
2684
2685	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2686
2687	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2688		return 0;
2689
2690	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2691		ena_down(adapter);
2692
	/* Check the device status and issue a reset if needed */
2694	check_for_admin_com_state(adapter);
2695	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2696		netif_err(adapter, ifdown, adapter->netdev,
2697			  "Destroy failure, restarting device\n");
2698		ena_dump_stats_to_dmesg(adapter);
2699		/* rtnl lock already obtained in dev_ioctl() layer */
2700		ena_destroy_device(adapter, false);
2701		ena_restore_device(adapter);
2702	}
2703
2704	return 0;
2705}
2706
2707int ena_update_queue_sizes(struct ena_adapter *adapter,
2708			   u32 new_tx_size,
2709			   u32 new_rx_size)
2710{
2711	bool dev_was_up;
2712
2713	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2714	ena_close(adapter->netdev);
2715	adapter->requested_tx_ring_size = new_tx_size;
2716	adapter->requested_rx_ring_size = new_rx_size;
2717	ena_init_io_rings(adapter,
2718			  0,
2719			  adapter->xdp_num_queues +
2720			  adapter->num_io_queues);
2721	return dev_was_up ? ena_up(adapter) : 0;
2722}
2723
2724int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2725{
2726	struct ena_com_dev *ena_dev = adapter->ena_dev;
2727	int prev_channel_count;
2728	bool dev_was_up;
2729
2730	dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2731	ena_close(adapter->netdev);
2732	prev_channel_count = adapter->num_io_queues;
2733	adapter->num_io_queues = new_channel_count;
2734	if (ena_xdp_present(adapter) &&
2735	    ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2736		adapter->xdp_first_ring = new_channel_count;
2737		adapter->xdp_num_queues = new_channel_count;
2738		if (prev_channel_count > new_channel_count)
2739			ena_xdp_exchange_program_rx_in_range(adapter,
2740							     NULL,
2741							     new_channel_count,
2742							     prev_channel_count);
2743		else
2744			ena_xdp_exchange_program_rx_in_range(adapter,
2745							     adapter->xdp_bpf_prog,
2746							     prev_channel_count,
2747							     new_channel_count);
2748	}
2749
2750	/* We need to destroy the rss table so that the indirection
2751	 * table will be reinitialized by ena_up()
2752	 */
2753	ena_com_rss_destroy(ena_dev);
2754	ena_init_io_rings(adapter,
2755			  0,
2756			  adapter->xdp_num_queues +
2757			  adapter->num_io_queues);
2758	return dev_was_up ? ena_open(adapter->netdev) : 0;
2759}
2760
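/* Fill checksum and TSO metadata in the device Tx context from the skb
 * offload state: enable L4 checksum offload (and TSO when gso_size is
 * set), record the L3/L4 protocols and header lengths/offsets, and mark
 * the meta section valid. When meta caching is disabled, a zeroed meta
 * section is sent with every packet.
 */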
2761static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
2762			struct sk_buff *skb,
2763			bool disable_meta_caching)
2764{
2765	u32 mss = skb_shinfo(skb)->gso_size;
2766	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2767	u8 l4_protocol = 0;
2768
2769	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2770		ena_tx_ctx->l4_csum_enable = 1;
2771		if (mss) {
2772			ena_tx_ctx->tso_enable = 1;
2773			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2774			ena_tx_ctx->l4_csum_partial = 0;
2775		} else {
2776			ena_tx_ctx->tso_enable = 0;
2777			ena_meta->l4_hdr_len = 0;
2778			ena_tx_ctx->l4_csum_partial = 1;
2779		}
2780
2781		switch (ip_hdr(skb)->version) {
2782		case IPVERSION:
2783			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2784			if (ip_hdr(skb)->frag_off & htons(IP_DF))
2785				ena_tx_ctx->df = 1;
2786			if (mss)
2787				ena_tx_ctx->l3_csum_enable = 1;
2788			l4_protocol = ip_hdr(skb)->protocol;
2789			break;
2790		case 6:
2791			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2792			l4_protocol = ipv6_hdr(skb)->nexthdr;
2793			break;
2794		default:
2795			break;
2796		}
2797
2798		if (l4_protocol == IPPROTO_TCP)
2799			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2800		else
2801			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2802
2803		ena_meta->mss = mss;
2804		ena_meta->l3_hdr_len = skb_network_header_len(skb);
2805		ena_meta->l3_hdr_offset = skb_network_offset(skb);
2806		ena_tx_ctx->meta_valid = 1;
2807	} else if (disable_meta_caching) {
2808		memset(ena_meta, 0, sizeof(*ena_meta));
2809		ena_tx_ctx->meta_valid = 1;
2810	} else {
2811		ena_tx_ctx->meta_valid = 0;
2812	}
2813}
2814
2815static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2816				       struct sk_buff *skb)
2817{
2818	int num_frags, header_len, rc;
2819
2820	num_frags = skb_shinfo(skb)->nr_frags;
2821	header_len = skb_headlen(skb);
2822
2823	if (num_frags < tx_ring->sgl_size)
2824		return 0;
2825
2826	if ((num_frags == tx_ring->sgl_size) &&
2827	    (header_len < tx_ring->tx_max_header_size))
2828		return 0;
2829
2830	u64_stats_update_begin(&tx_ring->syncp);
2831	tx_ring->tx_stats.linearize++;
2832	u64_stats_update_end(&tx_ring->syncp);
2833
2834	rc = skb_linearize(skb);
2835	if (unlikely(rc)) {
2836		u64_stats_update_begin(&tx_ring->syncp);
2837		tx_ring->tx_stats.linearize_failed++;
2838		u64_stats_update_end(&tx_ring->syncp);
2839	}
2840
2841	return rc;
2842}
2843
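/* Map the skb for transmission. In LLQ (device placement) mode, the first
 * push_len bytes are copied into a push header that is written directly to
 * the device, so DMA mapping starts after that offset; the 'delta'
 * bookkeeping skips any pushed bytes that happened to reside in the first
 * fragments. In host placement mode, the linear part and each fragment are
 * simply DMA mapped.
 */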
2844static int ena_tx_map_skb(struct ena_ring *tx_ring,
2845			  struct ena_tx_buffer *tx_info,
2846			  struct sk_buff *skb,
2847			  void **push_hdr,
2848			  u16 *header_len)
2849{
2850	struct ena_adapter *adapter = tx_ring->adapter;
2851	struct ena_com_buf *ena_buf;
2852	dma_addr_t dma;
2853	u32 skb_head_len, frag_len, last_frag;
2854	u16 push_len = 0;
2855	u16 delta = 0;
2856	int i = 0;
2857
2858	skb_head_len = skb_headlen(skb);
2859	tx_info->skb = skb;
2860	ena_buf = tx_info->bufs;
2861
2862	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
		/* When the device is in LLQ mode, the driver copies
		 * the header into the device memory space.
		 * The ena_com layer assumes the header is in a linear
		 * memory space.
		 * This assumption might be wrong since part of the header
		 * can reside in the fragmented buffers.
		 * Use skb_header_pointer() to make sure the header is in a
		 * linear memory space.
		 */
2872
2873		push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2874		*push_hdr = skb_header_pointer(skb, 0, push_len,
2875					       tx_ring->push_buf_intermediate_buf);
2876		*header_len = push_len;
2877		if (unlikely(skb->data != *push_hdr)) {
2878			u64_stats_update_begin(&tx_ring->syncp);
2879			tx_ring->tx_stats.llq_buffer_copy++;
2880			u64_stats_update_end(&tx_ring->syncp);
2881
2882			delta = push_len - skb_head_len;
2883		}
2884	} else {
2885		*push_hdr = NULL;
2886		*header_len = min_t(u32, skb_head_len,
2887				    tx_ring->tx_max_header_size);
2888	}
2889
2890	netif_dbg(adapter, tx_queued, adapter->netdev,
2891		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2892		  *push_hdr, push_len);
2893
2894	if (skb_head_len > push_len) {
2895		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2896				     skb_head_len - push_len, DMA_TO_DEVICE);
2897		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2898			goto error_report_dma_error;
2899
2900		ena_buf->paddr = dma;
2901		ena_buf->len = skb_head_len - push_len;
2902
2903		ena_buf++;
2904		tx_info->num_of_bufs++;
2905		tx_info->map_linear_data = 1;
2906	} else {
2907		tx_info->map_linear_data = 0;
2908	}
2909
2910	last_frag = skb_shinfo(skb)->nr_frags;
2911
2912	for (i = 0; i < last_frag; i++) {
2913		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2914
2915		frag_len = skb_frag_size(frag);
2916
2917		if (unlikely(delta >= frag_len)) {
2918			delta -= frag_len;
2919			continue;
2920		}
2921
2922		dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2923				       frag_len - delta, DMA_TO_DEVICE);
2924		if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2925			goto error_report_dma_error;
2926
2927		ena_buf->paddr = dma;
2928		ena_buf->len = frag_len - delta;
2929		ena_buf++;
2930		tx_info->num_of_bufs++;
2931		delta = 0;
2932	}
2933
2934	return 0;
2935
2936error_report_dma_error:
2937	u64_stats_update_begin(&tx_ring->syncp);
2938	tx_ring->tx_stats.dma_mapping_err++;
2939	u64_stats_update_end(&tx_ring->syncp);
2940	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
2941
2942	tx_info->skb = NULL;
2943
2944	tx_info->num_of_bufs += i;
2945	ena_unmap_tx_buff(tx_ring, tx_info);
2946
2947	return -EINVAL;
2948}
2949
2950/* Called with netif_tx_lock. */
2951static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
2952{
2953	struct ena_adapter *adapter = netdev_priv(dev);
2954	struct ena_tx_buffer *tx_info;
2955	struct ena_com_tx_ctx ena_tx_ctx;
2956	struct ena_ring *tx_ring;
2957	struct netdev_queue *txq;
2958	void *push_hdr;
2959	u16 next_to_use, req_id, header_len;
2960	int qid, rc;
2961
2962	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
	/* Determine which tx ring this skb will be placed on */
2964	qid = skb_get_queue_mapping(skb);
2965	tx_ring = &adapter->tx_ring[qid];
2966	txq = netdev_get_tx_queue(dev, qid);
2967
2968	rc = ena_check_and_linearize_skb(tx_ring, skb);
2969	if (unlikely(rc))
2970		goto error_drop_packet;
2971
2972	skb_tx_timestamp(skb);
2973
2974	next_to_use = tx_ring->next_to_use;
2975	req_id = tx_ring->free_ids[next_to_use];
2976	tx_info = &tx_ring->tx_buffer_info[req_id];
2977	tx_info->num_of_bufs = 0;
2978
2979	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2980
2981	rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
2982	if (unlikely(rc))
2983		goto error_drop_packet;
2984
2985	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2986	ena_tx_ctx.ena_bufs = tx_info->bufs;
2987	ena_tx_ctx.push_header = push_hdr;
2988	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2989	ena_tx_ctx.req_id = req_id;
2990	ena_tx_ctx.header_len = header_len;
2991
2992	/* set flags and meta data */
2993	ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
2994
2995	rc = ena_xmit_common(dev,
2996			     tx_ring,
2997			     tx_info,
2998			     &ena_tx_ctx,
2999			     next_to_use,
3000			     skb->len);
3001	if (rc)
3002		goto error_unmap_dma;
3003
3004	netdev_tx_sent_queue(txq, skb->len);
3005
	/* Stop the queue when no more space is available. The packet can use
	 * up to sgl_size + 2 descriptors: one for the meta descriptor and one
	 * for the header (if the header is larger than tx_max_header_size).
	 */
3010	if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3011						   tx_ring->sgl_size + 2))) {
3012		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
3013			  __func__, qid);
3014
3015		netif_tx_stop_queue(txq);
3016		u64_stats_update_begin(&tx_ring->syncp);
3017		tx_ring->tx_stats.queue_stop++;
3018		u64_stats_update_end(&tx_ring->syncp);
3019
		/* There is a rare condition where this function decides to
		 * stop the queue but meanwhile clean_tx_irq updates
		 * next_to_completion and terminates.
		 * The queue would then remain stopped forever.
		 * To solve this issue add an smp_mb() to make sure that the
		 * netif_tx_stop_queue() write is visible before checking if
		 * there is additional space in the queue.
		 */
3028		smp_mb();
3029
3030		if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3031						 ENA_TX_WAKEUP_THRESH)) {
3032			netif_tx_wake_queue(txq);
3033			u64_stats_update_begin(&tx_ring->syncp);
3034			tx_ring->tx_stats.queue_wakeup++;
3035			u64_stats_update_end(&tx_ring->syncp);
3036		}
3037	}
3038
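	/* Defer the doorbell while the stack indicates that more packets are
	 * queued behind this one (netdev_xmit_more()), so that several
	 * descriptors can be submitted per doorbell write.
	 */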
3039	if (netif_xmit_stopped(txq) || !netdev_xmit_more()) {
3040		/* trigger the dma engine. ena_com_write_sq_doorbell()
3041		 * has a mb
3042		 */
3043		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3044		u64_stats_update_begin(&tx_ring->syncp);
3045		tx_ring->tx_stats.doorbells++;
3046		u64_stats_update_end(&tx_ring->syncp);
3047	}
3048
3049	return NETDEV_TX_OK;
3050
3051error_unmap_dma:
3052	ena_unmap_tx_buff(tx_ring, tx_info);
3053	tx_info->skb = NULL;
3054
3055error_drop_packet:
3056	dev_kfree_skb(skb);
3057	return NETDEV_TX_OK;
3058}
3059
3060static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
3061			    struct net_device *sb_dev)
3062{
3063	u16 qid;
	/* We suspect that this is good for in-kernel network services that
	 * want to loop incoming skb rx to tx. In normal user-generated
	 * traffic, most probably we will not get to this
	 */
3068	if (skb_rx_queue_recorded(skb))
3069		qid = skb_get_rx_queue(skb);
3070	else
3071		qid = netdev_pick_tx(dev, skb, NULL);
3072
3073	return qid;
3074}
3075
3076static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3077{
3078	struct device *dev = &pdev->dev;
3079	struct ena_admin_host_info *host_info;
3080	int rc;
3081
3082	/* Allocate only the host info */
3083	rc = ena_com_allocate_host_info(ena_dev);
3084	if (rc) {
3085		dev_err(dev, "Cannot allocate host info\n");
3086		return;
3087	}
3088
3089	host_info = ena_dev->host_attr.host_info;
3090
3091	host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
3092	host_info->os_type = ENA_ADMIN_OS_LINUX;
3093	host_info->kernel_ver = LINUX_VERSION_CODE;
3094	strlcpy(host_info->kernel_ver_str, utsname()->version,
3095		sizeof(host_info->kernel_ver_str) - 1);
3096	host_info->os_dist = 0;
3097	strncpy(host_info->os_dist_str, utsname()->release,
3098		sizeof(host_info->os_dist_str) - 1);
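	/* Pack the driver version into the admin host-info layout:
	 * major, minor and sub-minor fields plus a module-type field
	 * ('K' presumably denoting the in-kernel driver).
	 */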
3099	host_info->driver_version =
3100		(DRV_MODULE_GEN_MAJOR) |
3101		(DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3102		(DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
3103		("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
3104	host_info->num_cpus = num_online_cpus();
3105
3106	host_info->driver_supported_features =
3107		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
3108		ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
3109		ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
3110		ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
3111
3112	rc = ena_com_set_host_attributes(ena_dev);
3113	if (rc) {
3114		if (rc == -EOPNOTSUPP)
3115			dev_warn(dev, "Cannot set host attributes\n");
3116		else
3117			dev_err(dev, "Cannot set host attributes\n");
3118
3119		goto err;
3120	}
3121
3122	return;
3123
3124err:
3125	ena_com_delete_host_info(ena_dev);
3126}
3127
3128static void ena_config_debug_area(struct ena_adapter *adapter)
3129{
3130	u32 debug_area_size;
3131	int rc, ss_count;
3132
3133	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
3134	if (ss_count <= 0) {
3135		netif_err(adapter, drv, adapter->netdev,
3136			  "SS count is negative\n");
3137		return;
3138	}
3139
	/* Allocate 32 bytes for each string and 64 bits for the value */
3141	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
3142
3143	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
3144	if (rc) {
3145		netif_err(adapter, drv, adapter->netdev,
3146			  "Cannot allocate debug area\n");
3147		return;
3148	}
3149
3150	rc = ena_com_set_host_attributes(adapter->ena_dev);
3151	if (rc) {
3152		if (rc == -EOPNOTSUPP)
3153			netif_warn(adapter, drv, adapter->netdev,
3154				   "Cannot set host attributes\n");
3155		else
3156			netif_err(adapter, drv, adapter->netdev,
3157				  "Cannot set host attributes\n");
3158		goto err;
3159	}
3160
3161	return;
3162err:
3163	ena_com_delete_debug_area(adapter->ena_dev);
3164}
3165
3166int ena_update_hw_stats(struct ena_adapter *adapter)
3167{
3168	int rc = 0;
3169
3170	rc = ena_com_get_eni_stats(adapter->ena_dev, &adapter->eni_stats);
3171	if (rc) {
3172		dev_info_once(&adapter->pdev->dev, "Failed to get ENI stats\n");
3173		return rc;
3174	}
3175
3176	return 0;
3177}
3178
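/* Aggregate the per-ring Tx/Rx counters into rtnl_link_stats64. Each
 * ring's counters are read inside a u64_stats fetch/retry loop so that
 * the 64-bit values remain consistent on 32-bit hosts.
 */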
3179static void ena_get_stats64(struct net_device *netdev,
3180			    struct rtnl_link_stats64 *stats)
3181{
3182	struct ena_adapter *adapter = netdev_priv(netdev);
3183	struct ena_ring *rx_ring, *tx_ring;
3184	unsigned int start;
3185	u64 rx_drops;
3186	u64 tx_drops;
3187	int i;
3188
3189	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3190		return;
3191
3192	for (i = 0; i < adapter->num_io_queues; i++) {
3193		u64 bytes, packets;
3194
3195		tx_ring = &adapter->tx_ring[i];
3196
3197		do {
3198			start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
3199			packets = tx_ring->tx_stats.cnt;
3200			bytes = tx_ring->tx_stats.bytes;
3201		} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
3202
3203		stats->tx_packets += packets;
3204		stats->tx_bytes += bytes;
3205
3206		rx_ring = &adapter->rx_ring[i];
3207
3208		do {
3209			start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
3210			packets = rx_ring->rx_stats.cnt;
3211			bytes = rx_ring->rx_stats.bytes;
3212		} while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
3213
3214		stats->rx_packets += packets;
3215		stats->rx_bytes += bytes;
3216	}
3217
3218	do {
3219		start = u64_stats_fetch_begin_irq(&adapter->syncp);
3220		rx_drops = adapter->dev_stats.rx_drops;
3221		tx_drops = adapter->dev_stats.tx_drops;
3222	} while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
3223
3224	stats->rx_dropped = rx_drops;
3225	stats->tx_dropped = tx_drops;
3226
3227	stats->multicast = 0;
3228	stats->collisions = 0;
3229
3230	stats->rx_length_errors = 0;
3231	stats->rx_crc_errors = 0;
3232	stats->rx_frame_errors = 0;
3233	stats->rx_fifo_errors = 0;
3234	stats->rx_missed_errors = 0;
3235	stats->tx_window_errors = 0;
3236
3237	stats->rx_errors = 0;
3238	stats->tx_errors = 0;
3239}
3240
3241static const struct net_device_ops ena_netdev_ops = {
3242	.ndo_open		= ena_open,
3243	.ndo_stop		= ena_close,
3244	.ndo_start_xmit		= ena_start_xmit,
3245	.ndo_select_queue	= ena_select_queue,
3246	.ndo_get_stats64	= ena_get_stats64,
3247	.ndo_tx_timeout		= ena_tx_timeout,
3248	.ndo_change_mtu		= ena_change_mtu,
3249	.ndo_set_mac_address	= NULL,
3250	.ndo_validate_addr	= eth_validate_addr,
3251	.ndo_bpf		= ena_xdp,
3252};
3253
3254static int ena_device_validate_params(struct ena_adapter *adapter,
3255				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
3256{
3257	struct net_device *netdev = adapter->netdev;
3258	int rc;
3259
3260	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
3261			      adapter->mac_addr);
3262	if (!rc) {
3263		netif_err(adapter, drv, netdev,
			  "Error, MAC addresses are different\n");
3265		return -EINVAL;
3266	}
3267
3268	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
3269		netif_err(adapter, drv, netdev,
3270			  "Error, device max mtu is smaller than netdev MTU\n");
3271		return -EINVAL;
3272	}
3273
3274	return 0;
3275}
3276
3277static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
3278{
3279	llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3280	llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3281	llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3282	llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3283	llq_config->llq_ring_entry_size_value = 128;
3284}
3285
3286static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3287					   struct ena_com_dev *ena_dev,
3288					   struct ena_admin_feature_llq_desc *llq,
3289					   struct ena_llq_configurations *llq_default_configurations)
3290{
3291	int rc;
3292	u32 llq_feature_mask;
3293
3294	llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3295	if (!(ena_dev->supported_features & llq_feature_mask)) {
3296		dev_err(&pdev->dev,
			"LLQ is not supported. Fallback to host mode policy.\n");
3298		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3299		return 0;
3300	}
3301
3302	rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3303	if (unlikely(rc)) {
3304		dev_err(&pdev->dev,
3305			"Failed to configure the device mode.  Fallback to host mode policy.\n");
3306		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3307	}
3308
3309	return 0;
3310}
3311
3312static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
3313			       int bars)
3314{
3315	bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
3316
3317	if (!has_mem_bar) {
3318		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
3319			dev_err(&pdev->dev,
3320				"ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
3321			ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3322		}
3323
3324		return 0;
3325	}
3326
3327	ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3328					   pci_resource_start(pdev, ENA_MEM_BAR),
3329					   pci_resource_len(pdev, ENA_MEM_BAR));
3330
3331	if (!ena_dev->mem_bar)
3332		return -EFAULT;
3333
3334	return 0;
3335}
3336
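/* Bring up the control path of the device: initialize the MMIO read-less
 * request mechanism, reset the device, validate its version, set the DMA
 * mask from the reported DMA width, initialize the admin queue (in polling
 * mode until MSI-X is configured), publish host info, fetch the device
 * attributes, configure the AENQ groups and choose the Tx queue placement
 * policy (LLQ vs host memory).
 */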
3337static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
3338			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
3339			   bool *wd_state)
3340{
3341	struct ena_llq_configurations llq_config;
3342	struct device *dev = &pdev->dev;
3343	bool readless_supported;
3344	u32 aenq_groups;
3345	int dma_width;
3346	int rc;
3347
3348	rc = ena_com_mmio_reg_read_request_init(ena_dev);
3349	if (rc) {
3350		dev_err(dev, "Failed to init mmio read less\n");
3351		return rc;
3352	}
3353
	/* The PCIe configuration space revision id indicates whether mmio
	 * register read is disabled
	 */
3357	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
3358	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3359
3360	rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3361	if (rc) {
3362		dev_err(dev, "Can not reset device\n");
3363		goto err_mmio_read_less;
3364	}
3365
3366	rc = ena_com_validate_version(ena_dev);
3367	if (rc) {
3368		dev_err(dev, "Device version is too low\n");
3369		goto err_mmio_read_less;
3370	}
3371
3372	dma_width = ena_com_get_dma_width(ena_dev);
3373	if (dma_width < 0) {
3374		dev_err(dev, "Invalid dma width value %d", dma_width);
3375		rc = dma_width;
3376		goto err_mmio_read_less;
3377	}
3378
3379	rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
3380	if (rc) {
3381		dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
3382		goto err_mmio_read_less;
3383	}
3384
3385	/* ENA admin level init */
3386	rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3387	if (rc) {
3388		dev_err(dev,
3389			"Can not initialize ena admin queue with device\n");
3390		goto err_mmio_read_less;
3391	}
3392
	/* To enable the MSI-X interrupts the driver needs to know the number
	 * of queues, so the driver uses polling mode to retrieve this
	 * information
	 */
3397	ena_com_set_admin_polling_mode(ena_dev, true);
3398
3399	ena_config_host_info(ena_dev, pdev);
3400
	/* Get Device Attributes */
3402	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3403	if (rc) {
3404		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
3405		goto err_admin_init;
3406	}
3407
	/* Try to turn on all the available aenq groups */
3409	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3410		BIT(ENA_ADMIN_FATAL_ERROR) |
3411		BIT(ENA_ADMIN_WARNING) |
3412		BIT(ENA_ADMIN_NOTIFICATION) |
3413		BIT(ENA_ADMIN_KEEP_ALIVE);
3414
3415	aenq_groups &= get_feat_ctx->aenq.supported_groups;
3416
3417	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3418	if (rc) {
3419		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
3420		goto err_admin_init;
3421	}
3422
3423	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3424
3425	set_default_llq_configurations(&llq_config);
3426
3427	rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
3428					     &llq_config);
3429	if (rc) {
3430		dev_err(dev, "ENA device init failed\n");
3431		goto err_admin_init;
3432	}
3433
3434	return 0;
3435
3436err_admin_init:
3437	ena_com_delete_host_info(ena_dev);
3438	ena_com_admin_destroy(ena_dev);
3439err_mmio_read_less:
3440	ena_com_mmio_reg_read_request_destroy(ena_dev);
3441
3442	return rc;
3443}
3444
3445static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
3446{
3447	struct ena_com_dev *ena_dev = adapter->ena_dev;
3448	struct device *dev = &adapter->pdev->dev;
3449	int rc;
3450
3451	rc = ena_enable_msix(adapter);
3452	if (rc) {
3453		dev_err(dev, "Can not reserve msix vectors\n");
3454		return rc;
3455	}
3456
3457	ena_setup_mgmnt_intr(adapter);
3458
3459	rc = ena_request_mgmnt_irq(adapter);
3460	if (rc) {
3461		dev_err(dev, "Can not setup management interrupts\n");
3462		goto err_disable_msix;
3463	}
3464
3465	ena_com_set_admin_polling_mode(ena_dev, false);
3466
3467	ena_com_admin_aenq_enable(ena_dev);
3468
3469	return 0;
3470
3471err_disable_msix:
3472	ena_disable_msix(adapter);
3473
3474	return rc;
3475}
3476
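/* Tear down the device in preparation for a reset or removal: stop the
 * timer service, bring the interface down, reset the device (unless
 * ena_down() already did so), free the management IRQ and the MSI-X
 * vectors, and destroy the admin queue and MMIO read-less resources.
 * ena_restore_device() performs the inverse sequence.
 */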
3477static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3478{
3479	struct net_device *netdev = adapter->netdev;
3480	struct ena_com_dev *ena_dev = adapter->ena_dev;
3481	bool dev_up;
3482
3483	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3484		return;
3485
3486	netif_carrier_off(netdev);
3487
3488	del_timer_sync(&adapter->timer_service);
3489
3490	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
3491	adapter->dev_up_before_reset = dev_up;
3492	if (!graceful)
3493		ena_com_set_admin_running_state(ena_dev, false);
3494
3495	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3496		ena_down(adapter);
3497
	/* Stop the device from sending AENQ events (if the reset flag is set
	 * and the device is up, ena_down() has already reset the device).
	 */
3501	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
3502		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3503
3504	ena_free_mgmnt_irq(adapter);
3505
3506	ena_disable_msix(adapter);
3507
3508	ena_com_abort_admin_commands(ena_dev);
3509
3510	ena_com_wait_for_abort_completion(ena_dev);
3511
3512	ena_com_admin_destroy(ena_dev);
3513
3514	ena_com_mmio_reg_read_request_destroy(ena_dev);
3515
3516	/* return reset reason to default value */
3517	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3518
3519	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3520	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3521}
3522
3523static int ena_restore_device(struct ena_adapter *adapter)
3524{
3525	struct ena_com_dev_get_features_ctx get_feat_ctx;
3526	struct ena_com_dev *ena_dev = adapter->ena_dev;
3527	struct pci_dev *pdev = adapter->pdev;
3528	bool wd_state;
3529	int rc;
3530
3531	set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3532	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
3533	if (rc) {
3534		dev_err(&pdev->dev, "Can not initialize device\n");
3535		goto err;
3536	}
3537	adapter->wd_state = wd_state;
3538
3539	rc = ena_device_validate_params(adapter, &get_feat_ctx);
3540	if (rc) {
3541		dev_err(&pdev->dev, "Validation of device parameters failed\n");
3542		goto err_device_destroy;
3543	}
3544
3545	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3546	if (rc) {
3547		dev_err(&pdev->dev, "Enable MSI-X failed\n");
3548		goto err_device_destroy;
3549	}
	/* If the interface was up before the reset, bring it up */
3551	if (adapter->dev_up_before_reset) {
3552		rc = ena_up(adapter);
3553		if (rc) {
3554			dev_err(&pdev->dev, "Failed to create I/O queues\n");
3555			goto err_disable_msix;
3556		}
3557	}
3558
3559	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3560
3561	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3562	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
3563		netif_carrier_on(adapter->netdev);
3564
3565	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3566	adapter->last_keep_alive_jiffies = jiffies;
3567
3568	dev_err(&pdev->dev, "Device reset completed successfully\n");
3569
3570	return rc;
3571err_disable_msix:
3572	ena_free_mgmnt_irq(adapter);
3573	ena_disable_msix(adapter);
3574err_device_destroy:
3575	ena_com_abort_admin_commands(ena_dev);
3576	ena_com_wait_for_abort_completion(ena_dev);
3577	ena_com_admin_destroy(ena_dev);
3578	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3579	ena_com_mmio_reg_read_request_destroy(ena_dev);
3580err:
3581	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3582	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3583	dev_err(&pdev->dev,
3584		"Reset attempt failed. Can not reset the device\n");
3585
3586	return rc;
3587}
3588
3589static void ena_fw_reset_device(struct work_struct *work)
3590{
3591	struct ena_adapter *adapter =
3592		container_of(work, struct ena_adapter, reset_task);
3593
3594	rtnl_lock();
3595
3596	if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3597		ena_destroy_device(adapter, false);
3598		ena_restore_device(adapter);
3599	}
3600
3601	rtnl_unlock();
3602}
3603
3604static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3605					struct ena_ring *rx_ring)
3606{
3607	if (likely(rx_ring->first_interrupt))
3608		return 0;
3609
3610	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3611		return 0;
3612
3613	rx_ring->no_interrupt_event_cnt++;
3614
3615	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3616		netif_err(adapter, rx_err, adapter->netdev,
3617			  "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3618			  rx_ring->qid);
3619		adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3620		smp_mb__before_atomic();
3621		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3622		return -EIO;
3623	}
3624
3625	return 0;
3626}
3627
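/* Scan a Tx ring for packets whose completion is overdue, based on the
 * last_jiffies timestamp recorded at transmit time. A queue that has not
 * seen its first interrupt after twice the completion timeout, or more
 * missed completions than the configured threshold, schedules a device
 * reset.
 */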
3628static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3629					  struct ena_ring *tx_ring)
3630{
3631	struct ena_tx_buffer *tx_buf;
3632	unsigned long last_jiffies;
3633	u32 missed_tx = 0;
3634	int i, rc = 0;
3635
3636	for (i = 0; i < tx_ring->ring_size; i++) {
3637		tx_buf = &tx_ring->tx_buffer_info[i];
3638		last_jiffies = tx_buf->last_jiffies;
3639
3640		if (last_jiffies == 0)
3641			/* no pending Tx at this location */
3642			continue;
3643
3644		if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies +
3645			     2 * adapter->missing_tx_completion_to))) {
			/* If the interrupt is still not received after the
			 * grace period, schedule a reset
			 */
3649			netif_err(adapter, tx_err, adapter->netdev,
3650				  "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
3651				  tx_ring->qid);
3652			adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3653			smp_mb__before_atomic();
3654			set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3655			return -EIO;
3656		}
3657
3658		if (unlikely(time_is_before_jiffies(last_jiffies +
3659				adapter->missing_tx_completion_to))) {
3660			if (!tx_buf->print_once)
3661				netif_notice(adapter, tx_err, adapter->netdev,
3662					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
3663					     tx_ring->qid, i);
3664
3665			tx_buf->print_once = 1;
3666			missed_tx++;
3667		}
3668	}
3669
3670	if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
3671		netif_err(adapter, tx_err, adapter->netdev,
3672			  "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
3673			  missed_tx,
3674			  adapter->missing_tx_completion_threshold);
3675		adapter->reset_reason =
3676			ENA_REGS_RESET_MISS_TX_CMPL;
3677		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3678		rc = -EIO;
3679	}
3680
3681	u64_stats_update_begin(&tx_ring->syncp);
3682	tx_ring->tx_stats.missed_tx += missed_tx;
3683	u64_stats_update_end(&tx_ring->syncp);
3684
3685	return rc;
3686}
3687
3688static void check_for_missing_completions(struct ena_adapter *adapter)
3689{
3690	struct ena_ring *tx_ring;
3691	struct ena_ring *rx_ring;
3692	int i, budget, rc;
3693	int io_queue_count;
3694
3695	io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
	/* Make sure the driver doesn't turn the device off in another process */
3697	smp_rmb();
3698
3699	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3700		return;
3701
3702	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3703		return;
3704
3705	if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
3706		return;
3707
3708	budget = ENA_MONITORED_TX_QUEUES;
3709
3710	for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
3711		tx_ring = &adapter->tx_ring[i];
3712		rx_ring = &adapter->rx_ring[i];
3713
3714		rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3715		if (unlikely(rc))
3716			return;
3717
3718		rc =  !ENA_IS_XDP_INDEX(adapter, i) ?
3719			check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
3720		if (unlikely(rc))
3721			return;
3722
3723		budget--;
3724		if (!budget)
3725			break;
3726	}
3727
3728	adapter->last_monitored_tx_qid = i % io_queue_count;
3729}
3730
3731/* trigger napi schedule after 2 consecutive detections */
3732#define EMPTY_RX_REFILL 2
3733/* Handle the rare case where the device runs out of Rx descriptors and the
3734 * napi handler fails to refill new Rx descriptors (due to a lack of memory,
3735 * for example).
3736 * This case will lead to a deadlock:
3737 * The device won't send interrupts since all the new Rx packets will be dropped.
3738 * The napi handler won't allocate new Rx descriptors, so the device won't be
3739 * able to send new packets.
3740 *
3741 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
3742 * It is recommended to have at least 512MB, with a minimum of 128MB for a
3743 * constrained environment.
3744 *
3745 * When such a situation is detected - reschedule napi.
3746 */
3747static void check_for_empty_rx_ring(struct ena_adapter *adapter)
3748{
3749	struct ena_ring *rx_ring;
3750	int i, refill_required;
3751
3752	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3753		return;
3754
3755	if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3756		return;
3757
3758	for (i = 0; i < adapter->num_io_queues; i++) {
3759		rx_ring = &adapter->rx_ring[i];
3760
3761		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3762		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3763			rx_ring->empty_rx_queue++;
3764
3765			if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3766				u64_stats_update_begin(&rx_ring->syncp);
3767				rx_ring->rx_stats.empty_rx_ring++;
3768				u64_stats_update_end(&rx_ring->syncp);
3769
3770				netif_err(adapter, drv, adapter->netdev,
3771					  "Trigger refill for ring %d\n", i);
3772
3773				napi_schedule(rx_ring->napi);
3774				rx_ring->empty_rx_queue = 0;
3775			}
3776		} else {
3777			rx_ring->empty_rx_queue = 0;
3778		}
3779	}
3780}
3781
3782/* Check for keep alive expiration */
3783static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3784{
3785	unsigned long keep_alive_expired;
3786
3787	if (!adapter->wd_state)
3788		return;
3789
3790	if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3791		return;
3792
3793	keep_alive_expired = adapter->last_keep_alive_jiffies +
3794			     adapter->keep_alive_timeout;
3795	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3796		netif_err(adapter, drv, adapter->netdev,
3797			  "Keep alive watchdog timeout.\n");
3798		u64_stats_update_begin(&adapter->syncp);
3799		adapter->dev_stats.wd_expired++;
3800		u64_stats_update_end(&adapter->syncp);
3801		adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3802		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3803	}
3804}
3805
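/* Schedule a device reset if the admin queue is no longer in a running state */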
3806static void check_for_admin_com_state(struct ena_adapter *adapter)
3807{
3808	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3809		netif_err(adapter, drv, adapter->netdev,
3810			  "ENA admin queue is not in running state!\n");
3811		u64_stats_update_begin(&adapter->syncp);
3812		adapter->dev_stats.admin_q_pause++;
3813		u64_stats_update_end(&adapter->syncp);
3814		adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3815		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3816	}
3817}
3818
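/* Apply device-provided HW hints (admin/MMIO timeouts, missing Tx completion
 * threshold and timeout, netdev watchdog and keep-alive timeouts) to the
 * driver and the netdev.
 */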
3819static void ena_update_hints(struct ena_adapter *adapter,
3820			     struct ena_admin_ena_hw_hints *hints)
3821{
3822	struct net_device *netdev = adapter->netdev;
3823
3824	if (hints->admin_completion_tx_timeout)
3825		adapter->ena_dev->admin_queue.completion_timeout =
3826			hints->admin_completion_tx_timeout * 1000;
3827
3828	if (hints->mmio_read_timeout)
3829		/* convert to usec */
3830		adapter->ena_dev->mmio_read.reg_read_to =
3831			hints->mmio_read_timeout * 1000;
3832
3833	if (hints->missed_tx_completion_count_threshold_to_reset)
3834		adapter->missing_tx_completion_threshold =
3835			hints->missed_tx_completion_count_threshold_to_reset;
3836
3837	if (hints->missing_tx_completion_timeout) {
3838		if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3839			adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3840		else
3841			adapter->missing_tx_completion_to =
3842				msecs_to_jiffies(hints->missing_tx_completion_timeout);
3843	}
3844
3845	if (hints->netdev_wd_timeout)
3846		netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3847
3848	if (hints->driver_watchdog_timeout) {
3849		if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3850			adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3851		else
3852			adapter->keep_alive_timeout =
3853				msecs_to_jiffies(hints->driver_watchdog_timeout);
3854	}
3855}
3856
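/* Report the netdev's currently enabled features to the device, split into
 * the low and high 32 bits of the feature mask.
 */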
3857static void ena_update_host_info(struct ena_admin_host_info *host_info,
3858				 struct net_device *netdev)
3859{
3860	host_info->supported_network_features[0] =
3861		netdev->features & GENMASK_ULL(31, 0);
3862	host_info->supported_network_features[1] =
3863		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
3864}
3865
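/* 1 Hz watchdog: run the keep-alive, admin queue, missing-completion and
 * empty-Rx-ring checks, refresh the debug area and host info, and either
 * queue the reset task or re-arm the timer.
 */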
3866static void ena_timer_service(struct timer_list *t)
3867{
3868	struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
3869	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3870	struct ena_admin_host_info *host_info =
3871		adapter->ena_dev->host_attr.host_info;
3872
3873	check_for_missing_keep_alive(adapter);
3874
3875	check_for_admin_com_state(adapter);
3876
3877	check_for_missing_completions(adapter);
3878
3879	check_for_empty_rx_ring(adapter);
3880
3881	if (debug_area)
3882		ena_dump_stats_to_buf(adapter, debug_area);
3883
3884	if (host_info)
3885		ena_update_host_info(host_info, adapter->netdev);
3886
3887	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3888		netif_err(adapter, drv, adapter->netdev,
3889			  "Trigger reset is on\n");
3890		ena_dump_stats_to_dmesg(adapter);
3891		queue_work(ena_wq, &adapter->reset_task);
3892		return;
3893	}
3894
3895	/* Reset the timer */
3896	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3897}
3898
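/* The number of IO queues is bounded by the device capabilities (Rx/Tx SQ and
 * CQ counts, or the LLQ count when LLQ placement is used for Tx), the number
 * of online CPUs and the available MSI-X vectors (one is reserved for mgmnt).
 */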
3899static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
3900				     struct ena_com_dev *ena_dev,
3901				     struct ena_com_dev_get_features_ctx *get_feat_ctx)
3902{
3903	u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
3904
3905	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3906		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3907			&get_feat_ctx->max_queue_ext.max_queue_ext;
3908		io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
3909				  max_queue_ext->max_rx_cq_num);
3910
3911		io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3912		io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3913	} else {
3914		struct ena_admin_queue_feature_desc *max_queues =
3915			&get_feat_ctx->max_queues;
3916		io_tx_sq_num = max_queues->max_sq_num;
3917		io_tx_cq_num = max_queues->max_cq_num;
3918		io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
3919	}
3920
3921	/* In case of LLQ use the llq max queue number for the Tx SQ */
3922	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3923		io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3924
3925	max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3926	max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
3927	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
3928	max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
3929	/* 1 IRQ for mgmnt and 1 IRQ for each IO direction */
3930	max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
3931
3932	return max_num_io_queues;
3933}
3934
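/* Translate the device's reported offload capabilities into netdev feature
 * flags and expose them via features, hw_features and vlan_features.
 */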
3935static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
3936				 struct net_device *netdev)
3937{
3938	netdev_features_t dev_features = 0;
3939
3940	/* Set offload features */
3941	if (feat->offload.tx &
3942		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
3943		dev_features |= NETIF_F_IP_CSUM;
3944
3945	if (feat->offload.tx &
3946		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
3947		dev_features |= NETIF_F_IPV6_CSUM;
3948
3949	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
3950		dev_features |= NETIF_F_TSO;
3951
3952	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
3953		dev_features |= NETIF_F_TSO6;
3954
3955	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
3956		dev_features |= NETIF_F_TSO_ECN;
3957
3958	if (feat->offload.rx_supported &
3959		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
3960		dev_features |= NETIF_F_RXCSUM;
3961
3962	if (feat->offload.rx_supported &
3963		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
3964		dev_features |= NETIF_F_RXCSUM;
3965
3966	netdev->features =
3967		dev_features |
3968		NETIF_F_SG |
3969		NETIF_F_RXHASH |
3970		NETIF_F_HIGHDMA;
3971
3972	netdev->hw_features |= netdev->features;
3973	netdev->vlan_features |= netdev->features;
3974}
3975
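/* Configure netdev parameters from the device features: MAC address (a random
 * one is generated if the device doesn't provide a valid address), offload
 * features and MTU limits.
 */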
3976static void ena_set_conf_feat_params(struct ena_adapter *adapter,
3977				     struct ena_com_dev_get_features_ctx *feat)
3978{
3979	struct net_device *netdev = adapter->netdev;
3980
3981	/* Copy mac address */
3982	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3983		eth_hw_addr_random(netdev);
3984		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3985	} else {
3986		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3987		ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
3988	}
3989
3990	/* Set offload features */
3991	ena_set_dev_offloads(feat, netdev);
3992
3993	adapter->max_mtu = feat->dev_attr.max_mtu;
3994	netdev->max_mtu = adapter->max_mtu;
3995	netdev->min_mtu = ENA_MIN_MTU;
3996}
3997
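/* Default RSS configuration: fill the indirection table evenly across the IO
 * queues and set up a Toeplitz hash function with the default hash control.
 * -EOPNOTSUPP from the device is not treated as a fatal error.
 */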
3998static int ena_rss_init_default(struct ena_adapter *adapter)
3999{
4000	struct ena_com_dev *ena_dev = adapter->ena_dev;
4001	struct device *dev = &adapter->pdev->dev;
4002	int rc, i;
4003	u32 val;
4004
4005	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
4006	if (unlikely(rc)) {
4007		dev_err(dev, "Cannot init indirect table\n");
4008		goto err_rss_init;
4009	}
4010
4011	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
4012		val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
4013		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
4014						       ENA_IO_RXQ_IDX(val));
4015		if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4016			dev_err(dev, "Cannot fill indirect table\n");
4017			goto err_fill_indir;
4018		}
4019	}
4020
4021	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
4022					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
4023	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4024		dev_err(dev, "Cannot fill hash function\n");
4025		goto err_fill_indir;
4026	}
4027
4028	rc = ena_com_set_default_hash_ctrl(ena_dev);
4029	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4030		dev_err(dev, "Cannot fill hash control\n");
4031		goto err_fill_indir;
4032	}
4033
4034	return 0;
4035
4036err_fill_indir:
4037	ena_com_rss_destroy(ena_dev);
4038err_rss_init:
4039
4040	return rc;
4041}
4042
4043static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
4044{
4045	int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4046
4047	pci_release_selected_regions(pdev, release_bars);
4048}
4049
4050
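/* Derive the default and maximum Tx/Rx ring sizes and SGL sizes from the
 * device capabilities (and the LLQ depth when LLQ placement is used for Tx),
 * rounding the ring sizes down to a power of two.
 */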
4051static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
4052{
4053	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
4054	struct ena_com_dev *ena_dev = ctx->ena_dev;
4055	u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
4056	u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
4057	u32 max_tx_queue_size;
4058	u32 max_rx_queue_size;
4059
4060	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
4061		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
4062			&ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
4063		max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
4064					  max_queue_ext->max_rx_sq_depth);
4065		max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
4066
4067		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4068			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4069						  llq->max_llq_depth);
4070		else
4071			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4072						  max_queue_ext->max_tx_sq_depth);
4073
4074		ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4075					     max_queue_ext->max_per_packet_tx_descs);
4076		ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4077					     max_queue_ext->max_per_packet_rx_descs);
4078	} else {
4079		struct ena_admin_queue_feature_desc *max_queues =
4080			&ctx->get_feat_ctx->max_queues;
4081		max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
4082					  max_queues->max_sq_depth);
4083		max_tx_queue_size = max_queues->max_cq_depth;
4084
4085		if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4086			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4087						  llq->max_llq_depth);
4088		else
4089			max_tx_queue_size = min_t(u32, max_tx_queue_size,
4090						  max_queues->max_sq_depth);
4091
4092		ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4093					     max_queues->max_packet_tx_descs);
4094		ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4095					     max_queues->max_packet_rx_descs);
4096	}
4097
4098	max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
4099	max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
4100
4101	tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
4102				  max_tx_queue_size);
4103	rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
4104				  max_rx_queue_size);
4105
4106	tx_queue_size = rounddown_pow_of_two(tx_queue_size);
4107	rx_queue_size = rounddown_pow_of_two(rx_queue_size);
4108
4109	ctx->max_tx_queue_size = max_tx_queue_size;
4110	ctx->max_rx_queue_size = max_rx_queue_size;
4111	ctx->tx_queue_size = tx_queue_size;
4112	ctx->rx_queue_size = rx_queue_size;
4113
4114	return 0;
4115}
4116
4117/* ena_probe - Device Initialization Routine
4118 * @pdev: PCI device information struct
4119 * @ent: entry in ena_pci_tbl
4120 *
4121 * Returns 0 on success, negative on failure
4122 *
4123 * ena_probe initializes an adapter identified by a pci_dev structure.
4124 * The OS initialization, configuring of the adapter private structure,
4125 * and a hardware reset occur.
4126 */
4127static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
4128{
4129	struct ena_calc_queue_size_ctx calc_queue_ctx = {};
4130	struct ena_com_dev_get_features_ctx get_feat_ctx;
4131	struct ena_com_dev *ena_dev = NULL;
4132	struct ena_adapter *adapter;
4133	struct net_device *netdev;
4134	static int adapters_found;
4135	u32 max_num_io_queues;
4136	bool wd_state;
4137	int bars, rc;
4138
4139	dev_dbg(&pdev->dev, "%s\n", __func__);
4140
4141	rc = pci_enable_device_mem(pdev);
4142	if (rc) {
4143		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
4144		return rc;
4145	}
4146
4147	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
4148	if (rc) {
4149		dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
4150		goto err_disable_device;
4151	}
4152
4153	pci_set_master(pdev);
4154
4155	ena_dev = vzalloc(sizeof(*ena_dev));
4156	if (!ena_dev) {
4157		rc = -ENOMEM;
4158		goto err_disable_device;
4159	}
4160
4161	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4162	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
4163	if (rc) {
4164		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
4165			rc);
4166		goto err_free_ena_dev;
4167	}
4168
4169	ena_dev->reg_bar = devm_ioremap(&pdev->dev,
4170					pci_resource_start(pdev, ENA_REG_BAR),
4171					pci_resource_len(pdev, ENA_REG_BAR));
4172	if (!ena_dev->reg_bar) {
4173		dev_err(&pdev->dev, "Failed to remap regs bar\n");
4174		rc = -EFAULT;
4175		goto err_free_region;
4176	}
4177
4178	ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
4179
4180	ena_dev->dmadev = &pdev->dev;
4181
4182	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
4183	if (rc) {
4184		dev_err(&pdev->dev, "ENA device init failed\n");
4185		if (rc == -ETIME)
4186			rc = -EPROBE_DEFER;
4187		goto err_free_region;
4188	}
4189
4190	rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
4191	if (rc) {
4192		dev_err(&pdev->dev, "ENA llq bar mapping failed\n");
4193		goto err_device_destroy;
4194	}
4195
4196	calc_queue_ctx.ena_dev = ena_dev;
4197	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
4198	calc_queue_ctx.pdev = pdev;
4199
4200	/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
4201	 * Updated during device initialization with the real granularity
4202	 */
4203	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
4204	ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
4205	ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
4206	max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
4207	rc = ena_calc_io_queue_size(&calc_queue_ctx);
4208	if (rc || !max_num_io_queues) {
4209		rc = -EFAULT;
4210		goto err_device_destroy;
4211	}
4212
4213	/* adapter (netdev private area) is zeroed by alloc_etherdev_mq() */
4214	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues);
4215	if (!netdev) {
4216		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
4217		rc = -ENOMEM;
4218		goto err_device_destroy;
4219	}
4220
4221	SET_NETDEV_DEV(netdev, &pdev->dev);
4222
4223	adapter = netdev_priv(netdev);
4224	pci_set_drvdata(pdev, adapter);
4225
4226	adapter->ena_dev = ena_dev;
4227	adapter->netdev = netdev;
4228	adapter->pdev = pdev;
4229
4230	ena_set_conf_feat_params(adapter, &get_feat_ctx);
4231
4232	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
4233	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4234
4235	adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
4236	adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
4237	adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
4238	adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
4239	adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
4240	adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
4241
4242	adapter->num_io_queues = max_num_io_queues;
4243	adapter->max_num_io_queues = max_num_io_queues;
4244	adapter->last_monitored_tx_qid = 0;
4245
4246	adapter->xdp_first_ring = 0;
4247	adapter->xdp_num_queues = 0;
4248
4249	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
4250	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4251		adapter->disable_meta_caching =
4252			!!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
4253			   BIT(ENA_ADMIN_DISABLE_META_CACHING));
4254
4255	adapter->wd_state = wd_state;
4256
4257	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
4258
4259	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
4260	if (rc) {
4261		dev_err(&pdev->dev,
4262			"Failed to query interrupt moderation feature\n");
4263		goto err_netdev_destroy;
4264	}
4265	ena_init_io_rings(adapter,
4266			  0,
4267			  adapter->xdp_num_queues +
4268			  adapter->num_io_queues);
4269
4270	netdev->netdev_ops = &ena_netdev_ops;
4271	netdev->watchdog_timeo = TX_TIMEOUT;
4272	ena_set_ethtool_ops(netdev);
4273
4274	netdev->priv_flags |= IFF_UNICAST_FLT;
4275
4276	u64_stats_init(&adapter->syncp);
4277
4278	rc = ena_enable_msix_and_set_admin_interrupts(adapter);
4279	if (rc) {
4280		dev_err(&pdev->dev,
4281			"Failed to enable and set the admin interrupts\n");
4282		goto err_worker_destroy;
4283	}
4284	rc = ena_rss_init_default(adapter);
4285	if (rc && (rc != -EOPNOTSUPP)) {
4286		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
4287		goto err_free_msix;
4288	}
4289
4290	ena_config_debug_area(adapter);
4291
4292	if (!ena_update_hw_stats(adapter))
4293		adapter->eni_stats_supported = true;
4294	else
4295		adapter->eni_stats_supported = false;
4296
4297	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
4298
4299	netif_carrier_off(netdev);
4300
4301	rc = register_netdev(netdev);
4302	if (rc) {
4303		dev_err(&pdev->dev, "Cannot register net device\n");
4304		goto err_rss;
4305	}
4306
4307	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
4308
4309	adapter->last_keep_alive_jiffies = jiffies;
4310	adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
4311	adapter->missing_tx_completion_to = TX_TIMEOUT;
4312	adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
4313
4314	ena_update_hints(adapter, &get_feat_ctx.hw_hints);
4315
4316	timer_setup(&adapter->timer_service, ena_timer_service, 0);
4317	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4318
4319	dev_info(&pdev->dev,
4320		 "%s found at mem %lx, mac addr %pM\n",
4321		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
4322		 netdev->dev_addr);
4323
4324	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
4325
4326	adapters_found++;
4327
4328	return 0;
4329
4330err_rss:
4331	ena_com_delete_debug_area(ena_dev);
4332	ena_com_rss_destroy(ena_dev);
4333err_free_msix:
4334	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4335	/* stop submitting admin commands on a device that was reset */
4336	ena_com_set_admin_running_state(ena_dev, false);
4337	ena_free_mgmnt_irq(adapter);
4338	ena_disable_msix(adapter);
4339err_worker_destroy:
4340	del_timer(&adapter->timer_service);
4341err_netdev_destroy:
4342	free_netdev(netdev);
4343err_device_destroy:
4344	ena_com_delete_host_info(ena_dev);
4345	ena_com_admin_destroy(ena_dev);
4346err_free_region:
4347	ena_release_bars(ena_dev, pdev);
4348err_free_ena_dev:
4349	vfree(ena_dev);
4350err_disable_device:
4351	pci_disable_device(pdev);
4352	return rc;
4353}
4354
4355/*****************************************************************************/
4356
4357/* __ena_shutoff - Helper used in both PCI remove/shutdown routines
4358 * @pdev: PCI device information struct
4359 * @shutdown: Is it a shutdown operation? If false, means it is a removal
4360 *
4361 * __ena_shutoff is a helper routine that does the real work on the shutdown
4362 * and removal paths; the difference between those paths is whether the
4363 * netdevice is detached or unregistered.
4364 */
4365static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
4366{
4367	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4368	struct ena_com_dev *ena_dev;
4369	struct net_device *netdev;
4370
4371	ena_dev = adapter->ena_dev;
4372	netdev = adapter->netdev;
4373
4374#ifdef CONFIG_RFS_ACCEL
4375	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
4376		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
4377		netdev->rx_cpu_rmap = NULL;
4378	}
4379#endif /* CONFIG_RFS_ACCEL */
4380
4381	/* Make sure timer and reset routine won't be called after
4382	 * freeing device resources.
4383	 */
4384	del_timer_sync(&adapter->timer_service);
4385	cancel_work_sync(&adapter->reset_task);
4386
4387	rtnl_lock(); /* lock released inside the below if-else block */
4388	adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
4389	ena_destroy_device(adapter, true);
4390	if (shutdown) {
4391		netif_device_detach(netdev);
4392		dev_close(netdev);
4393		rtnl_unlock();
4394	} else {
4395		rtnl_unlock();
4396		unregister_netdev(netdev);
4397		free_netdev(netdev);
4398	}
4399
4400	ena_com_rss_destroy(ena_dev);
4401
4402	ena_com_delete_debug_area(ena_dev);
4403
4404	ena_com_delete_host_info(ena_dev);
4405
4406	ena_release_bars(ena_dev, pdev);
4407
4408	pci_disable_device(pdev);
4409
4410	vfree(ena_dev);
4411}
4412
4413/* ena_remove - Device Removal Routine
4414 * @pdev: PCI device information struct
4415 *
4416 * ena_remove is called by the PCI subsystem to alert the driver
4417 * that it should release a PCI device.
4418 */
4419
4420static void ena_remove(struct pci_dev *pdev)
4421{
4422	__ena_shutoff(pdev, false);
4423}
4424
4425/* ena_shutdown - Device Shutdown Routine
4426 * @pdev: PCI device information struct
4427 *
4428 * ena_shutdown is called by the PCI subsystem to alert the driver that
4429 * a shutdown/reboot (or kexec) is happening and device must be disabled.
4430 */
4431
4432static void ena_shutdown(struct pci_dev *pdev)
4433{
4434	__ena_shutoff(pdev, true);
4435}
4436
4437/* ena_suspend - PM suspend callback
4438 * @dev_d: Device information struct
4439 */
4440static int __maybe_unused ena_suspend(struct device *dev_d)
4441{
4442	struct pci_dev *pdev = to_pci_dev(dev_d);
4443	struct ena_adapter *adapter = pci_get_drvdata(pdev);
4444
4445	u64_stats_update_begin(&adapter->syncp);
4446	adapter->dev_stats.suspend++;
4447	u64_stats_update_end(&adapter->syncp);
4448
4449	rtnl_lock();
4450	if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4451		dev_err(&pdev->dev,
4452			"Ignoring device reset request as the device is being suspended\n");
4453		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
4454	}
4455	ena_destroy_device(adapter, true);
4456	rtnl_unlock();
4457	return 0;
4458}
4459
4460/* ena_resume - PM resume callback
4461 * @dev_d: Device information struct
4462 */
4463static int __maybe_unused ena_resume(struct device *dev_d)
4464{
4465	struct ena_adapter *adapter = dev_get_drvdata(dev_d);
4466	int rc;
4467
4468	u64_stats_update_begin(&adapter->syncp);
4469	adapter->dev_stats.resume++;
4470	u64_stats_update_end(&adapter->syncp);
4471
4472	rtnl_lock();
4473	rc = ena_restore_device(adapter);
4474	rtnl_unlock();
4475	return rc;
4476}
4477
4478static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
4479
4480static struct pci_driver ena_pci_driver = {
4481	.name		= DRV_MODULE_NAME,
4482	.id_table	= ena_pci_tbl,
4483	.probe		= ena_probe,
4484	.remove		= ena_remove,
4485	.shutdown	= ena_shutdown,
4486	.driver.pm	= &ena_pm_ops,
4487	.sriov_configure = pci_sriov_configure_simple,
4488};
4489
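/* Module init: create the single-threaded workqueue used for the reset task
 * and register the PCI driver; destroy the workqueue if registration fails.
 */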
4490static int __init ena_init(void)
4491{
4492	int ret;
4493
4494	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
4495	if (!ena_wq) {
4496		pr_err("Failed to create workqueue\n");
4497		return -ENOMEM;
4498	}
4499
4500	ret = pci_register_driver(&ena_pci_driver);
4501	if (ret)
4502		destroy_workqueue(ena_wq);
4503
4504	return ret;
4505}
4506
4507static void __exit ena_cleanup(void)
4508{
4509	pci_unregister_driver(&ena_pci_driver);
4510
4511	if (ena_wq) {
4512		destroy_workqueue(ena_wq);
4513		ena_wq = NULL;
4514	}
4515}
4516
4517/******************************************************************************
4518 ******************************** AENQ Handlers *******************************
4519 *****************************************************************************/
4520/* ena_update_on_link_change:
4521 * Notify the network interface about the change in link status
4522 */
4523static void ena_update_on_link_change(void *adapter_data,
4524				      struct ena_admin_aenq_entry *aenq_e)
4525{
4526	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4527	struct ena_admin_aenq_link_change_desc *aenq_desc =
4528		(struct ena_admin_aenq_link_change_desc *)aenq_e;
4529	int status = aenq_desc->flags &
4530		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4531
4532	if (status) {
4533		netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
4534		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4535		if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
4536			netif_carrier_on(adapter->netdev);
4537	} else {
4538		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4539		netif_carrier_off(adapter->netdev);
4540	}
4541}
4542
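/* AENQ keep-alive handler: refresh the watchdog timestamp and record the
 * device-reported Rx/Tx drop counters (accumulated since the last reset).
 */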
4543static void ena_keep_alive_wd(void *adapter_data,
4544			      struct ena_admin_aenq_entry *aenq_e)
4545{
4546	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4547	struct ena_admin_aenq_keep_alive_desc *desc;
4548	u64 rx_drops;
4549	u64 tx_drops;
4550
4551	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4552	adapter->last_keep_alive_jiffies = jiffies;
4553
4554	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4555	tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
4556
4557	u64_stats_update_begin(&adapter->syncp);
4558	/* These stats are accumulated by the device, so the counters indicate
4559	 * all drops since last reset.
4560	 */
4561	adapter->dev_stats.rx_drops = rx_drops;
4562	adapter->dev_stats.tx_drops = tx_drops;
4563	u64_stats_update_end(&adapter->syncp);
4564}
4565
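/* AENQ notification handler: currently only ENA_ADMIN_UPDATE_HINTS is
 * handled, by applying the inlined HW hints.
 */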
4566static void ena_notification(void *adapter_data,
4567			     struct ena_admin_aenq_entry *aenq_e)
4568{
4569	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4570	struct ena_admin_ena_hw_hints *hints;
4571
4572	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4573	     "Invalid group(%x) expected %x\n",
4574	     aenq_e->aenq_common_desc.group,
4575	     ENA_ADMIN_NOTIFICATION);
4576
4577	switch (aenq_e->aenq_common_desc.syndrome) {
4578	case ENA_ADMIN_UPDATE_HINTS:
4579		hints = (struct ena_admin_ena_hw_hints *)
4580			(&aenq_e->inline_data_w4);
4581		ena_update_hints(adapter, hints);
4582		break;
4583	default:
4584		netif_err(adapter, drv, adapter->netdev,
4585			  "Invalid aenq notification link state %d\n",
4586			  aenq_e->aenq_common_desc.syndrome);
4587	}
4588}
4589
4590/* This handler is called for an unknown event group or an unimplemented handler */
4591static void unimplemented_aenq_handler(void *data,
4592				       struct ena_admin_aenq_entry *aenq_e)
4593{
4594	struct ena_adapter *adapter = (struct ena_adapter *)data;
4595
4596	netif_err(adapter, drv, adapter->netdev,
4597		  "Unknown event was received or event with unimplemented handler\n");
4598}
4599
4600static struct ena_aenq_handlers aenq_handlers = {
4601	.handlers = {
4602		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4603		[ENA_ADMIN_NOTIFICATION] = ena_notification,
4604		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4605	},
4606	.unimplemented_handler = unimplemented_aenq_handler
4607};
4608
4609module_init(ena_init);
4610module_exit(ena_cleanup);
4611