1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2013 - 2021 Intel Corporation. */
3
4#include <linux/etherdevice.h>
5#include <linux/of_net.h>
6#include <linux/pci.h>
7#include <linux/bpf.h>
8#include <generated/utsrelease.h>
9#include <linux/crash_dump.h>
10
11/* Local includes */
12#include "i40e.h"
13#include "i40e_diag.h"
14#include "i40e_xsk.h"
15#include <net/udp_tunnel.h>
16#include <net/xdp_sock_drv.h>
17/* All i40e tracepoints are defined by the include below, which
18 * must be included exactly once across the whole kernel with
19 * CREATE_TRACE_POINTS defined
20 */
21#define CREATE_TRACE_POINTS
22#include "i40e_trace.h"
23
24const char i40e_driver_name[] = "i40e";
25static const char i40e_driver_string[] =
26			"Intel(R) Ethernet Connection XL710 Network Driver";
27
28static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation.";
29
30/* a bit of forward declarations */
31static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
32static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired);
33static int i40e_add_vsi(struct i40e_vsi *vsi);
34static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi);
35static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired);
36static int i40e_setup_misc_vector(struct i40e_pf *pf);
37static void i40e_determine_queue_usage(struct i40e_pf *pf);
38static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
39static void i40e_prep_for_reset(struct i40e_pf *pf);
40static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
41				   bool lock_acquired);
42static int i40e_reset(struct i40e_pf *pf);
43static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
44static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf);
45static int i40e_restore_interrupt_scheme(struct i40e_pf *pf);
46static bool i40e_check_recovery_mode(struct i40e_pf *pf);
47static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw);
48static void i40e_fdir_sb_setup(struct i40e_pf *pf);
49static int i40e_veb_get_bw_info(struct i40e_veb *veb);
50static int i40e_get_capabilities(struct i40e_pf *pf,
51				 enum i40e_admin_queue_opc list_type);
52static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf);
53
54/* i40e_pci_tbl - PCI Device ID Table
55 *
56 * Last entry must be all 0s
57 *
58 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
59 *   Class, Class Mask, private data (not used) }
60 */
61static const struct pci_device_id i40e_pci_tbl[] = {
62	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_XL710), 0},
63	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QEMU), 0},
64	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_B), 0},
65	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_C), 0},
66	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_A), 0},
67	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0},
68	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
69	{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_BC), 0},
70	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
71	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
72	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0},
73	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0},
74	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0},
75	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0},
76	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0},
77	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
78	{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
79	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
80	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0},
81	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722_A), 0},
82	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
83	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
84	{PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0},
85	{PCI_VDEVICE(INTEL, I40E_DEV_ID_XXV710_N3000), 0},
86	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0},
87	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0},
88	/* required last entry */
89	{0, }
90};
91MODULE_DEVICE_TABLE(pci, i40e_pci_tbl);
92
93#define I40E_MAX_VF_COUNT 128
94static int debug = -1;
95module_param(debug, uint, 0);
96MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)");
97
98MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
99MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
100MODULE_LICENSE("GPL v2");
101
102static struct workqueue_struct *i40e_wq;
103
104static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f,
105				  struct net_device *netdev, int delta)
106{
107	struct netdev_hw_addr_list *ha_list;
108	struct netdev_hw_addr *ha;
109
110	if (!f || !netdev)
111		return;
112
113	if (is_unicast_ether_addr(f->macaddr) || is_link_local_ether_addr(f->macaddr))
114		ha_list = &netdev->uc;
115	else
116		ha_list = &netdev->mc;
117
118	netdev_hw_addr_list_for_each(ha, ha_list) {
119		if (ether_addr_equal(ha->addr, f->macaddr)) {
120			ha->refcount += delta;
121			if (ha->refcount <= 0)
122				ha->refcount = 1;
123			break;
124		}
125	}
126}
127
128/**
129 * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
130 * @hw:   pointer to the HW structure
131 * @mem:  ptr to mem struct to fill out
132 * @size: size of memory requested
133 * @alignment: what to align the allocation to
134 **/
135int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
136			    u64 size, u32 alignment)
137{
138	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
139
140	mem->size = ALIGN(size, alignment);
141	mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa,
142				     GFP_KERNEL);
143	if (!mem->va)
144		return -ENOMEM;
145
146	return 0;
147}
148
149/**
150 * i40e_free_dma_mem_d - OS specific memory free for shared code
151 * @hw:   pointer to the HW structure
152 * @mem:  ptr to mem struct to free
153 **/
154int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
155{
156	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
157
158	dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa);
159	mem->va = NULL;
160	mem->pa = 0;
161	mem->size = 0;
162
163	return 0;
164}
165
166/**
167 * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code
168 * @hw:   pointer to the HW structure
169 * @mem:  ptr to mem struct to fill out
170 * @size: size of memory requested
171 **/
172int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
173			     u32 size)
174{
175	mem->size = size;
176	mem->va = kzalloc(size, GFP_KERNEL);
177
178	if (!mem->va)
179		return -ENOMEM;
180
181	return 0;
182}
183
184/**
185 * i40e_free_virt_mem_d - OS specific memory free for shared code
186 * @hw:   pointer to the HW structure
187 * @mem:  ptr to mem struct to free
188 **/
189int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
190{
191	/* it's ok to kfree a NULL pointer */
192	kfree(mem->va);
193	mem->va = NULL;
194	mem->size = 0;
195
196	return 0;
197}
198
199/**
200 * i40e_get_lump - find a lump of free generic resource
201 * @pf: board private structure
202 * @pile: the pile of resource to search
203 * @needed: the number of items needed
204 * @id: an owner id to stick on the items assigned
205 *
206 * Returns the base item index of the lump, or negative for error
207 **/
208static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
209			 u16 needed, u16 id)
210{
211	int ret = -ENOMEM;
212	int i, j;
213
214	if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
215		dev_info(&pf->pdev->dev,
216			 "param err: pile=%s needed=%d id=0x%04x\n",
217			 pile ? "<valid>" : "<null>", needed, id);
218		return -EINVAL;
219	}
220
221	/* Allocate last queue in the pile for FDIR VSI queue
222	 * so it doesn't fragment the qp_pile
223	 */
224	if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) {
225		if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) {
226			dev_err(&pf->pdev->dev,
227				"Cannot allocate queue %d for I40E_VSI_FDIR\n",
228				pile->num_entries - 1);
229			return -ENOMEM;
230		}
231		pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT;
232		return pile->num_entries - 1;
233	}
234
235	i = 0;
236	while (i < pile->num_entries) {
237		/* skip already allocated entries */
238		if (pile->list[i] & I40E_PILE_VALID_BIT) {
239			i++;
240			continue;
241		}
242
243		/* do we have enough in this lump? */
244		for (j = 0; (j < needed) && ((i+j) < pile->num_entries); j++) {
245			if (pile->list[i+j] & I40E_PILE_VALID_BIT)
246				break;
247		}
248
249		if (j == needed) {
250			/* there was enough, so assign it to the requestor */
251			for (j = 0; j < needed; j++)
252				pile->list[i+j] = id | I40E_PILE_VALID_BIT;
253			ret = i;
254			break;
255		}
256
257		/* not enough, so skip over it and continue looking */
258		i += j;
259	}
260
261	return ret;
262}
263
264/**
265 * i40e_put_lump - return a lump of generic resource
266 * @pile: the pile of resource to search
267 * @index: the base item index
268 * @id: the owner id of the items assigned
269 *
270 * Returns the count of items in the lump
271 **/
272static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
273{
274	int valid_id = (id | I40E_PILE_VALID_BIT);
275	int count = 0;
276	u16 i;
277
278	if (!pile || index >= pile->num_entries)
279		return -EINVAL;
280
281	for (i = index;
282	     i < pile->num_entries && pile->list[i] == valid_id;
283	     i++) {
284		pile->list[i] = 0;
285		count++;
286	}
287
288
289	return count;
290}
291
292/**
293 * i40e_find_vsi_from_id - searches for the vsi with the given id
294 * @pf: the pf structure to search for the vsi
295 * @id: id of the vsi it is searching for
296 **/
297struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
298{
299	int i;
300
301	for (i = 0; i < pf->num_alloc_vsi; i++)
302		if (pf->vsi[i] && (pf->vsi[i]->id == id))
303			return pf->vsi[i];
304
305	return NULL;
306}
307
308/**
309 * i40e_service_event_schedule - Schedule the service task to wake up
310 * @pf: board private structure
311 *
312 * If not already scheduled, this puts the task into the work queue
313 **/
314void i40e_service_event_schedule(struct i40e_pf *pf)
315{
316	if ((!test_bit(__I40E_DOWN, pf->state) &&
317	     !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) ||
318	      test_bit(__I40E_RECOVERY_MODE, pf->state))
319		queue_work(i40e_wq, &pf->service_task);
320}
321
322/**
323 * i40e_tx_timeout - Respond to a Tx Hang
324 * @netdev: network interface device structure
325 * @txqueue: queue number timing out
326 *
327 * If any port has noticed a Tx timeout, it is likely that the whole
328 * device is munged, not just the one netdev port, so go for the full
329 * reset.
330 **/
331static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue)
332{
333	struct i40e_netdev_priv *np = netdev_priv(netdev);
334	struct i40e_vsi *vsi = np->vsi;
335	struct i40e_pf *pf = vsi->back;
336	struct i40e_ring *tx_ring = NULL;
337	unsigned int i;
338	u32 head, val;
339
340	pf->tx_timeout_count++;
341
342	/* with txqueue index, find the tx_ring struct */
343	for (i = 0; i < vsi->num_queue_pairs; i++) {
344		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
345			if (txqueue ==
346			    vsi->tx_rings[i]->queue_index) {
347				tx_ring = vsi->tx_rings[i];
348				break;
349			}
350		}
351	}
352
353	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20)))
354		pf->tx_timeout_recovery_level = 1;  /* reset after some time */
355	else if (time_before(jiffies,
356		      (pf->tx_timeout_last_recovery + netdev->watchdog_timeo)))
357		return;   /* don't do any new action before the next timeout */
358
359	/* don't kick off another recovery if one is already pending */
360	if (test_and_set_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state))
361		return;
362
363	if (tx_ring) {
364		head = i40e_get_head(tx_ring);
365		/* Read interrupt register */
366		if (pf->flags & I40E_FLAG_MSIX_ENABLED)
367			val = rd32(&pf->hw,
368			     I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
369						tx_ring->vsi->base_vector - 1));
370		else
371			val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
372
373		netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n",
374			    vsi->seid, txqueue, tx_ring->next_to_clean,
375			    head, tx_ring->next_to_use,
376			    readl(tx_ring->tail), val);
377	}
378
379	pf->tx_timeout_last_recovery = jiffies;
380	netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n",
381		    pf->tx_timeout_recovery_level, txqueue);
382
383	switch (pf->tx_timeout_recovery_level) {
384	case 1:
385		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
386		break;
387	case 2:
388		set_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
389		break;
390	case 3:
391		set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
392		break;
393	default:
394		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n");
395		set_bit(__I40E_DOWN_REQUESTED, pf->state);
396		set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state);
397		break;
398	}
399
400	i40e_service_event_schedule(pf);
401	pf->tx_timeout_recovery_level++;
402}
403
404/**
405 * i40e_get_vsi_stats_struct - Get System Network Statistics
406 * @vsi: the VSI we care about
407 *
408 * Returns the address of the device statistics structure.
409 * The statistics are actually updated from the service task.
410 **/
411struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi)
412{
413	return &vsi->net_stats;
414}
415
416/**
417 * i40e_get_netdev_stats_struct_tx - populate stats from a Tx ring
418 * @ring: Tx ring to get statistics from
419 * @stats: statistics entry to be updated
420 **/
421static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring,
422					    struct rtnl_link_stats64 *stats)
423{
424	u64 bytes, packets;
425	unsigned int start;
426
427	do {
428		start = u64_stats_fetch_begin(&ring->syncp);
429		packets = ring->stats.packets;
430		bytes   = ring->stats.bytes;
431	} while (u64_stats_fetch_retry(&ring->syncp, start));
432
433	stats->tx_packets += packets;
434	stats->tx_bytes   += bytes;
435}
436
437/**
438 * i40e_get_netdev_stats_struct - Get statistics for netdev interface
439 * @netdev: network interface device structure
440 * @stats: data structure to store statistics
441 *
442 * Returns the address of the device statistics structure.
443 * The statistics are actually updated from the service task.
444 **/
445static void i40e_get_netdev_stats_struct(struct net_device *netdev,
446				  struct rtnl_link_stats64 *stats)
447{
448	struct i40e_netdev_priv *np = netdev_priv(netdev);
449	struct i40e_vsi *vsi = np->vsi;
450	struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi);
451	struct i40e_ring *ring;
452	int i;
453
454	if (test_bit(__I40E_VSI_DOWN, vsi->state))
455		return;
456
457	if (!vsi->tx_rings)
458		return;
459
460	rcu_read_lock();
461	for (i = 0; i < vsi->num_queue_pairs; i++) {
462		u64 bytes, packets;
463		unsigned int start;
464
465		ring = READ_ONCE(vsi->tx_rings[i]);
466		if (!ring)
467			continue;
468		i40e_get_netdev_stats_struct_tx(ring, stats);
469
470		if (i40e_enabled_xdp_vsi(vsi)) {
471			ring = READ_ONCE(vsi->xdp_rings[i]);
472			if (!ring)
473				continue;
474			i40e_get_netdev_stats_struct_tx(ring, stats);
475		}
476
477		ring = READ_ONCE(vsi->rx_rings[i]);
478		if (!ring)
479			continue;
480		do {
481			start   = u64_stats_fetch_begin(&ring->syncp);
482			packets = ring->stats.packets;
483			bytes   = ring->stats.bytes;
484		} while (u64_stats_fetch_retry(&ring->syncp, start));
485
486		stats->rx_packets += packets;
487		stats->rx_bytes   += bytes;
488
489	}
490	rcu_read_unlock();
491
492	/* following stats updated by i40e_watchdog_subtask() */
493	stats->multicast	= vsi_stats->multicast;
494	stats->tx_errors	= vsi_stats->tx_errors;
495	stats->tx_dropped	= vsi_stats->tx_dropped;
496	stats->rx_errors	= vsi_stats->rx_errors;
497	stats->rx_dropped	= vsi_stats->rx_dropped;
498	stats->rx_crc_errors	= vsi_stats->rx_crc_errors;
499	stats->rx_length_errors	= vsi_stats->rx_length_errors;
500}
501
502/**
503 * i40e_vsi_reset_stats - Resets all stats of the given vsi
504 * @vsi: the VSI to have its stats reset
505 **/
506void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
507{
508	struct rtnl_link_stats64 *ns;
509	int i;
510
511	if (!vsi)
512		return;
513
514	ns = i40e_get_vsi_stats_struct(vsi);
515	memset(ns, 0, sizeof(*ns));
516	memset(&vsi->net_stats_offsets, 0, sizeof(vsi->net_stats_offsets));
517	memset(&vsi->eth_stats, 0, sizeof(vsi->eth_stats));
518	memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
519	if (vsi->rx_rings && vsi->rx_rings[0]) {
520		for (i = 0; i < vsi->num_queue_pairs; i++) {
521			memset(&vsi->rx_rings[i]->stats, 0,
522			       sizeof(vsi->rx_rings[i]->stats));
523			memset(&vsi->rx_rings[i]->rx_stats, 0,
524			       sizeof(vsi->rx_rings[i]->rx_stats));
525			memset(&vsi->tx_rings[i]->stats, 0,
526			       sizeof(vsi->tx_rings[i]->stats));
527			memset(&vsi->tx_rings[i]->tx_stats, 0,
528			       sizeof(vsi->tx_rings[i]->tx_stats));
529		}
530	}
531	vsi->stat_offsets_loaded = false;
532}
533
534/**
535 * i40e_pf_reset_stats - Reset all of the stats for the given PF
536 * @pf: the PF to be reset
537 **/
538void i40e_pf_reset_stats(struct i40e_pf *pf)
539{
540	int i;
541
542	memset(&pf->stats, 0, sizeof(pf->stats));
543	memset(&pf->stats_offsets, 0, sizeof(pf->stats_offsets));
544	pf->stat_offsets_loaded = false;
545
546	for (i = 0; i < I40E_MAX_VEB; i++) {
547		if (pf->veb[i]) {
548			memset(&pf->veb[i]->stats, 0,
549			       sizeof(pf->veb[i]->stats));
550			memset(&pf->veb[i]->stats_offsets, 0,
551			       sizeof(pf->veb[i]->stats_offsets));
552			memset(&pf->veb[i]->tc_stats, 0,
553			       sizeof(pf->veb[i]->tc_stats));
554			memset(&pf->veb[i]->tc_stats_offsets, 0,
555			       sizeof(pf->veb[i]->tc_stats_offsets));
556			pf->veb[i]->stat_offsets_loaded = false;
557		}
558	}
559	pf->hw_csum_rx_error = 0;
560}
561
562/**
563 * i40e_compute_pci_to_hw_id - compute index form PCI function.
564 * @vsi: ptr to the VSI to read from.
565 * @hw: ptr to the hardware info.
566 **/
567static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw)
568{
569	int pf_count = i40e_get_pf_count(hw);
570
571	if (vsi->type == I40E_VSI_SRIOV)
572		return (hw->port * BIT(7)) / pf_count + vsi->vf_id;
573
574	return hw->port + BIT(7);
575}
576
577/**
578 * i40e_stat_update64 - read and update a 64 bit stat from the chip.
579 * @hw: ptr to the hardware info.
580 * @hireg: the high 32 bit reg to read.
581 * @loreg: the low 32 bit reg to read.
582 * @offset_loaded: has the initial offset been loaded yet.
583 * @offset: ptr to current offset value.
584 * @stat: ptr to the stat.
585 *
586 * Since the device stats are not reset at PFReset, they will not
587 * be zeroed when the driver starts.  We'll save the first values read
588 * and use them as offsets to be subtracted from the raw values in order
589 * to report stats that count from zero.
590 **/
591static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg,
592			       bool offset_loaded, u64 *offset, u64 *stat)
593{
594	u64 new_data;
595
596	new_data = rd64(hw, loreg);
597
598	if (!offset_loaded || new_data < *offset)
599		*offset = new_data;
600	*stat = new_data - *offset;
601}
602
603/**
604 * i40e_stat_update48 - read and update a 48 bit stat from the chip
605 * @hw: ptr to the hardware info
606 * @hireg: the high 32 bit reg to read
607 * @loreg: the low 32 bit reg to read
608 * @offset_loaded: has the initial offset been loaded yet
609 * @offset: ptr to current offset value
610 * @stat: ptr to the stat
611 *
612 * Since the device stats are not reset at PFReset, they likely will not
613 * be zeroed when the driver starts.  We'll save the first values read
614 * and use them as offsets to be subtracted from the raw values in order
615 * to report stats that count from zero.  In the process, we also manage
616 * the potential roll-over.
617 **/
618static void i40e_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg,
619			       bool offset_loaded, u64 *offset, u64 *stat)
620{
621	u64 new_data;
622
623	if (hw->device_id == I40E_DEV_ID_QEMU) {
624		new_data = rd32(hw, loreg);
625		new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32;
626	} else {
627		new_data = rd64(hw, loreg);
628	}
629	if (!offset_loaded)
630		*offset = new_data;
631	if (likely(new_data >= *offset))
632		*stat = new_data - *offset;
633	else
634		*stat = (new_data + BIT_ULL(48)) - *offset;
635	*stat &= 0xFFFFFFFFFFFFULL;
636}
637
638/**
639 * i40e_stat_update32 - read and update a 32 bit stat from the chip
640 * @hw: ptr to the hardware info
641 * @reg: the hw reg to read
642 * @offset_loaded: has the initial offset been loaded yet
643 * @offset: ptr to current offset value
644 * @stat: ptr to the stat
645 **/
646static void i40e_stat_update32(struct i40e_hw *hw, u32 reg,
647			       bool offset_loaded, u64 *offset, u64 *stat)
648{
649	u32 new_data;
650
651	new_data = rd32(hw, reg);
652	if (!offset_loaded)
653		*offset = new_data;
654	if (likely(new_data >= *offset))
655		*stat = (u32)(new_data - *offset);
656	else
657		*stat = (u32)((new_data + BIT_ULL(32)) - *offset);
658}
659
660/**
661 * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
662 * @hw: ptr to the hardware info
663 * @reg: the hw reg to read and clear
664 * @stat: ptr to the stat
665 **/
666static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
667{
668	u32 new_data = rd32(hw, reg);
669
670	wr32(hw, reg, 1); /* must write a nonzero value to clear register */
671	*stat += new_data;
672}
673
674/**
675 * i40e_stats_update_rx_discards - update rx_discards.
676 * @vsi: ptr to the VSI to be updated.
677 * @hw: ptr to the hardware info.
678 * @stat_idx: VSI's stat_counter_idx.
679 * @offset_loaded: ptr to the VSI's stat_offsets_loaded.
680 * @stat_offset: ptr to stat_offset to store first read of specific register.
681 * @stat: ptr to VSI's stat to be updated.
682 **/
683static void
684i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw,
685			      int stat_idx, bool offset_loaded,
686			      struct i40e_eth_stats *stat_offset,
687			      struct i40e_eth_stats *stat)
688{
689	u64 rx_rdpc, rx_rxerr;
690
691	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded,
692			   &stat_offset->rx_discards, &rx_rdpc);
693	i40e_stat_update64(hw,
694			   I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)),
695			   I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)),
696			   offset_loaded, &stat_offset->rx_discards_other,
697			   &rx_rxerr);
698
699	stat->rx_discards = rx_rdpc + rx_rxerr;
700}
701
702/**
703 * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
704 * @vsi: the VSI to be updated
705 **/
706void i40e_update_eth_stats(struct i40e_vsi *vsi)
707{
708	int stat_idx = le16_to_cpu(vsi->info.stat_counter_idx);
709	struct i40e_pf *pf = vsi->back;
710	struct i40e_hw *hw = &pf->hw;
711	struct i40e_eth_stats *oes;
712	struct i40e_eth_stats *es;     /* device's eth stats */
713
714	es = &vsi->eth_stats;
715	oes = &vsi->eth_stats_offsets;
716
717	/* Gather up the stats that the hw collects */
718	i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx),
719			   vsi->stat_offsets_loaded,
720			   &oes->tx_errors, &es->tx_errors);
721	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx),
722			   vsi->stat_offsets_loaded,
723			   &oes->rx_discards, &es->rx_discards);
724	i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx),
725			   vsi->stat_offsets_loaded,
726			   &oes->rx_unknown_protocol, &es->rx_unknown_protocol);
727
728	i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx),
729			   I40E_GLV_GORCL(stat_idx),
730			   vsi->stat_offsets_loaded,
731			   &oes->rx_bytes, &es->rx_bytes);
732	i40e_stat_update48(hw, I40E_GLV_UPRCH(stat_idx),
733			   I40E_GLV_UPRCL(stat_idx),
734			   vsi->stat_offsets_loaded,
735			   &oes->rx_unicast, &es->rx_unicast);
736	i40e_stat_update48(hw, I40E_GLV_MPRCH(stat_idx),
737			   I40E_GLV_MPRCL(stat_idx),
738			   vsi->stat_offsets_loaded,
739			   &oes->rx_multicast, &es->rx_multicast);
740	i40e_stat_update48(hw, I40E_GLV_BPRCH(stat_idx),
741			   I40E_GLV_BPRCL(stat_idx),
742			   vsi->stat_offsets_loaded,
743			   &oes->rx_broadcast, &es->rx_broadcast);
744
745	i40e_stat_update48(hw, I40E_GLV_GOTCH(stat_idx),
746			   I40E_GLV_GOTCL(stat_idx),
747			   vsi->stat_offsets_loaded,
748			   &oes->tx_bytes, &es->tx_bytes);
749	i40e_stat_update48(hw, I40E_GLV_UPTCH(stat_idx),
750			   I40E_GLV_UPTCL(stat_idx),
751			   vsi->stat_offsets_loaded,
752			   &oes->tx_unicast, &es->tx_unicast);
753	i40e_stat_update48(hw, I40E_GLV_MPTCH(stat_idx),
754			   I40E_GLV_MPTCL(stat_idx),
755			   vsi->stat_offsets_loaded,
756			   &oes->tx_multicast, &es->tx_multicast);
757	i40e_stat_update48(hw, I40E_GLV_BPTCH(stat_idx),
758			   I40E_GLV_BPTCL(stat_idx),
759			   vsi->stat_offsets_loaded,
760			   &oes->tx_broadcast, &es->tx_broadcast);
761
762	i40e_stats_update_rx_discards(vsi, hw, stat_idx,
763				      vsi->stat_offsets_loaded, oes, es);
764
765	vsi->stat_offsets_loaded = true;
766}
767
768/**
769 * i40e_update_veb_stats - Update Switch component statistics
770 * @veb: the VEB being updated
771 **/
772void i40e_update_veb_stats(struct i40e_veb *veb)
773{
774	struct i40e_pf *pf = veb->pf;
775	struct i40e_hw *hw = &pf->hw;
776	struct i40e_eth_stats *oes;
777	struct i40e_eth_stats *es;     /* device's eth stats */
778	struct i40e_veb_tc_stats *veb_oes;
779	struct i40e_veb_tc_stats *veb_es;
780	int i, idx = 0;
781
782	idx = veb->stats_idx;
783	es = &veb->stats;
784	oes = &veb->stats_offsets;
785	veb_es = &veb->tc_stats;
786	veb_oes = &veb->tc_stats_offsets;
787
788	/* Gather up the stats that the hw collects */
789	i40e_stat_update32(hw, I40E_GLSW_TDPC(idx),
790			   veb->stat_offsets_loaded,
791			   &oes->tx_discards, &es->tx_discards);
792	if (hw->revision_id > 0)
793		i40e_stat_update32(hw, I40E_GLSW_RUPP(idx),
794				   veb->stat_offsets_loaded,
795				   &oes->rx_unknown_protocol,
796				   &es->rx_unknown_protocol);
797	i40e_stat_update48(hw, I40E_GLSW_GORCH(idx), I40E_GLSW_GORCL(idx),
798			   veb->stat_offsets_loaded,
799			   &oes->rx_bytes, &es->rx_bytes);
800	i40e_stat_update48(hw, I40E_GLSW_UPRCH(idx), I40E_GLSW_UPRCL(idx),
801			   veb->stat_offsets_loaded,
802			   &oes->rx_unicast, &es->rx_unicast);
803	i40e_stat_update48(hw, I40E_GLSW_MPRCH(idx), I40E_GLSW_MPRCL(idx),
804			   veb->stat_offsets_loaded,
805			   &oes->rx_multicast, &es->rx_multicast);
806	i40e_stat_update48(hw, I40E_GLSW_BPRCH(idx), I40E_GLSW_BPRCL(idx),
807			   veb->stat_offsets_loaded,
808			   &oes->rx_broadcast, &es->rx_broadcast);
809
810	i40e_stat_update48(hw, I40E_GLSW_GOTCH(idx), I40E_GLSW_GOTCL(idx),
811			   veb->stat_offsets_loaded,
812			   &oes->tx_bytes, &es->tx_bytes);
813	i40e_stat_update48(hw, I40E_GLSW_UPTCH(idx), I40E_GLSW_UPTCL(idx),
814			   veb->stat_offsets_loaded,
815			   &oes->tx_unicast, &es->tx_unicast);
816	i40e_stat_update48(hw, I40E_GLSW_MPTCH(idx), I40E_GLSW_MPTCL(idx),
817			   veb->stat_offsets_loaded,
818			   &oes->tx_multicast, &es->tx_multicast);
819	i40e_stat_update48(hw, I40E_GLSW_BPTCH(idx), I40E_GLSW_BPTCL(idx),
820			   veb->stat_offsets_loaded,
821			   &oes->tx_broadcast, &es->tx_broadcast);
822	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
823		i40e_stat_update48(hw, I40E_GLVEBTC_RPCH(i, idx),
824				   I40E_GLVEBTC_RPCL(i, idx),
825				   veb->stat_offsets_loaded,
826				   &veb_oes->tc_rx_packets[i],
827				   &veb_es->tc_rx_packets[i]);
828		i40e_stat_update48(hw, I40E_GLVEBTC_RBCH(i, idx),
829				   I40E_GLVEBTC_RBCL(i, idx),
830				   veb->stat_offsets_loaded,
831				   &veb_oes->tc_rx_bytes[i],
832				   &veb_es->tc_rx_bytes[i]);
833		i40e_stat_update48(hw, I40E_GLVEBTC_TPCH(i, idx),
834				   I40E_GLVEBTC_TPCL(i, idx),
835				   veb->stat_offsets_loaded,
836				   &veb_oes->tc_tx_packets[i],
837				   &veb_es->tc_tx_packets[i]);
838		i40e_stat_update48(hw, I40E_GLVEBTC_TBCH(i, idx),
839				   I40E_GLVEBTC_TBCL(i, idx),
840				   veb->stat_offsets_loaded,
841				   &veb_oes->tc_tx_bytes[i],
842				   &veb_es->tc_tx_bytes[i]);
843	}
844	veb->stat_offsets_loaded = true;
845}
846
847/**
848 * i40e_update_vsi_stats - Update the vsi statistics counters.
849 * @vsi: the VSI to be updated
850 *
851 * There are a few instances where we store the same stat in a
852 * couple of different structs.  This is partly because we have
853 * the netdev stats that need to be filled out, which is slightly
854 * different from the "eth_stats" defined by the chip and used in
855 * VF communications.  We sort it out here.
856 **/
857static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
858{
859	u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy;
860	struct i40e_pf *pf = vsi->back;
861	struct rtnl_link_stats64 *ons;
862	struct rtnl_link_stats64 *ns;   /* netdev stats */
863	struct i40e_eth_stats *oes;
864	struct i40e_eth_stats *es;     /* device's eth stats */
865	u64 tx_restart, tx_busy;
866	struct i40e_ring *p;
867	u64 bytes, packets;
868	unsigned int start;
869	u64 tx_linearize;
870	u64 tx_force_wb;
871	u64 tx_stopped;
872	u64 rx_p, rx_b;
873	u64 tx_p, tx_b;
874	u16 q;
875
876	if (test_bit(__I40E_VSI_DOWN, vsi->state) ||
877	    test_bit(__I40E_CONFIG_BUSY, pf->state))
878		return;
879
880	ns = i40e_get_vsi_stats_struct(vsi);
881	ons = &vsi->net_stats_offsets;
882	es = &vsi->eth_stats;
883	oes = &vsi->eth_stats_offsets;
884
885	/* Gather up the netdev and vsi stats that the driver collects
886	 * on the fly during packet processing
887	 */
888	rx_b = rx_p = 0;
889	tx_b = tx_p = 0;
890	tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
891	tx_stopped = 0;
892	rx_page = 0;
893	rx_buf = 0;
894	rx_reuse = 0;
895	rx_alloc = 0;
896	rx_waive = 0;
897	rx_busy = 0;
898	rcu_read_lock();
899	for (q = 0; q < vsi->num_queue_pairs; q++) {
900		/* locate Tx ring */
901		p = READ_ONCE(vsi->tx_rings[q]);
902		if (!p)
903			continue;
904
905		do {
906			start = u64_stats_fetch_begin(&p->syncp);
907			packets = p->stats.packets;
908			bytes = p->stats.bytes;
909		} while (u64_stats_fetch_retry(&p->syncp, start));
910		tx_b += bytes;
911		tx_p += packets;
912		tx_restart += p->tx_stats.restart_queue;
913		tx_busy += p->tx_stats.tx_busy;
914		tx_linearize += p->tx_stats.tx_linearize;
915		tx_force_wb += p->tx_stats.tx_force_wb;
916		tx_stopped += p->tx_stats.tx_stopped;
917
918		/* locate Rx ring */
919		p = READ_ONCE(vsi->rx_rings[q]);
920		if (!p)
921			continue;
922
923		do {
924			start = u64_stats_fetch_begin(&p->syncp);
925			packets = p->stats.packets;
926			bytes = p->stats.bytes;
927		} while (u64_stats_fetch_retry(&p->syncp, start));
928		rx_b += bytes;
929		rx_p += packets;
930		rx_buf += p->rx_stats.alloc_buff_failed;
931		rx_page += p->rx_stats.alloc_page_failed;
932		rx_reuse += p->rx_stats.page_reuse_count;
933		rx_alloc += p->rx_stats.page_alloc_count;
934		rx_waive += p->rx_stats.page_waive_count;
935		rx_busy += p->rx_stats.page_busy_count;
936
937		if (i40e_enabled_xdp_vsi(vsi)) {
938			/* locate XDP ring */
939			p = READ_ONCE(vsi->xdp_rings[q]);
940			if (!p)
941				continue;
942
943			do {
944				start = u64_stats_fetch_begin(&p->syncp);
945				packets = p->stats.packets;
946				bytes = p->stats.bytes;
947			} while (u64_stats_fetch_retry(&p->syncp, start));
948			tx_b += bytes;
949			tx_p += packets;
950			tx_restart += p->tx_stats.restart_queue;
951			tx_busy += p->tx_stats.tx_busy;
952			tx_linearize += p->tx_stats.tx_linearize;
953			tx_force_wb += p->tx_stats.tx_force_wb;
954		}
955	}
956	rcu_read_unlock();
957	vsi->tx_restart = tx_restart;
958	vsi->tx_busy = tx_busy;
959	vsi->tx_linearize = tx_linearize;
960	vsi->tx_force_wb = tx_force_wb;
961	vsi->tx_stopped = tx_stopped;
962	vsi->rx_page_failed = rx_page;
963	vsi->rx_buf_failed = rx_buf;
964	vsi->rx_page_reuse = rx_reuse;
965	vsi->rx_page_alloc = rx_alloc;
966	vsi->rx_page_waive = rx_waive;
967	vsi->rx_page_busy = rx_busy;
968
969	ns->rx_packets = rx_p;
970	ns->rx_bytes = rx_b;
971	ns->tx_packets = tx_p;
972	ns->tx_bytes = tx_b;
973
974	/* update netdev stats from eth stats */
975	i40e_update_eth_stats(vsi);
976	ons->tx_errors = oes->tx_errors;
977	ns->tx_errors = es->tx_errors;
978	ons->multicast = oes->rx_multicast;
979	ns->multicast = es->rx_multicast;
980	ons->rx_dropped = oes->rx_discards;
981	ns->rx_dropped = es->rx_discards;
982	ons->tx_dropped = oes->tx_discards;
983	ns->tx_dropped = es->tx_discards;
984
985	/* pull in a couple PF stats if this is the main vsi */
986	if (vsi == pf->vsi[pf->lan_vsi]) {
987		ns->rx_crc_errors = pf->stats.crc_errors;
988		ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes;
989		ns->rx_length_errors = pf->stats.rx_length_errors;
990	}
991}
992
993/**
994 * i40e_update_pf_stats - Update the PF statistics counters.
995 * @pf: the PF to be updated
996 **/
997static void i40e_update_pf_stats(struct i40e_pf *pf)
998{
999	struct i40e_hw_port_stats *osd = &pf->stats_offsets;
1000	struct i40e_hw_port_stats *nsd = &pf->stats;
1001	struct i40e_hw *hw = &pf->hw;
1002	u32 val;
1003	int i;
1004
1005	i40e_stat_update48(hw, I40E_GLPRT_GORCH(hw->port),
1006			   I40E_GLPRT_GORCL(hw->port),
1007			   pf->stat_offsets_loaded,
1008			   &osd->eth.rx_bytes, &nsd->eth.rx_bytes);
1009	i40e_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port),
1010			   I40E_GLPRT_GOTCL(hw->port),
1011			   pf->stat_offsets_loaded,
1012			   &osd->eth.tx_bytes, &nsd->eth.tx_bytes);
1013	i40e_stat_update32(hw, I40E_GLPRT_RDPC(hw->port),
1014			   pf->stat_offsets_loaded,
1015			   &osd->eth.rx_discards,
1016			   &nsd->eth.rx_discards);
1017	i40e_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port),
1018			   I40E_GLPRT_UPRCL(hw->port),
1019			   pf->stat_offsets_loaded,
1020			   &osd->eth.rx_unicast,
1021			   &nsd->eth.rx_unicast);
1022	i40e_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port),
1023			   I40E_GLPRT_MPRCL(hw->port),
1024			   pf->stat_offsets_loaded,
1025			   &osd->eth.rx_multicast,
1026			   &nsd->eth.rx_multicast);
1027	i40e_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port),
1028			   I40E_GLPRT_BPRCL(hw->port),
1029			   pf->stat_offsets_loaded,
1030			   &osd->eth.rx_broadcast,
1031			   &nsd->eth.rx_broadcast);
1032	i40e_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port),
1033			   I40E_GLPRT_UPTCL(hw->port),
1034			   pf->stat_offsets_loaded,
1035			   &osd->eth.tx_unicast,
1036			   &nsd->eth.tx_unicast);
1037	i40e_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port),
1038			   I40E_GLPRT_MPTCL(hw->port),
1039			   pf->stat_offsets_loaded,
1040			   &osd->eth.tx_multicast,
1041			   &nsd->eth.tx_multicast);
1042	i40e_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port),
1043			   I40E_GLPRT_BPTCL(hw->port),
1044			   pf->stat_offsets_loaded,
1045			   &osd->eth.tx_broadcast,
1046			   &nsd->eth.tx_broadcast);
1047
1048	i40e_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port),
1049			   pf->stat_offsets_loaded,
1050			   &osd->tx_dropped_link_down,
1051			   &nsd->tx_dropped_link_down);
1052
1053	i40e_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port),
1054			   pf->stat_offsets_loaded,
1055			   &osd->crc_errors, &nsd->crc_errors);
1056
1057	i40e_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port),
1058			   pf->stat_offsets_loaded,
1059			   &osd->illegal_bytes, &nsd->illegal_bytes);
1060
1061	i40e_stat_update32(hw, I40E_GLPRT_MLFC(hw->port),
1062			   pf->stat_offsets_loaded,
1063			   &osd->mac_local_faults,
1064			   &nsd->mac_local_faults);
1065	i40e_stat_update32(hw, I40E_GLPRT_MRFC(hw->port),
1066			   pf->stat_offsets_loaded,
1067			   &osd->mac_remote_faults,
1068			   &nsd->mac_remote_faults);
1069
1070	i40e_stat_update32(hw, I40E_GLPRT_RLEC(hw->port),
1071			   pf->stat_offsets_loaded,
1072			   &osd->rx_length_errors,
1073			   &nsd->rx_length_errors);
1074
1075	i40e_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port),
1076			   pf->stat_offsets_loaded,
1077			   &osd->link_xon_rx, &nsd->link_xon_rx);
1078	i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port),
1079			   pf->stat_offsets_loaded,
1080			   &osd->link_xon_tx, &nsd->link_xon_tx);
1081	i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port),
1082			   pf->stat_offsets_loaded,
1083			   &osd->link_xoff_rx, &nsd->link_xoff_rx);
1084	i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port),
1085			   pf->stat_offsets_loaded,
1086			   &osd->link_xoff_tx, &nsd->link_xoff_tx);
1087
1088	for (i = 0; i < 8; i++) {
1089		i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
1090				   pf->stat_offsets_loaded,
1091				   &osd->priority_xoff_rx[i],
1092				   &nsd->priority_xoff_rx[i]);
1093		i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
1094				   pf->stat_offsets_loaded,
1095				   &osd->priority_xon_rx[i],
1096				   &nsd->priority_xon_rx[i]);
1097		i40e_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
1098				   pf->stat_offsets_loaded,
1099				   &osd->priority_xon_tx[i],
1100				   &nsd->priority_xon_tx[i]);
1101		i40e_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
1102				   pf->stat_offsets_loaded,
1103				   &osd->priority_xoff_tx[i],
1104				   &nsd->priority_xoff_tx[i]);
1105		i40e_stat_update32(hw,
1106				   I40E_GLPRT_RXON2OFFCNT(hw->port, i),
1107				   pf->stat_offsets_loaded,
1108				   &osd->priority_xon_2_xoff[i],
1109				   &nsd->priority_xon_2_xoff[i]);
1110	}
1111
1112	i40e_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
1113			   I40E_GLPRT_PRC64L(hw->port),
1114			   pf->stat_offsets_loaded,
1115			   &osd->rx_size_64, &nsd->rx_size_64);
1116	i40e_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port),
1117			   I40E_GLPRT_PRC127L(hw->port),
1118			   pf->stat_offsets_loaded,
1119			   &osd->rx_size_127, &nsd->rx_size_127);
1120	i40e_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port),
1121			   I40E_GLPRT_PRC255L(hw->port),
1122			   pf->stat_offsets_loaded,
1123			   &osd->rx_size_255, &nsd->rx_size_255);
1124	i40e_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port),
1125			   I40E_GLPRT_PRC511L(hw->port),
1126			   pf->stat_offsets_loaded,
1127			   &osd->rx_size_511, &nsd->rx_size_511);
1128	i40e_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port),
1129			   I40E_GLPRT_PRC1023L(hw->port),
1130			   pf->stat_offsets_loaded,
1131			   &osd->rx_size_1023, &nsd->rx_size_1023);
1132	i40e_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port),
1133			   I40E_GLPRT_PRC1522L(hw->port),
1134			   pf->stat_offsets_loaded,
1135			   &osd->rx_size_1522, &nsd->rx_size_1522);
1136	i40e_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port),
1137			   I40E_GLPRT_PRC9522L(hw->port),
1138			   pf->stat_offsets_loaded,
1139			   &osd->rx_size_big, &nsd->rx_size_big);
1140
1141	i40e_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port),
1142			   I40E_GLPRT_PTC64L(hw->port),
1143			   pf->stat_offsets_loaded,
1144			   &osd->tx_size_64, &nsd->tx_size_64);
1145	i40e_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port),
1146			   I40E_GLPRT_PTC127L(hw->port),
1147			   pf->stat_offsets_loaded,
1148			   &osd->tx_size_127, &nsd->tx_size_127);
1149	i40e_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port),
1150			   I40E_GLPRT_PTC255L(hw->port),
1151			   pf->stat_offsets_loaded,
1152			   &osd->tx_size_255, &nsd->tx_size_255);
1153	i40e_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port),
1154			   I40E_GLPRT_PTC511L(hw->port),
1155			   pf->stat_offsets_loaded,
1156			   &osd->tx_size_511, &nsd->tx_size_511);
1157	i40e_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port),
1158			   I40E_GLPRT_PTC1023L(hw->port),
1159			   pf->stat_offsets_loaded,
1160			   &osd->tx_size_1023, &nsd->tx_size_1023);
1161	i40e_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port),
1162			   I40E_GLPRT_PTC1522L(hw->port),
1163			   pf->stat_offsets_loaded,
1164			   &osd->tx_size_1522, &nsd->tx_size_1522);
1165	i40e_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port),
1166			   I40E_GLPRT_PTC9522L(hw->port),
1167			   pf->stat_offsets_loaded,
1168			   &osd->tx_size_big, &nsd->tx_size_big);
1169
1170	i40e_stat_update32(hw, I40E_GLPRT_RUC(hw->port),
1171			   pf->stat_offsets_loaded,
1172			   &osd->rx_undersize, &nsd->rx_undersize);
1173	i40e_stat_update32(hw, I40E_GLPRT_RFC(hw->port),
1174			   pf->stat_offsets_loaded,
1175			   &osd->rx_fragments, &nsd->rx_fragments);
1176	i40e_stat_update32(hw, I40E_GLPRT_ROC(hw->port),
1177			   pf->stat_offsets_loaded,
1178			   &osd->rx_oversize, &nsd->rx_oversize);
1179	i40e_stat_update32(hw, I40E_GLPRT_RJC(hw->port),
1180			   pf->stat_offsets_loaded,
1181			   &osd->rx_jabber, &nsd->rx_jabber);
1182
1183	/* FDIR stats */
1184	i40e_stat_update_and_clear32(hw,
1185			I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
1186			&nsd->fd_atr_match);
1187	i40e_stat_update_and_clear32(hw,
1188			I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
1189			&nsd->fd_sb_match);
1190	i40e_stat_update_and_clear32(hw,
1191			I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
1192			&nsd->fd_atr_tunnel_match);
1193
1194	val = rd32(hw, I40E_PRTPM_EEE_STAT);
1195	nsd->tx_lpi_status =
1196		       (val & I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK) >>
1197			I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT;
1198	nsd->rx_lpi_status =
1199		       (val & I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK) >>
1200			I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT;
1201	i40e_stat_update32(hw, I40E_PRTPM_TLPIC,
1202			   pf->stat_offsets_loaded,
1203			   &osd->tx_lpi_count, &nsd->tx_lpi_count);
1204	i40e_stat_update32(hw, I40E_PRTPM_RLPIC,
1205			   pf->stat_offsets_loaded,
1206			   &osd->rx_lpi_count, &nsd->rx_lpi_count);
1207
1208	if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
1209	    !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
1210		nsd->fd_sb_status = true;
1211	else
1212		nsd->fd_sb_status = false;
1213
1214	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
1215	    !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
1216		nsd->fd_atr_status = true;
1217	else
1218		nsd->fd_atr_status = false;
1219
1220	pf->stat_offsets_loaded = true;
1221}
1222
1223/**
1224 * i40e_update_stats - Update the various statistics counters.
1225 * @vsi: the VSI to be updated
1226 *
1227 * Update the various stats for this VSI and its related entities.
1228 **/
1229void i40e_update_stats(struct i40e_vsi *vsi)
1230{
1231	struct i40e_pf *pf = vsi->back;
1232
1233	if (vsi == pf->vsi[pf->lan_vsi])
1234		i40e_update_pf_stats(pf);
1235
1236	i40e_update_vsi_stats(vsi);
1237}
1238
1239/**
1240 * i40e_count_filters - counts VSI mac filters
1241 * @vsi: the VSI to be searched
1242 *
1243 * Returns count of mac filters
1244 **/
1245int i40e_count_filters(struct i40e_vsi *vsi)
1246{
1247	struct i40e_mac_filter *f;
1248	struct hlist_node *h;
1249	int bkt;
1250	int cnt = 0;
1251
1252	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
1253		++cnt;
1254
1255	return cnt;
1256}
1257
1258/**
1259 * i40e_find_filter - Search VSI filter list for specific mac/vlan filter
1260 * @vsi: the VSI to be searched
1261 * @macaddr: the MAC address
1262 * @vlan: the vlan
1263 *
1264 * Returns ptr to the filter object or NULL
1265 **/
1266static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
1267						const u8 *macaddr, s16 vlan)
1268{
1269	struct i40e_mac_filter *f;
1270	u64 key;
1271
1272	if (!vsi || !macaddr)
1273		return NULL;
1274
1275	key = i40e_addr_to_hkey(macaddr);
1276	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
1277		if ((ether_addr_equal(macaddr, f->macaddr)) &&
1278		    (vlan == f->vlan))
1279			return f;
1280	}
1281	return NULL;
1282}
1283
1284/**
1285 * i40e_find_mac - Find a mac addr in the macvlan filters list
1286 * @vsi: the VSI to be searched
1287 * @macaddr: the MAC address we are searching for
1288 *
1289 * Returns the first filter with the provided MAC address or NULL if
1290 * MAC address was not found
1291 **/
1292struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr)
1293{
1294	struct i40e_mac_filter *f;
1295	u64 key;
1296
1297	if (!vsi || !macaddr)
1298		return NULL;
1299
1300	key = i40e_addr_to_hkey(macaddr);
1301	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
1302		if ((ether_addr_equal(macaddr, f->macaddr)))
1303			return f;
1304	}
1305	return NULL;
1306}
1307
1308/**
1309 * i40e_is_vsi_in_vlan - Check if VSI is in vlan mode
1310 * @vsi: the VSI to be searched
1311 *
1312 * Returns true if VSI is in vlan mode or false otherwise
1313 **/
1314bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi)
1315{
1316	/* If we have a PVID, always operate in VLAN mode */
1317	if (vsi->info.pvid)
1318		return true;
1319
1320	/* We need to operate in VLAN mode whenever we have any filters with
1321	 * a VLAN other than I40E_VLAN_ALL. We could check the table each
1322	 * time, incurring search cost repeatedly. However, we can notice two
1323	 * things:
1324	 *
1325	 * 1) the only place where we can gain a VLAN filter is in
1326	 *    i40e_add_filter.
1327	 *
1328	 * 2) the only place where filters are actually removed is in
1329	 *    i40e_sync_filters_subtask.
1330	 *
1331	 * Thus, we can simply use a boolean value, has_vlan_filters which we
1332	 * will set to true when we add a VLAN filter in i40e_add_filter. Then
1333	 * we have to perform the full search after deleting filters in
1334	 * i40e_sync_filters_subtask, but we already have to search
1335	 * filters here and can perform the check at the same time. This
1336	 * results in avoiding embedding a loop for VLAN mode inside another
1337	 * loop over all the filters, and should maintain correctness as noted
1338	 * above.
1339	 */
1340	return vsi->has_vlan_filter;
1341}
1342
1343/**
1344 * i40e_correct_mac_vlan_filters - Correct non-VLAN filters if necessary
1345 * @vsi: the VSI to configure
1346 * @tmp_add_list: list of filters ready to be added
1347 * @tmp_del_list: list of filters ready to be deleted
1348 * @vlan_filters: the number of active VLAN filters
1349 *
1350 * Update VLAN=0 and VLAN=-1 (I40E_VLAN_ANY) filters properly so that they
1351 * behave as expected. If we have any active VLAN filters remaining or about
1352 * to be added then we need to update non-VLAN filters to be marked as VLAN=0
1353 * so that they only match against untagged traffic. If we no longer have any
1354 * active VLAN filters, we need to make all non-VLAN filters marked as VLAN=-1
1355 * so that they match against both tagged and untagged traffic. In this way,
1356 * we ensure that we correctly receive the desired traffic. This ensures that
1357 * when we have an active VLAN we will receive only untagged traffic and
1358 * traffic matching active VLANs. If we have no active VLANs then we will
1359 * operate in non-VLAN mode and receive all traffic, tagged or untagged.
1360 *
1361 * Finally, in a similar fashion, this function also corrects filters when
1362 * there is an active PVID assigned to this VSI.
1363 *
1364 * In case of memory allocation failure return -ENOMEM. Otherwise, return 0.
1365 *
1366 * This function is only expected to be called from within
1367 * i40e_sync_vsi_filters.
1368 *
1369 * NOTE: This function expects to be called while under the
1370 * mac_filter_hash_lock
1371 */
1372static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi,
1373					 struct hlist_head *tmp_add_list,
1374					 struct hlist_head *tmp_del_list,
1375					 int vlan_filters)
1376{
1377	s16 pvid = le16_to_cpu(vsi->info.pvid);
1378	struct i40e_mac_filter *f, *add_head;
1379	struct i40e_new_mac_filter *new;
1380	struct hlist_node *h;
1381	int bkt, new_vlan;
1382
1383	/* To determine if a particular filter needs to be replaced we
1384	 * have the three following conditions:
1385	 *
1386	 * a) if we have a PVID assigned, then all filters which are
1387	 *    not marked as VLAN=PVID must be replaced with filters that
1388	 *    are.
1389	 * b) otherwise, if we have any active VLANS, all filters
1390	 *    which are marked as VLAN=-1 must be replaced with
1391	 *    filters marked as VLAN=0
1392	 * c) finally, if we do not have any active VLANS, all filters
1393	 *    which are marked as VLAN=0 must be replaced with filters
1394	 *    marked as VLAN=-1
1395	 */
1396
1397	/* Update the filters about to be added in place */
1398	hlist_for_each_entry(new, tmp_add_list, hlist) {
1399		if (pvid && new->f->vlan != pvid)
1400			new->f->vlan = pvid;
1401		else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY)
1402			new->f->vlan = 0;
1403		else if (!vlan_filters && new->f->vlan == 0)
1404			new->f->vlan = I40E_VLAN_ANY;
1405	}
1406
1407	/* Update the remaining active filters */
1408	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
1409		/* Combine the checks for whether a filter needs to be changed
1410		 * and then determine the new VLAN inside the if block, in
1411		 * order to avoid duplicating code for adding the new filter
1412		 * then deleting the old filter.
1413		 */
1414		if ((pvid && f->vlan != pvid) ||
1415		    (vlan_filters && f->vlan == I40E_VLAN_ANY) ||
1416		    (!vlan_filters && f->vlan == 0)) {
1417			/* Determine the new vlan we will be adding */
1418			if (pvid)
1419				new_vlan = pvid;
1420			else if (vlan_filters)
1421				new_vlan = 0;
1422			else
1423				new_vlan = I40E_VLAN_ANY;
1424
1425			/* Create the new filter */
1426			add_head = i40e_add_filter(vsi, f->macaddr, new_vlan);
1427			if (!add_head)
1428				return -ENOMEM;
1429
1430			/* Create a temporary i40e_new_mac_filter */
1431			new = kzalloc(sizeof(*new), GFP_ATOMIC);
1432			if (!new)
1433				return -ENOMEM;
1434
1435			new->f = add_head;
1436			new->state = add_head->state;
1437
1438			/* Add the new filter to the tmp list */
1439			hlist_add_head(&new->hlist, tmp_add_list);
1440
1441			/* Put the original filter into the delete list */
1442			f->state = I40E_FILTER_REMOVE;
1443			hash_del(&f->hlist);
1444			hlist_add_head(&f->hlist, tmp_del_list);
1445		}
1446	}
1447
1448	vsi->has_vlan_filter = !!vlan_filters;
1449
1450	return 0;
1451}
1452
1453/**
1454 * i40e_get_vf_new_vlan - Get new vlan id on a vf
1455 * @vsi: the vsi to configure
1456 * @new_mac: new mac filter to be added
1457 * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL
1458 * @vlan_filters: the number of active VLAN filters
1459 * @trusted: flag if the VF is trusted
1460 *
1461 * Get new VLAN id based on current VLAN filters, trust, PVID
1462 * and vf-vlan-prune-disable flag.
1463 *
1464 * Returns the value of the new vlan filter or
1465 * the old value if no new filter is needed.
1466 */
1467static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi,
1468				struct i40e_new_mac_filter *new_mac,
1469				struct i40e_mac_filter *f,
1470				int vlan_filters,
1471				bool trusted)
1472{
1473	s16 pvid = le16_to_cpu(vsi->info.pvid);
1474	struct i40e_pf *pf = vsi->back;
1475	bool is_any;
1476
1477	if (new_mac)
1478		f = new_mac->f;
1479
1480	if (pvid && f->vlan != pvid)
1481		return pvid;
1482
1483	is_any = (trusted ||
1484		  !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING));
1485
1486	if ((vlan_filters && f->vlan == I40E_VLAN_ANY) ||
1487	    (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) ||
1488	    (is_any && !vlan_filters && f->vlan == 0)) {
1489		if (is_any)
1490			return I40E_VLAN_ANY;
1491		else
1492			return 0;
1493	}
1494
1495	return f->vlan;
1496}
1497
1498/**
1499 * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary
1500 * @vsi: the vsi to configure
1501 * @tmp_add_list: list of filters ready to be added
1502 * @tmp_del_list: list of filters ready to be deleted
1503 * @vlan_filters: the number of active VLAN filters
1504 * @trusted: flag if the VF is trusted
1505 *
1506 * Correct VF VLAN filters based on current VLAN filters, trust, PVID
1507 * and vf-vlan-prune-disable flag.
1508 *
1509 * In case of memory allocation failure return -ENOMEM. Otherwise, return 0.
1510 *
1511 * This function is only expected to be called from within
1512 * i40e_sync_vsi_filters.
1513 *
1514 * NOTE: This function expects to be called while under the
1515 * mac_filter_hash_lock
1516 */
1517static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi,
1518					    struct hlist_head *tmp_add_list,
1519					    struct hlist_head *tmp_del_list,
1520					    int vlan_filters,
1521					    bool trusted)
1522{
1523	struct i40e_mac_filter *f, *add_head;
1524	struct i40e_new_mac_filter *new_mac;
1525	struct hlist_node *h;
1526	int bkt, new_vlan;
1527
1528	hlist_for_each_entry(new_mac, tmp_add_list, hlist) {
1529		new_mac->f->vlan = i40e_get_vf_new_vlan(vsi, new_mac, NULL,
1530							vlan_filters, trusted);
1531	}
1532
1533	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
1534		new_vlan = i40e_get_vf_new_vlan(vsi, NULL, f, vlan_filters,
1535						trusted);
1536		if (new_vlan != f->vlan) {
1537			add_head = i40e_add_filter(vsi, f->macaddr, new_vlan);
1538			if (!add_head)
1539				return -ENOMEM;
1540			/* Create a temporary i40e_new_mac_filter */
1541			new_mac = kzalloc(sizeof(*new_mac), GFP_ATOMIC);
1542			if (!new_mac)
1543				return -ENOMEM;
1544			new_mac->f = add_head;
1545			new_mac->state = add_head->state;
1546
1547			/* Add the new filter to the tmp list */
1548			hlist_add_head(&new_mac->hlist, tmp_add_list);
1549
1550			/* Put the original filter into the delete list */
1551			f->state = I40E_FILTER_REMOVE;
1552			hash_del(&f->hlist);
1553			hlist_add_head(&f->hlist, tmp_del_list);
1554		}
1555	}
1556
1557	vsi->has_vlan_filter = !!vlan_filters;
1558	return 0;
1559}
1560
1561/**
1562 * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM
1563 * @vsi: the PF Main VSI - inappropriate for any other VSI
1564 * @macaddr: the MAC address
1565 *
1566 * Remove whatever filter the firmware set up so the driver can manage
1567 * its own filtering intelligently.
1568 **/
1569static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
1570{
1571	struct i40e_aqc_remove_macvlan_element_data element;
1572	struct i40e_pf *pf = vsi->back;
1573
1574	/* Only appropriate for the PF main VSI */
1575	if (vsi->type != I40E_VSI_MAIN)
1576		return;
1577
1578	memset(&element, 0, sizeof(element));
1579	ether_addr_copy(element.mac_addr, macaddr);
1580	element.vlan_tag = 0;
1581	/* Ignore error returns, some firmware does it this way... */
1582	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
1583	i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
1584
1585	memset(&element, 0, sizeof(element));
1586	ether_addr_copy(element.mac_addr, macaddr);
1587	element.vlan_tag = 0;
1588	/* ...and some firmware does it this way. */
1589	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
1590			I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
1591	i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
1592}
1593
1594/**
1595 * i40e_add_filter - Add a mac/vlan filter to the VSI
1596 * @vsi: the VSI to be searched
1597 * @macaddr: the MAC address
1598 * @vlan: the vlan
1599 *
1600 * Returns ptr to the filter object or NULL when no memory available.
1601 *
1602 * NOTE: This function is expected to be called with mac_filter_hash_lock
1603 * being held.
1604 **/
1605struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
1606					const u8 *macaddr, s16 vlan)
1607{
1608	struct i40e_mac_filter *f;
1609	u64 key;
1610
1611	if (!vsi || !macaddr)
1612		return NULL;
1613
1614	f = i40e_find_filter(vsi, macaddr, vlan);
1615	if (!f) {
1616		f = kzalloc(sizeof(*f), GFP_ATOMIC);
1617		if (!f)
1618			return NULL;
1619
1620		/* Update the boolean indicating if we need to function in
1621		 * VLAN mode.
1622		 */
1623		if (vlan >= 0)
1624			vsi->has_vlan_filter = true;
1625
1626		ether_addr_copy(f->macaddr, macaddr);
1627		f->vlan = vlan;
1628		f->state = I40E_FILTER_NEW;
1629		INIT_HLIST_NODE(&f->hlist);
1630
1631		key = i40e_addr_to_hkey(macaddr);
1632		hash_add(vsi->mac_filter_hash, &f->hlist, key);
1633
1634		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
1635		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
1636	}
1637
1638	/* If we're asked to add a filter that has been marked for removal, it
1639	 * is safe to simply restore it to active state. __i40e_del_filter
1640	 * will have simply deleted any filters which were previously marked
1641	 * NEW or FAILED, so if it is currently marked REMOVE it must have
1642	 * previously been ACTIVE. Since we haven't yet run the sync filters
1643	 * task, just restore this filter to the ACTIVE state so that the
1644	 * sync task leaves it in place
1645	 */
1646	if (f->state == I40E_FILTER_REMOVE)
1647		f->state = I40E_FILTER_ACTIVE;
1648
1649	return f;
1650}
1651
1652/**
1653 * __i40e_del_filter - Remove a specific filter from the VSI
1654 * @vsi: VSI to remove from
1655 * @f: the filter to remove from the list
1656 *
1657 * This function should be called instead of i40e_del_filter only if you know
1658 * the exact filter you will remove already, such as via i40e_find_filter or
1659 * i40e_find_mac.
1660 *
1661 * NOTE: This function is expected to be called with mac_filter_hash_lock
1662 * being held.
1663 * ANOTHER NOTE: This function MUST be called from within the context of
1664 * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
1665 * instead of list_for_each_entry().
1666 **/
1667void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
1668{
1669	if (!f)
1670		return;
1671
1672	/* If the filter was never added to firmware then we can just delete it
1673	 * directly and we don't want to set the status to remove or else an
1674	 * admin queue command will unnecessarily fire.
1675	 */
1676	if ((f->state == I40E_FILTER_FAILED) ||
1677	    (f->state == I40E_FILTER_NEW)) {
1678		hash_del(&f->hlist);
1679		kfree(f);
1680	} else {
1681		f->state = I40E_FILTER_REMOVE;
1682	}
1683
1684	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
1685	set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
1686}
1687
1688/**
1689 * i40e_del_filter - Remove a MAC/VLAN filter from the VSI
1690 * @vsi: the VSI to be searched
1691 * @macaddr: the MAC address
1692 * @vlan: the VLAN
1693 *
1694 * NOTE: This function is expected to be called with mac_filter_hash_lock
1695 * being held.
1696 * ANOTHER NOTE: This function MUST be called from within the context of
1697 * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
1698 * instead of list_for_each_entry().
1699 **/
1700void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
1701{
1702	struct i40e_mac_filter *f;
1703
1704	if (!vsi || !macaddr)
1705		return;
1706
1707	f = i40e_find_filter(vsi, macaddr, vlan);
1708	__i40e_del_filter(vsi, f);
1709}
1710
1711/**
1712 * i40e_add_mac_filter - Add a MAC filter for all active VLANs
1713 * @vsi: the VSI to be searched
1714 * @macaddr: the mac address to be filtered
1715 *
1716 * If we're not in VLAN mode, just add the filter to I40E_VLAN_ANY. Otherwise,
1717 * go through all the macvlan filters and add a macvlan filter for each
1718 * unique vlan that already exists. If a PVID has been assigned, instead only
1719 * add the macaddr to that VLAN.
1720 *
1721 * Returns last filter added on success, else NULL
1722 **/
1723struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
1724					    const u8 *macaddr)
1725{
1726	struct i40e_mac_filter *f, *add = NULL;
1727	struct hlist_node *h;
1728	int bkt;
1729
1730	if (vsi->info.pvid)
1731		return i40e_add_filter(vsi, macaddr,
1732				       le16_to_cpu(vsi->info.pvid));
1733
1734	if (!i40e_is_vsi_in_vlan(vsi))
1735		return i40e_add_filter(vsi, macaddr, I40E_VLAN_ANY);
1736
1737	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
1738		if (f->state == I40E_FILTER_REMOVE)
1739			continue;
1740		add = i40e_add_filter(vsi, macaddr, f->vlan);
1741		if (!add)
1742			return NULL;
1743	}
1744
1745	return add;
1746}
1747
1748/**
1749 * i40e_del_mac_filter - Remove a MAC filter from all VLANs
1750 * @vsi: the VSI to be searched
1751 * @macaddr: the mac address to be removed
1752 *
1753 * Removes a given MAC address from a VSI regardless of what VLAN it has been
1754 * associated with.
1755 *
1756 * Returns 0 for success, or error
1757 **/
1758int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr)
1759{
1760	struct i40e_mac_filter *f;
1761	struct hlist_node *h;
1762	bool found = false;
1763	int bkt;
1764
1765	lockdep_assert_held(&vsi->mac_filter_hash_lock);
1766	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
1767		if (ether_addr_equal(macaddr, f->macaddr)) {
1768			__i40e_del_filter(vsi, f);
1769			found = true;
1770		}
1771	}
1772
1773	if (found)
1774		return 0;
1775	else
1776		return -ENOENT;
1777}
1778
1779/**
1780 * i40e_set_mac - NDO callback to set mac address
1781 * @netdev: network interface device structure
1782 * @p: pointer to an address structure
1783 *
1784 * Returns 0 on success, negative on failure
1785 **/
1786static int i40e_set_mac(struct net_device *netdev, void *p)
1787{
1788	struct i40e_netdev_priv *np = netdev_priv(netdev);
1789	struct i40e_vsi *vsi = np->vsi;
1790	struct i40e_pf *pf = vsi->back;
1791	struct i40e_hw *hw = &pf->hw;
1792	struct sockaddr *addr = p;
1793
1794	if (!is_valid_ether_addr(addr->sa_data))
1795		return -EADDRNOTAVAIL;
1796
1797	if (test_bit(__I40E_DOWN, pf->state) ||
1798	    test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
1799		return -EADDRNOTAVAIL;
1800
1801	if (ether_addr_equal(hw->mac.addr, addr->sa_data))
1802		netdev_info(netdev, "returning to hw mac address %pM\n",
1803			    hw->mac.addr);
1804	else
1805		netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
1806
1807	/* Copy the address first, so that we avoid a possible race with
1808	 * .set_rx_mode().
1809	 * - Remove old address from MAC filter
1810	 * - Copy new address
1811	 * - Add new address to MAC filter
1812	 */
1813	spin_lock_bh(&vsi->mac_filter_hash_lock);
1814	i40e_del_mac_filter(vsi, netdev->dev_addr);
1815	eth_hw_addr_set(netdev, addr->sa_data);
1816	i40e_add_mac_filter(vsi, netdev->dev_addr);
1817	spin_unlock_bh(&vsi->mac_filter_hash_lock);
1818
1819	if (vsi->type == I40E_VSI_MAIN) {
1820		int ret;
1821
1822		ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL,
1823						addr->sa_data, NULL);
1824		if (ret)
1825			netdev_info(netdev, "Ignoring error from firmware on LAA update, status %pe, AQ ret %s\n",
1826				    ERR_PTR(ret),
1827				    i40e_aq_str(hw, hw->aq.asq_last_status));
1828	}
1829
1830	/* schedule our worker thread which will take care of
1831	 * applying the new filter changes
1832	 */
1833	i40e_service_event_schedule(pf);
1834	return 0;
1835}
1836
1837/**
1838 * i40e_config_rss_aq - Prepare for RSS using AQ commands
1839 * @vsi: vsi structure
1840 * @seed: RSS hash seed
1841 * @lut: pointer to lookup table of lut_size
1842 * @lut_size: size of the lookup table
1843 **/
1844static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
1845			      u8 *lut, u16 lut_size)
1846{
1847	struct i40e_pf *pf = vsi->back;
1848	struct i40e_hw *hw = &pf->hw;
1849	int ret = 0;
1850
1851	if (seed) {
1852		struct i40e_aqc_get_set_rss_key_data *seed_dw =
1853			(struct i40e_aqc_get_set_rss_key_data *)seed;
1854		ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
1855		if (ret) {
1856			dev_info(&pf->pdev->dev,
1857				 "Cannot set RSS key, err %pe aq_err %s\n",
1858				 ERR_PTR(ret),
1859				 i40e_aq_str(hw, hw->aq.asq_last_status));
1860			return ret;
1861		}
1862	}
1863	if (lut) {
1864		bool pf_lut = vsi->type == I40E_VSI_MAIN;
1865
1866		ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
1867		if (ret) {
1868			dev_info(&pf->pdev->dev,
1869				 "Cannot set RSS lut, err %pe aq_err %s\n",
1870				 ERR_PTR(ret),
1871				 i40e_aq_str(hw, hw->aq.asq_last_status));
1872			return ret;
1873		}
1874	}
1875	return ret;
1876}
1877
1878/**
1879 * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
1880 * @vsi: VSI structure
1881 **/
1882static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
1883{
1884	struct i40e_pf *pf = vsi->back;
1885	u8 seed[I40E_HKEY_ARRAY_SIZE];
1886	u8 *lut;
1887	int ret;
1888
1889	if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
1890		return 0;
1891	if (!vsi->rss_size)
1892		vsi->rss_size = min_t(int, pf->alloc_rss_size,
1893				      vsi->num_queue_pairs);
1894	if (!vsi->rss_size)
1895		return -EINVAL;
1896	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
1897	if (!lut)
1898		return -ENOMEM;
1899
1900	/* Use the user configured hash keys and lookup table if there is one,
1901	 * otherwise use default
1902	 */
1903	if (vsi->rss_lut_user)
1904		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
1905	else
1906		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
1907	if (vsi->rss_hkey_user)
1908		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
1909	else
1910		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
1911	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
1912	kfree(lut);
1913	return ret;
1914}
1915
1916/**
1917 * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
1918 * @vsi: the VSI being configured,
1919 * @ctxt: VSI context structure
1920 * @enabled_tc: number of traffic classes to enable
1921 *
1922 * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
1923 **/
1924static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
1925					   struct i40e_vsi_context *ctxt,
1926					   u8 enabled_tc)
1927{
1928	u16 qcount = 0, max_qcount, qmap, sections = 0;
1929	int i, override_q, pow, num_qps, ret;
1930	u8 netdev_tc = 0, offset = 0;
1931
1932	if (vsi->type != I40E_VSI_MAIN)
1933		return -EINVAL;
1934	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
1935	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
1936	vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
1937	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
1938	num_qps = vsi->mqprio_qopt.qopt.count[0];
1939
1940	/* find the next higher power-of-2 of num queue pairs */
1941	pow = ilog2(num_qps);
1942	if (!is_power_of_2(num_qps))
1943		pow++;
1944	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
1945		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
1946
1947	/* Setup queue offset/count for all TCs for given VSI */
1948	max_qcount = vsi->mqprio_qopt.qopt.count[0];
1949	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
1950		/* See if the given TC is enabled for the given VSI */
1951		if (vsi->tc_config.enabled_tc & BIT(i)) {
1952			offset = vsi->mqprio_qopt.qopt.offset[i];
1953			qcount = vsi->mqprio_qopt.qopt.count[i];
1954			if (qcount > max_qcount)
1955				max_qcount = qcount;
1956			vsi->tc_config.tc_info[i].qoffset = offset;
1957			vsi->tc_config.tc_info[i].qcount = qcount;
1958			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
1959		} else {
1960			/* TC is not enabled so set the offset to
1961			 * default queue and allocate one queue
1962			 * for the given TC.
1963			 */
1964			vsi->tc_config.tc_info[i].qoffset = 0;
1965			vsi->tc_config.tc_info[i].qcount = 1;
1966			vsi->tc_config.tc_info[i].netdev_tc = 0;
1967		}
1968	}
1969
1970	/* Set actual Tx/Rx queue pairs */
1971	vsi->num_queue_pairs = offset + qcount;
1972
1973	/* Setup queue TC[0].qmap for given VSI context */
1974	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
1975	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
1976	ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
1977	ctxt->info.valid_sections |= cpu_to_le16(sections);
1978
1979	/* Reconfigure RSS for main VSI with max queue count */
1980	vsi->rss_size = max_qcount;
1981	ret = i40e_vsi_config_rss(vsi);
1982	if (ret) {
1983		dev_info(&vsi->back->pdev->dev,
1984			 "Failed to reconfig rss for num_queues (%u)\n",
1985			 max_qcount);
1986		return ret;
1987	}
1988	vsi->reconfig_rss = true;
1989	dev_dbg(&vsi->back->pdev->dev,
1990		"Reconfigured rss with num_queues (%u)\n", max_qcount);
1991
1992	/* Find queue count available for channel VSIs and starting offset
1993	 * for channel VSIs
1994	 */
1995	override_q = vsi->mqprio_qopt.qopt.count[0];
1996	if (override_q && override_q < vsi->num_queue_pairs) {
1997		vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
1998		vsi->next_base_queue = override_q;
1999	}
2000	return 0;
2001}
2002
2003/**
2004 * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
2005 * @vsi: the VSI being setup
2006 * @ctxt: VSI context structure
2007 * @enabled_tc: Enabled TCs bitmap
2008 * @is_add: True if called before Add VSI
2009 *
2010 * Setup VSI queue mapping for enabled traffic classes.
2011 **/
2012static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
2013				     struct i40e_vsi_context *ctxt,
2014				     u8 enabled_tc,
2015				     bool is_add)
2016{
2017	struct i40e_pf *pf = vsi->back;
2018	u16 num_tc_qps = 0;
2019	u16 sections = 0;
2020	u8 netdev_tc = 0;
2021	u16 numtc = 1;
2022	u16 qcount;
2023	u8 offset;
2024	u16 qmap;
2025	int i;
2026
2027	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
2028	offset = 0;
2029	/* zero out queue mapping, it will get updated on the end of the function */
2030	memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping));
2031
2032	if (vsi->type == I40E_VSI_MAIN) {
2033		/* This code helps add more queue to the VSI if we have
2034		 * more cores than RSS can support, the higher cores will
2035		 * be served by ATR or other filters. Furthermore, the
2036		 * non-zero req_queue_pairs says that user requested a new
2037		 * queue count via ethtool's set_channels, so use this
2038		 * value for queues distribution across traffic classes
2039		 * We need at least one queue pair for the interface
2040		 * to be usable as we see in else statement.
2041		 */
2042		if (vsi->req_queue_pairs > 0)
2043			vsi->num_queue_pairs = vsi->req_queue_pairs;
2044		else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
2045			vsi->num_queue_pairs = pf->num_lan_msix;
2046		else
2047			vsi->num_queue_pairs = 1;
2048	}
2049
2050	/* Number of queues per enabled TC */
2051	if (vsi->type == I40E_VSI_MAIN ||
2052	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0))
2053		num_tc_qps = vsi->num_queue_pairs;
2054	else
2055		num_tc_qps = vsi->alloc_queue_pairs;
2056
2057	if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
2058		/* Find numtc from enabled TC bitmap */
2059		for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
2060			if (enabled_tc & BIT(i)) /* TC is enabled */
2061				numtc++;
2062		}
2063		if (!numtc) {
2064			dev_warn(&pf->pdev->dev, "DCB is enabled but no TC enabled, forcing TC0\n");
2065			numtc = 1;
2066		}
2067		num_tc_qps = num_tc_qps / numtc;
2068		num_tc_qps = min_t(int, num_tc_qps,
2069				   i40e_pf_get_max_q_per_tc(pf));
2070	}
2071
2072	vsi->tc_config.numtc = numtc;
2073	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
2074
2075	/* Do not allow use more TC queue pairs than MSI-X vectors exist */
2076	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
2077		num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix);
2078
2079	/* Setup queue offset/count for all TCs for given VSI */
2080	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
2081		/* See if the given TC is enabled for the given VSI */
2082		if (vsi->tc_config.enabled_tc & BIT(i)) {
2083			/* TC is enabled */
2084			int pow, num_qps;
2085
2086			switch (vsi->type) {
2087			case I40E_VSI_MAIN:
2088				if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED |
2089				    I40E_FLAG_FD_ATR_ENABLED)) ||
2090				    vsi->tc_config.enabled_tc != 1) {
2091					qcount = min_t(int, pf->alloc_rss_size,
2092						       num_tc_qps);
2093					break;
2094				}
2095				fallthrough;
2096			case I40E_VSI_FDIR:
2097			case I40E_VSI_SRIOV:
2098			case I40E_VSI_VMDQ2:
2099			default:
2100				qcount = num_tc_qps;
2101				WARN_ON(i != 0);
2102				break;
2103			}
2104			vsi->tc_config.tc_info[i].qoffset = offset;
2105			vsi->tc_config.tc_info[i].qcount = qcount;
2106
2107			/* find the next higher power-of-2 of num queue pairs */
2108			num_qps = qcount;
2109			pow = 0;
2110			while (num_qps && (BIT_ULL(pow) < qcount)) {
2111				pow++;
2112				num_qps >>= 1;
2113			}
2114
2115			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
2116			qmap =
2117			    (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
2118			    (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
2119
2120			offset += qcount;
2121		} else {
2122			/* TC is not enabled so set the offset to
2123			 * default queue and allocate one queue
2124			 * for the given TC.
2125			 */
2126			vsi->tc_config.tc_info[i].qoffset = 0;
2127			vsi->tc_config.tc_info[i].qcount = 1;
2128			vsi->tc_config.tc_info[i].netdev_tc = 0;
2129
2130			qmap = 0;
2131		}
2132		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
2133	}
2134	/* Do not change previously set num_queue_pairs for PFs and VFs*/
2135	if ((vsi->type == I40E_VSI_MAIN && numtc != 1) ||
2136	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) ||
2137	    (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV))
2138		vsi->num_queue_pairs = offset;
2139
2140	/* Scheduler section valid can only be set for ADD VSI */
2141	if (is_add) {
2142		sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
2143
2144		ctxt->info.up_enable_bits = enabled_tc;
2145	}
2146	if (vsi->type == I40E_VSI_SRIOV) {
2147		ctxt->info.mapping_flags |=
2148				     cpu_to_le16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
2149		for (i = 0; i < vsi->num_queue_pairs; i++)
2150			ctxt->info.queue_mapping[i] =
2151					       cpu_to_le16(vsi->base_queue + i);
2152	} else {
2153		ctxt->info.mapping_flags |=
2154					cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
2155		ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
2156	}
2157	ctxt->info.valid_sections |= cpu_to_le16(sections);
2158}
2159
2160/**
2161 * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address
2162 * @netdev: the netdevice
2163 * @addr: address to add
2164 *
2165 * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
2166 * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
2167 */
2168static int i40e_addr_sync(struct net_device *netdev, const u8 *addr)
2169{
2170	struct i40e_netdev_priv *np = netdev_priv(netdev);
2171	struct i40e_vsi *vsi = np->vsi;
2172
2173	if (i40e_add_mac_filter(vsi, addr))
2174		return 0;
2175	else
2176		return -ENOMEM;
2177}
2178
2179/**
2180 * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
2181 * @netdev: the netdevice
2182 * @addr: address to add
2183 *
2184 * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
2185 * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
2186 */
2187static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
2188{
2189	struct i40e_netdev_priv *np = netdev_priv(netdev);
2190	struct i40e_vsi *vsi = np->vsi;
2191
2192	/* Under some circumstances, we might receive a request to delete
2193	 * our own device address from our uc list. Because we store the
2194	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
2195	 * such requests and not delete our device address from this list.
2196	 */
2197	if (ether_addr_equal(addr, netdev->dev_addr))
2198		return 0;
2199
2200	i40e_del_mac_filter(vsi, addr);
2201
2202	return 0;
2203}
2204
2205/**
2206 * i40e_set_rx_mode - NDO callback to set the netdev filters
2207 * @netdev: network interface device structure
2208 **/
2209static void i40e_set_rx_mode(struct net_device *netdev)
2210{
2211	struct i40e_netdev_priv *np = netdev_priv(netdev);
2212	struct i40e_vsi *vsi = np->vsi;
2213
2214	spin_lock_bh(&vsi->mac_filter_hash_lock);
2215
2216	__dev_uc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
2217	__dev_mc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
2218
2219	spin_unlock_bh(&vsi->mac_filter_hash_lock);
2220
2221	/* check for other flag changes */
2222	if (vsi->current_netdev_flags != vsi->netdev->flags) {
2223		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
2224		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
2225	}
2226}
2227
2228/**
2229 * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries
2230 * @vsi: Pointer to VSI struct
2231 * @from: Pointer to list which contains MAC filter entries - changes to
2232 *        those entries needs to be undone.
2233 *
2234 * MAC filter entries from this list were slated for deletion.
2235 **/
2236static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
2237					 struct hlist_head *from)
2238{
2239	struct i40e_mac_filter *f;
2240	struct hlist_node *h;
2241
2242	hlist_for_each_entry_safe(f, h, from, hlist) {
2243		u64 key = i40e_addr_to_hkey(f->macaddr);
2244
2245		/* Move the element back into MAC filter list*/
2246		hlist_del(&f->hlist);
2247		hash_add(vsi->mac_filter_hash, &f->hlist, key);
2248	}
2249}
2250
2251/**
2252 * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries
2253 * @vsi: Pointer to vsi struct
2254 * @from: Pointer to list which contains MAC filter entries - changes to
2255 *        those entries needs to be undone.
2256 *
2257 * MAC filter entries from this list were slated for addition.
2258 **/
2259static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
2260					 struct hlist_head *from)
2261{
2262	struct i40e_new_mac_filter *new;
2263	struct hlist_node *h;
2264
2265	hlist_for_each_entry_safe(new, h, from, hlist) {
2266		/* We can simply free the wrapper structure */
2267		hlist_del(&new->hlist);
2268		netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
2269		kfree(new);
2270	}
2271}
2272
2273/**
2274 * i40e_next_filter - Get the next non-broadcast filter from a list
2275 * @next: pointer to filter in list
2276 *
2277 * Returns the next non-broadcast filter in the list. Required so that we
2278 * ignore broadcast filters within the list, since these are not handled via
2279 * the normal firmware update path.
2280 */
2281static
2282struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
2283{
2284	hlist_for_each_entry_continue(next, hlist) {
2285		if (!is_broadcast_ether_addr(next->f->macaddr))
2286			return next;
2287	}
2288
2289	return NULL;
2290}
2291
2292/**
2293 * i40e_update_filter_state - Update filter state based on return data
2294 * from firmware
2295 * @count: Number of filters added
2296 * @add_list: return data from fw
2297 * @add_head: pointer to first filter in current batch
2298 *
2299 * MAC filter entries from list were slated to be added to device. Returns
2300 * number of successful filters. Note that 0 does NOT mean success!
2301 **/
2302static int
2303i40e_update_filter_state(int count,
2304			 struct i40e_aqc_add_macvlan_element_data *add_list,
2305			 struct i40e_new_mac_filter *add_head)
2306{
2307	int retval = 0;
2308	int i;
2309
2310	for (i = 0; i < count; i++) {
2311		/* Always check status of each filter. We don't need to check
2312		 * the firmware return status because we pre-set the filter
2313		 * status to I40E_AQC_MM_ERR_NO_RES when sending the filter
2314		 * request to the adminq. Thus, if it no longer matches then
2315		 * we know the filter is active.
2316		 */
2317		if (add_list[i].match_method == I40E_AQC_MM_ERR_NO_RES) {
2318			add_head->state = I40E_FILTER_FAILED;
2319		} else {
2320			add_head->state = I40E_FILTER_ACTIVE;
2321			retval++;
2322		}
2323
2324		add_head = i40e_next_filter(add_head);
2325		if (!add_head)
2326			break;
2327	}
2328
2329	return retval;
2330}
2331
2332/**
2333 * i40e_aqc_del_filters - Request firmware to delete a set of filters
2334 * @vsi: ptr to the VSI
2335 * @vsi_name: name to display in messages
2336 * @list: the list of filters to send to firmware
2337 * @num_del: the number of filters to delete
2338 * @retval: Set to -EIO on failure to delete
2339 *
2340 * Send a request to firmware via AdminQ to delete a set of filters. Uses
2341 * *retval instead of a return value so that success does not force ret_val to
2342 * be set to 0. This ensures that a sequence of calls to this function
2343 * preserve the previous value of *retval on successful delete.
2344 */
2345static
2346void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
2347			  struct i40e_aqc_remove_macvlan_element_data *list,
2348			  int num_del, int *retval)
2349{
2350	struct i40e_hw *hw = &vsi->back->hw;
2351	enum i40e_admin_queue_err aq_status;
2352	int aq_ret;
2353
2354	aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL,
2355					   &aq_status);
2356
2357	/* Explicitly ignore and do not report when firmware returns ENOENT */
2358	if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) {
2359		*retval = -EIO;
2360		dev_info(&vsi->back->pdev->dev,
2361			 "ignoring delete macvlan error on %s, err %pe, aq_err %s\n",
2362			 vsi_name, ERR_PTR(aq_ret),
2363			 i40e_aq_str(hw, aq_status));
2364	}
2365}
2366
2367/**
2368 * i40e_aqc_add_filters - Request firmware to add a set of filters
2369 * @vsi: ptr to the VSI
2370 * @vsi_name: name to display in messages
2371 * @list: the list of filters to send to firmware
2372 * @add_head: Position in the add hlist
2373 * @num_add: the number of filters to add
2374 *
2375 * Send a request to firmware via AdminQ to add a chunk of filters. Will set
2376 * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
2377 * space for more filters.
2378 */
2379static
2380void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
2381			  struct i40e_aqc_add_macvlan_element_data *list,
2382			  struct i40e_new_mac_filter *add_head,
2383			  int num_add)
2384{
2385	struct i40e_hw *hw = &vsi->back->hw;
2386	enum i40e_admin_queue_err aq_status;
2387	int fcnt;
2388
2389	i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status);
2390	fcnt = i40e_update_filter_state(num_add, list, add_head);
2391
2392	if (fcnt != num_add) {
2393		if (vsi->type == I40E_VSI_MAIN) {
2394			set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2395			dev_warn(&vsi->back->pdev->dev,
2396				 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
2397				 i40e_aq_str(hw, aq_status), vsi_name);
2398		} else if (vsi->type == I40E_VSI_SRIOV ||
2399			   vsi->type == I40E_VSI_VMDQ1 ||
2400			   vsi->type == I40E_VSI_VMDQ2) {
2401			dev_warn(&vsi->back->pdev->dev,
2402				 "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n",
2403				 i40e_aq_str(hw, aq_status), vsi_name,
2404					     vsi_name);
2405		} else {
2406			dev_warn(&vsi->back->pdev->dev,
2407				 "Error %s adding RX filters on %s, incorrect VSI type: %i.\n",
2408				 i40e_aq_str(hw, aq_status), vsi_name,
2409					     vsi->type);
2410		}
2411	}
2412}
2413
2414/**
2415 * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags
2416 * @vsi: pointer to the VSI
2417 * @vsi_name: the VSI name
2418 * @f: filter data
2419 *
2420 * This function sets or clears the promiscuous broadcast flags for VLAN
2421 * filters in order to properly receive broadcast frames. Assumes that only
2422 * broadcast filters are passed.
2423 *
2424 * Returns status indicating success or failure;
2425 **/
2426static int
2427i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
2428			  struct i40e_mac_filter *f)
2429{
2430	bool enable = f->state == I40E_FILTER_NEW;
2431	struct i40e_hw *hw = &vsi->back->hw;
2432	int aq_ret;
2433
2434	if (f->vlan == I40E_VLAN_ANY) {
2435		aq_ret = i40e_aq_set_vsi_broadcast(hw,
2436						   vsi->seid,
2437						   enable,
2438						   NULL);
2439	} else {
2440		aq_ret = i40e_aq_set_vsi_bc_promisc_on_vlan(hw,
2441							    vsi->seid,
2442							    enable,
2443							    f->vlan,
2444							    NULL);
2445	}
2446
2447	if (aq_ret) {
2448		set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2449		dev_warn(&vsi->back->pdev->dev,
2450			 "Error %s, forcing overflow promiscuous on %s\n",
2451			 i40e_aq_str(hw, hw->aq.asq_last_status),
2452			 vsi_name);
2453	}
2454
2455	return aq_ret;
2456}
2457
2458/**
2459 * i40e_set_promiscuous - set promiscuous mode
2460 * @pf: board private structure
2461 * @promisc: promisc on or off
2462 *
2463 * There are different ways of setting promiscuous mode on a PF depending on
2464 * what state/environment we're in.  This identifies and sets it appropriately.
2465 * Returns 0 on success.
2466 **/
2467static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
2468{
2469	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
2470	struct i40e_hw *hw = &pf->hw;
2471	int aq_ret;
2472
2473	if (vsi->type == I40E_VSI_MAIN &&
2474	    pf->lan_veb != I40E_NO_VEB &&
2475	    !(pf->flags & I40E_FLAG_MFP_ENABLED)) {
2476		/* set defport ON for Main VSI instead of true promisc
2477		 * this way we will get all unicast/multicast and VLAN
2478		 * promisc behavior but will not get VF or VMDq traffic
2479		 * replicated on the Main VSI.
2480		 */
2481		if (promisc)
2482			aq_ret = i40e_aq_set_default_vsi(hw,
2483							 vsi->seid,
2484							 NULL);
2485		else
2486			aq_ret = i40e_aq_clear_default_vsi(hw,
2487							   vsi->seid,
2488							   NULL);
2489		if (aq_ret) {
2490			dev_info(&pf->pdev->dev,
2491				 "Set default VSI failed, err %pe, aq_err %s\n",
2492				 ERR_PTR(aq_ret),
2493				 i40e_aq_str(hw, hw->aq.asq_last_status));
2494		}
2495	} else {
2496		aq_ret = i40e_aq_set_vsi_unicast_promiscuous(
2497						  hw,
2498						  vsi->seid,
2499						  promisc, NULL,
2500						  true);
2501		if (aq_ret) {
2502			dev_info(&pf->pdev->dev,
2503				 "set unicast promisc failed, err %pe, aq_err %s\n",
2504				 ERR_PTR(aq_ret),
2505				 i40e_aq_str(hw, hw->aq.asq_last_status));
2506		}
2507		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(
2508						  hw,
2509						  vsi->seid,
2510						  promisc, NULL);
2511		if (aq_ret) {
2512			dev_info(&pf->pdev->dev,
2513				 "set multicast promisc failed, err %pe, aq_err %s\n",
2514				 ERR_PTR(aq_ret),
2515				 i40e_aq_str(hw, hw->aq.asq_last_status));
2516		}
2517	}
2518
2519	if (!aq_ret)
2520		pf->cur_promisc = promisc;
2521
2522	return aq_ret;
2523}
2524
2525/**
2526 * i40e_sync_vsi_filters - Update the VSI filter list to the HW
2527 * @vsi: ptr to the VSI
2528 *
2529 * Push any outstanding VSI filter changes through the AdminQ.
2530 *
2531 * Returns 0 or error value
2532 **/
2533int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2534{
2535	struct hlist_head tmp_add_list, tmp_del_list;
2536	struct i40e_mac_filter *f;
2537	struct i40e_new_mac_filter *new, *add_head = NULL;
2538	struct i40e_hw *hw = &vsi->back->hw;
2539	bool old_overflow, new_overflow;
2540	unsigned int failed_filters = 0;
2541	unsigned int vlan_filters = 0;
2542	char vsi_name[16] = "PF";
2543	int filter_list_len = 0;
2544	u32 changed_flags = 0;
2545	struct hlist_node *h;
2546	struct i40e_pf *pf;
2547	int num_add = 0;
2548	int num_del = 0;
2549	int aq_ret = 0;
2550	int retval = 0;
2551	u16 cmd_flags;
2552	int list_size;
2553	int bkt;
2554
2555	/* empty array typed pointers, kcalloc later */
2556	struct i40e_aqc_add_macvlan_element_data *add_list;
2557	struct i40e_aqc_remove_macvlan_element_data *del_list;
2558
2559	while (test_and_set_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state))
2560		usleep_range(1000, 2000);
2561	pf = vsi->back;
2562
2563	old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2564
2565	if (vsi->netdev) {
2566		changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
2567		vsi->current_netdev_flags = vsi->netdev->flags;
2568	}
2569
2570	INIT_HLIST_HEAD(&tmp_add_list);
2571	INIT_HLIST_HEAD(&tmp_del_list);
2572
2573	if (vsi->type == I40E_VSI_SRIOV)
2574		snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id);
2575	else if (vsi->type != I40E_VSI_MAIN)
2576		snprintf(vsi_name, sizeof(vsi_name) - 1, "vsi %d", vsi->seid);
2577
2578	if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
2579		vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
2580
2581		spin_lock_bh(&vsi->mac_filter_hash_lock);
2582		/* Create a list of filters to delete. */
2583		hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
2584			if (f->state == I40E_FILTER_REMOVE) {
2585				/* Move the element into temporary del_list */
2586				hash_del(&f->hlist);
2587				hlist_add_head(&f->hlist, &tmp_del_list);
2588
2589				/* Avoid counting removed filters */
2590				continue;
2591			}
2592			if (f->state == I40E_FILTER_NEW) {
2593				/* Create a temporary i40e_new_mac_filter */
2594				new = kzalloc(sizeof(*new), GFP_ATOMIC);
2595				if (!new)
2596					goto err_no_memory_locked;
2597
2598				/* Store pointer to the real filter */
2599				new->f = f;
2600				new->state = f->state;
2601
2602				/* Add it to the hash list */
2603				hlist_add_head(&new->hlist, &tmp_add_list);
2604			}
2605
2606			/* Count the number of active (current and new) VLAN
2607			 * filters we have now. Does not count filters which
2608			 * are marked for deletion.
2609			 */
2610			if (f->vlan > 0)
2611				vlan_filters++;
2612		}
2613
2614		if (vsi->type != I40E_VSI_SRIOV)
2615			retval = i40e_correct_mac_vlan_filters
2616				(vsi, &tmp_add_list, &tmp_del_list,
2617				 vlan_filters);
2618		else if (pf->vf)
2619			retval = i40e_correct_vf_mac_vlan_filters
2620				(vsi, &tmp_add_list, &tmp_del_list,
2621				 vlan_filters, pf->vf[vsi->vf_id].trusted);
2622
2623		hlist_for_each_entry(new, &tmp_add_list, hlist)
2624			netdev_hw_addr_refcnt(new->f, vsi->netdev, 1);
2625
2626		if (retval)
2627			goto err_no_memory_locked;
2628
2629		spin_unlock_bh(&vsi->mac_filter_hash_lock);
2630	}
2631
2632	/* Now process 'del_list' outside the lock */
2633	if (!hlist_empty(&tmp_del_list)) {
2634		filter_list_len = hw->aq.asq_buf_size /
2635			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
2636		list_size = filter_list_len *
2637			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
2638		del_list = kzalloc(list_size, GFP_ATOMIC);
2639		if (!del_list)
2640			goto err_no_memory;
2641
2642		hlist_for_each_entry_safe(f, h, &tmp_del_list, hlist) {
2643			cmd_flags = 0;
2644
2645			/* handle broadcast filters by updating the broadcast
2646			 * promiscuous flag and release filter list.
2647			 */
2648			if (is_broadcast_ether_addr(f->macaddr)) {
2649				i40e_aqc_broadcast_filter(vsi, vsi_name, f);
2650
2651				hlist_del(&f->hlist);
2652				kfree(f);
2653				continue;
2654			}
2655
2656			/* add to delete list */
2657			ether_addr_copy(del_list[num_del].mac_addr, f->macaddr);
2658			if (f->vlan == I40E_VLAN_ANY) {
2659				del_list[num_del].vlan_tag = 0;
2660				cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
2661			} else {
2662				del_list[num_del].vlan_tag =
2663					cpu_to_le16((u16)(f->vlan));
2664			}
2665
2666			cmd_flags |= I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
2667			del_list[num_del].flags = cmd_flags;
2668			num_del++;
2669
2670			/* flush a full buffer */
2671			if (num_del == filter_list_len) {
2672				i40e_aqc_del_filters(vsi, vsi_name, del_list,
2673						     num_del, &retval);
2674				memset(del_list, 0, list_size);
2675				num_del = 0;
2676			}
2677			/* Release memory for MAC filter entries which were
2678			 * synced up with HW.
2679			 */
2680			hlist_del(&f->hlist);
2681			kfree(f);
2682		}
2683
2684		if (num_del) {
2685			i40e_aqc_del_filters(vsi, vsi_name, del_list,
2686					     num_del, &retval);
2687		}
2688
2689		kfree(del_list);
2690		del_list = NULL;
2691	}
2692
2693	if (!hlist_empty(&tmp_add_list)) {
2694		/* Do all the adds now. */
2695		filter_list_len = hw->aq.asq_buf_size /
2696			       sizeof(struct i40e_aqc_add_macvlan_element_data);
2697		list_size = filter_list_len *
2698			       sizeof(struct i40e_aqc_add_macvlan_element_data);
2699		add_list = kzalloc(list_size, GFP_ATOMIC);
2700		if (!add_list)
2701			goto err_no_memory;
2702
2703		num_add = 0;
2704		hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
2705			/* handle broadcast filters by updating the broadcast
2706			 * promiscuous flag instead of adding a MAC filter.
2707			 */
2708			if (is_broadcast_ether_addr(new->f->macaddr)) {
2709				if (i40e_aqc_broadcast_filter(vsi, vsi_name,
2710							      new->f))
2711					new->state = I40E_FILTER_FAILED;
2712				else
2713					new->state = I40E_FILTER_ACTIVE;
2714				continue;
2715			}
2716
2717			/* add to add array */
2718			if (num_add == 0)
2719				add_head = new;
2720			cmd_flags = 0;
2721			ether_addr_copy(add_list[num_add].mac_addr,
2722					new->f->macaddr);
2723			if (new->f->vlan == I40E_VLAN_ANY) {
2724				add_list[num_add].vlan_tag = 0;
2725				cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
2726			} else {
2727				add_list[num_add].vlan_tag =
2728					cpu_to_le16((u16)(new->f->vlan));
2729			}
2730			add_list[num_add].queue_number = 0;
2731			/* set invalid match method for later detection */
2732			add_list[num_add].match_method = I40E_AQC_MM_ERR_NO_RES;
2733			cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
2734			add_list[num_add].flags = cpu_to_le16(cmd_flags);
2735			num_add++;
2736
2737			/* flush a full buffer */
2738			if (num_add == filter_list_len) {
2739				i40e_aqc_add_filters(vsi, vsi_name, add_list,
2740						     add_head, num_add);
2741				memset(add_list, 0, list_size);
2742				num_add = 0;
2743			}
2744		}
2745		if (num_add) {
2746			i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
2747					     num_add);
2748		}
2749		/* Now move all of the filters from the temp add list back to
2750		 * the VSI's list.
2751		 */
2752		spin_lock_bh(&vsi->mac_filter_hash_lock);
2753		hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
2754			/* Only update the state if we're still NEW */
2755			if (new->f->state == I40E_FILTER_NEW)
2756				new->f->state = new->state;
2757			hlist_del(&new->hlist);
2758			netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
2759			kfree(new);
2760		}
2761		spin_unlock_bh(&vsi->mac_filter_hash_lock);
2762		kfree(add_list);
2763		add_list = NULL;
2764	}
2765
2766	/* Determine the number of active and failed filters. */
2767	spin_lock_bh(&vsi->mac_filter_hash_lock);
2768	vsi->active_filters = 0;
2769	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
2770		if (f->state == I40E_FILTER_ACTIVE)
2771			vsi->active_filters++;
2772		else if (f->state == I40E_FILTER_FAILED)
2773			failed_filters++;
2774	}
2775	spin_unlock_bh(&vsi->mac_filter_hash_lock);
2776
2777	/* Check if we are able to exit overflow promiscuous mode. We can
2778	 * safely exit if we didn't just enter, we no longer have any failed
2779	 * filters, and we have reduced filters below the threshold value.
2780	 */
2781	if (old_overflow && !failed_filters &&
2782	    vsi->active_filters < vsi->promisc_threshold) {
2783		dev_info(&pf->pdev->dev,
2784			 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
2785			 vsi_name);
2786		clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2787		vsi->promisc_threshold = 0;
2788	}
2789
2790	/* if the VF is not trusted do not do promisc */
2791	if (vsi->type == I40E_VSI_SRIOV && pf->vf &&
2792	    !pf->vf[vsi->vf_id].trusted) {
2793		clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2794		goto out;
2795	}
2796
2797	new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2798
2799	/* If we are entering overflow promiscuous, we need to calculate a new
2800	 * threshold for when we are safe to exit
2801	 */
2802	if (!old_overflow && new_overflow)
2803		vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
2804
2805	/* check for changes in promiscuous modes */
2806	if (changed_flags & IFF_ALLMULTI) {
2807		bool cur_multipromisc;
2808
2809		cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
2810		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
2811							       vsi->seid,
2812							       cur_multipromisc,
2813							       NULL);
2814		if (aq_ret) {
2815			retval = i40e_aq_rc_to_posix(aq_ret,
2816						     hw->aq.asq_last_status);
2817			dev_info(&pf->pdev->dev,
2818				 "set multi promisc failed on %s, err %pe aq_err %s\n",
2819				 vsi_name,
2820				 ERR_PTR(aq_ret),
2821				 i40e_aq_str(hw, hw->aq.asq_last_status));
2822		} else {
2823			dev_info(&pf->pdev->dev, "%s allmulti mode.\n",
2824				 cur_multipromisc ? "entering" : "leaving");
2825		}
2826	}
2827
2828	if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
2829		bool cur_promisc;
2830
2831		cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
2832			       new_overflow);
2833		aq_ret = i40e_set_promiscuous(pf, cur_promisc);
2834		if (aq_ret) {
2835			retval = i40e_aq_rc_to_posix(aq_ret,
2836						     hw->aq.asq_last_status);
2837			dev_info(&pf->pdev->dev,
2838				 "Setting promiscuous %s failed on %s, err %pe aq_err %s\n",
2839				 cur_promisc ? "on" : "off",
2840				 vsi_name,
2841				 ERR_PTR(aq_ret),
2842				 i40e_aq_str(hw, hw->aq.asq_last_status));
2843		}
2844	}
2845out:
2846	/* if something went wrong then set the changed flag so we try again */
2847	if (retval)
2848		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
2849
2850	clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state);
2851	return retval;
2852
2853err_no_memory:
2854	/* Restore elements on the temporary add and delete lists */
2855	spin_lock_bh(&vsi->mac_filter_hash_lock);
2856err_no_memory_locked:
2857	i40e_undo_del_filter_entries(vsi, &tmp_del_list);
2858	i40e_undo_add_filter_entries(vsi, &tmp_add_list);
2859	spin_unlock_bh(&vsi->mac_filter_hash_lock);
2860
2861	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
2862	clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state);
2863	return -ENOMEM;
2864}
2865
2866/**
2867 * i40e_sync_filters_subtask - Sync the VSI filter list with HW
2868 * @pf: board private structure
2869 **/
2870static void i40e_sync_filters_subtask(struct i40e_pf *pf)
2871{
2872	int v;
2873
2874	if (!pf)
2875		return;
2876	if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
2877		return;
2878	if (test_bit(__I40E_VF_DISABLE, pf->state)) {
2879		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
2880		return;
2881	}
2882
2883	for (v = 0; v < pf->num_alloc_vsi; v++) {
2884		if (pf->vsi[v] &&
2885		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
2886		    !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) {
2887			int ret = i40e_sync_vsi_filters(pf->vsi[v]);
2888
2889			if (ret) {
2890				/* come back and try again later */
2891				set_bit(__I40E_MACVLAN_SYNC_PENDING,
2892					pf->state);
2893				break;
2894			}
2895		}
2896	}
2897}
2898
2899/**
2900 * i40e_calculate_vsi_rx_buf_len - Calculates buffer length
2901 *
2902 * @vsi: VSI to calculate rx_buf_len from
2903 */
2904static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
2905{
2906	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
2907		return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
2908
2909	return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
2910}
2911
2912/**
2913 * i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI
2914 * @vsi: the vsi
2915 * @xdp_prog: XDP program
2916 **/
2917static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi,
2918				   struct bpf_prog *xdp_prog)
2919{
2920	u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
2921	u16 chain_len;
2922
2923	if (xdp_prog && !xdp_prog->aux->xdp_has_frags)
2924		chain_len = 1;
2925	else
2926		chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
2927
2928	return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER);
2929}
2930
2931/**
2932 * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
2933 * @netdev: network interface device structure
2934 * @new_mtu: new value for maximum frame size
2935 *
2936 * Returns 0 on success, negative on failure
2937 **/
2938static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
2939{
2940	struct i40e_netdev_priv *np = netdev_priv(netdev);
2941	struct i40e_vsi *vsi = np->vsi;
2942	struct i40e_pf *pf = vsi->back;
2943	int frame_size;
2944
2945	frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
2946	if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) {
2947		netdev_err(netdev, "Error changing mtu to %d, Max is %d\n",
2948			   new_mtu, frame_size - I40E_PACKET_HDR_PAD);
2949		return -EINVAL;
2950	}
2951
2952	netdev_dbg(netdev, "changing MTU from %d to %d\n",
2953		   netdev->mtu, new_mtu);
2954	netdev->mtu = new_mtu;
2955	if (netif_running(netdev))
2956		i40e_vsi_reinit_locked(vsi);
2957	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
2958	set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
2959	return 0;
2960}
2961
2962/**
2963 * i40e_ioctl - Access the hwtstamp interface
2964 * @netdev: network interface device structure
2965 * @ifr: interface request data
2966 * @cmd: ioctl command
2967 **/
2968int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
2969{
2970	struct i40e_netdev_priv *np = netdev_priv(netdev);
2971	struct i40e_pf *pf = np->vsi->back;
2972
2973	switch (cmd) {
2974	case SIOCGHWTSTAMP:
2975		return i40e_ptp_get_ts_config(pf, ifr);
2976	case SIOCSHWTSTAMP:
2977		return i40e_ptp_set_ts_config(pf, ifr);
2978	default:
2979		return -EOPNOTSUPP;
2980	}
2981}
2982
2983/**
2984 * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI
2985 * @vsi: the vsi being adjusted
2986 **/
2987void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
2988{
2989	struct i40e_vsi_context ctxt;
2990	int ret;
2991
2992	/* Don't modify stripping options if a port VLAN is active */
2993	if (vsi->info.pvid)
2994		return;
2995
2996	if ((vsi->info.valid_sections &
2997	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
2998	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0))
2999		return;  /* already enabled */
3000
3001	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
3002	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
3003				    I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH;
3004
3005	ctxt.seid = vsi->seid;
3006	ctxt.info = vsi->info;
3007	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
3008	if (ret) {
3009		dev_info(&vsi->back->pdev->dev,
3010			 "update vlan stripping failed, err %pe aq_err %s\n",
3011			 ERR_PTR(ret),
3012			 i40e_aq_str(&vsi->back->hw,
3013				     vsi->back->hw.aq.asq_last_status));
3014	}
3015}
3016
3017/**
3018 * i40e_vlan_stripping_disable - Turn off vlan stripping for the VSI
3019 * @vsi: the vsi being adjusted
3020 **/
3021void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
3022{
3023	struct i40e_vsi_context ctxt;
3024	int ret;
3025
3026	/* Don't modify stripping options if a port VLAN is active */
3027	if (vsi->info.pvid)
3028		return;
3029
3030	if ((vsi->info.valid_sections &
3031	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
3032	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) ==
3033	     I40E_AQ_VSI_PVLAN_EMOD_MASK))
3034		return;  /* already disabled */
3035
3036	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
3037	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
3038				    I40E_AQ_VSI_PVLAN_EMOD_NOTHING;
3039
3040	ctxt.seid = vsi->seid;
3041	ctxt.info = vsi->info;
3042	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
3043	if (ret) {
3044		dev_info(&vsi->back->pdev->dev,
3045			 "update vlan stripping failed, err %pe aq_err %s\n",
3046			 ERR_PTR(ret),
3047			 i40e_aq_str(&vsi->back->hw,
3048				     vsi->back->hw.aq.asq_last_status));
3049	}
3050}
3051
3052/**
3053 * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
3054 * @vsi: the vsi being configured
3055 * @vid: vlan id to be added (0 = untagged only , -1 = any)
3056 *
3057 * This is a helper function for adding a new MAC/VLAN filter with the
3058 * specified VLAN for each existing MAC address already in the hash table.
3059 * This function does *not* perform any accounting to update filters based on
3060 * VLAN mode.
3061 *
3062 * NOTE: this function expects to be called while under the
3063 * mac_filter_hash_lock
3064 **/
3065int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
3066{
3067	struct i40e_mac_filter *f, *add_f;
3068	struct hlist_node *h;
3069	int bkt;
3070
3071	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
3072		/* If we're asked to add a filter that has been marked for
3073		 * removal, it is safe to simply restore it to active state.
3074		 * __i40e_del_filter will have simply deleted any filters which
3075		 * were previously marked NEW or FAILED, so if it is currently
3076		 * marked REMOVE it must have previously been ACTIVE. Since we
3077		 * haven't yet run the sync filters task, just restore this
3078		 * filter to the ACTIVE state so that the sync task leaves it
3079		 * in place.
3080		 */
3081		if (f->state == I40E_FILTER_REMOVE && f->vlan == vid) {
3082			f->state = I40E_FILTER_ACTIVE;
3083			continue;
3084		} else if (f->state == I40E_FILTER_REMOVE) {
3085			continue;
3086		}
3087		add_f = i40e_add_filter(vsi, f->macaddr, vid);
3088		if (!add_f) {
3089			dev_info(&vsi->back->pdev->dev,
3090				 "Could not add vlan filter %d for %pM\n",
3091				 vid, f->macaddr);
3092			return -ENOMEM;
3093		}
3094	}
3095
3096	return 0;
3097}
3098
3099/**
3100 * i40e_vsi_add_vlan - Add VSI membership for given VLAN
3101 * @vsi: the VSI being configured
3102 * @vid: VLAN id to be added
3103 **/
3104int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid)
3105{
3106	int err;
3107
3108	if (vsi->info.pvid)
3109		return -EINVAL;
3110
3111	/* The network stack will attempt to add VID=0, with the intention to
3112	 * receive priority tagged packets with a VLAN of 0. Our HW receives
3113	 * these packets by default when configured to receive untagged
3114	 * packets, so we don't need to add a filter for this case.
3115	 * Additionally, HW interprets adding a VID=0 filter as meaning to
3116	 * receive *only* tagged traffic and stops receiving untagged traffic.
3117	 * Thus, we do not want to actually add a filter for VID=0
3118	 */
3119	if (!vid)
3120		return 0;
3121
3122	/* Locked once because all functions invoked below iterates list*/
3123	spin_lock_bh(&vsi->mac_filter_hash_lock);
3124	err = i40e_add_vlan_all_mac(vsi, vid);
3125	spin_unlock_bh(&vsi->mac_filter_hash_lock);
3126	if (err)
3127		return err;
3128
3129	/* schedule our worker thread which will take care of
3130	 * applying the new filter changes
3131	 */
3132	i40e_service_event_schedule(vsi->back);
3133	return 0;
3134}
3135
3136/**
3137 * i40e_rm_vlan_all_mac - Remove MAC/VLAN pair for all MAC with the given VLAN
3138 * @vsi: the vsi being configured
3139 * @vid: vlan id to be removed (0 = untagged only , -1 = any)
3140 *
3141 * This function should be used to remove all VLAN filters which match the
3142 * given VID. It does not schedule the service event and does not take the
3143 * mac_filter_hash_lock so it may be combined with other operations under
3144 * a single invocation of the mac_filter_hash_lock.
3145 *
3146 * NOTE: this function expects to be called while under the
3147 * mac_filter_hash_lock
3148 */
3149void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
3150{
3151	struct i40e_mac_filter *f;
3152	struct hlist_node *h;
3153	int bkt;
3154
3155	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
3156		if (f->vlan == vid)
3157			__i40e_del_filter(vsi, f);
3158	}
3159}
3160
3161/**
3162 * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN
3163 * @vsi: the VSI being configured
3164 * @vid: VLAN id to be removed
3165 **/
3166void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
3167{
3168	if (!vid || vsi->info.pvid)
3169		return;
3170
3171	spin_lock_bh(&vsi->mac_filter_hash_lock);
3172	i40e_rm_vlan_all_mac(vsi, vid);
3173	spin_unlock_bh(&vsi->mac_filter_hash_lock);
3174
3175	/* schedule our worker thread which will take care of
3176	 * applying the new filter changes
3177	 */
3178	i40e_service_event_schedule(vsi->back);
3179}
3180
3181/**
3182 * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
3183 * @netdev: network interface to be adjusted
3184 * @proto: unused protocol value
3185 * @vid: vlan id to be added
3186 *
3187 * net_device_ops implementation for adding vlan ids
3188 **/
3189static int i40e_vlan_rx_add_vid(struct net_device *netdev,
3190				__always_unused __be16 proto, u16 vid)
3191{
3192	struct i40e_netdev_priv *np = netdev_priv(netdev);
3193	struct i40e_vsi *vsi = np->vsi;
3194	int ret = 0;
3195
3196	if (vid >= VLAN_N_VID)
3197		return -EINVAL;
3198
3199	ret = i40e_vsi_add_vlan(vsi, vid);
3200	if (!ret)
3201		set_bit(vid, vsi->active_vlans);
3202
3203	return ret;
3204}
3205
3206/**
3207 * i40e_vlan_rx_add_vid_up - Add a vlan id filter to HW offload in UP path
3208 * @netdev: network interface to be adjusted
3209 * @proto: unused protocol value
3210 * @vid: vlan id to be added
3211 **/
3212static void i40e_vlan_rx_add_vid_up(struct net_device *netdev,
3213				    __always_unused __be16 proto, u16 vid)
3214{
3215	struct i40e_netdev_priv *np = netdev_priv(netdev);
3216	struct i40e_vsi *vsi = np->vsi;
3217
3218	if (vid >= VLAN_N_VID)
3219		return;
3220	set_bit(vid, vsi->active_vlans);
3221}
3222
3223/**
3224 * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
3225 * @netdev: network interface to be adjusted
3226 * @proto: unused protocol value
3227 * @vid: vlan id to be removed
3228 *
3229 * net_device_ops implementation for removing vlan ids
3230 **/
3231static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
3232				 __always_unused __be16 proto, u16 vid)
3233{
3234	struct i40e_netdev_priv *np = netdev_priv(netdev);
3235	struct i40e_vsi *vsi = np->vsi;
3236
3237	/* return code is ignored as there is nothing a user
3238	 * can do about failure to remove and a log message was
3239	 * already printed from the other function
3240	 */
3241	i40e_vsi_kill_vlan(vsi, vid);
3242
3243	clear_bit(vid, vsi->active_vlans);
3244
3245	return 0;
3246}
3247
3248/**
3249 * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up
3250 * @vsi: the vsi being brought back up
3251 **/
3252static void i40e_restore_vlan(struct i40e_vsi *vsi)
3253{
3254	u16 vid;
3255
3256	if (!vsi->netdev)
3257		return;
3258
3259	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
3260		i40e_vlan_stripping_enable(vsi);
3261	else
3262		i40e_vlan_stripping_disable(vsi);
3263
3264	for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
3265		i40e_vlan_rx_add_vid_up(vsi->netdev, htons(ETH_P_8021Q),
3266					vid);
3267}
3268
3269/**
3270 * i40e_vsi_add_pvid - Add pvid for the VSI
3271 * @vsi: the vsi being adjusted
3272 * @vid: the vlan id to set as a PVID
3273 **/
3274int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
3275{
3276	struct i40e_vsi_context ctxt;
3277	int ret;
3278
3279	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
3280	vsi->info.pvid = cpu_to_le16(vid);
3281	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_TAGGED |
3282				    I40E_AQ_VSI_PVLAN_INSERT_PVID |
3283				    I40E_AQ_VSI_PVLAN_EMOD_STR;
3284
3285	ctxt.seid = vsi->seid;
3286	ctxt.info = vsi->info;
3287	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
3288	if (ret) {
3289		dev_info(&vsi->back->pdev->dev,
3290			 "add pvid failed, err %pe aq_err %s\n",
3291			 ERR_PTR(ret),
3292			 i40e_aq_str(&vsi->back->hw,
3293				     vsi->back->hw.aq.asq_last_status));
3294		return -ENOENT;
3295	}
3296
3297	return 0;
3298}
3299
3300/**
3301 * i40e_vsi_remove_pvid - Remove the pvid from the VSI
3302 * @vsi: the vsi being adjusted
3303 *
3304 * Just use the vlan_rx_register() service to put it back to normal
3305 **/
3306void i40e_vsi_remove_pvid(struct i40e_vsi *vsi)
3307{
3308	vsi->info.pvid = 0;
3309
3310	i40e_vlan_stripping_disable(vsi);
3311}
3312
3313/**
3314 * i40e_vsi_setup_tx_resources - Allocate VSI Tx queue resources
3315 * @vsi: ptr to the VSI
3316 *
3317 * If this function returns with an error, then it's possible one or
3318 * more of the rings is populated (while the rest are not).  It is the
3319 * callers duty to clean those orphaned rings.
3320 *
3321 * Return 0 on success, negative on failure
3322 **/
3323static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi)
3324{
3325	int i, err = 0;
3326
3327	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3328		err = i40e_setup_tx_descriptors(vsi->tx_rings[i]);
3329
3330	if (!i40e_enabled_xdp_vsi(vsi))
3331		return err;
3332
3333	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3334		err = i40e_setup_tx_descriptors(vsi->xdp_rings[i]);
3335
3336	return err;
3337}
3338
3339/**
3340 * i40e_vsi_free_tx_resources - Free Tx resources for VSI queues
3341 * @vsi: ptr to the VSI
3342 *
3343 * Free VSI's transmit software resources
3344 **/
3345static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi)
3346{
3347	int i;
3348
3349	if (vsi->tx_rings) {
3350		for (i = 0; i < vsi->num_queue_pairs; i++)
3351			if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
3352				i40e_free_tx_resources(vsi->tx_rings[i]);
3353	}
3354
3355	if (vsi->xdp_rings) {
3356		for (i = 0; i < vsi->num_queue_pairs; i++)
3357			if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
3358				i40e_free_tx_resources(vsi->xdp_rings[i]);
3359	}
3360}
3361
3362/**
3363 * i40e_vsi_setup_rx_resources - Allocate VSI queues Rx resources
3364 * @vsi: ptr to the VSI
3365 *
3366 * If this function returns with an error, then it's possible one or
3367 * more of the rings is populated (while the rest are not).  It is the
3368 * callers duty to clean those orphaned rings.
3369 *
3370 * Return 0 on success, negative on failure
3371 **/
3372static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi)
3373{
3374	int i, err = 0;
3375
3376	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3377		err = i40e_setup_rx_descriptors(vsi->rx_rings[i]);
3378	return err;
3379}
3380
3381/**
3382 * i40e_vsi_free_rx_resources - Free Rx Resources for VSI queues
3383 * @vsi: ptr to the VSI
3384 *
3385 * Free all receive software resources
3386 **/
3387static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
3388{
3389	int i;
3390
3391	if (!vsi->rx_rings)
3392		return;
3393
3394	for (i = 0; i < vsi->num_queue_pairs; i++)
3395		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
3396			i40e_free_rx_resources(vsi->rx_rings[i]);
3397}
3398
3399/**
3400 * i40e_config_xps_tx_ring - Configure XPS for a Tx ring
3401 * @ring: The Tx ring to configure
3402 *
3403 * This enables/disables XPS for a given Tx descriptor ring
3404 * based on the TCs enabled for the VSI that ring belongs to.
3405 **/
3406static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
3407{
3408	int cpu;
3409
3410	if (!ring->q_vector || !ring->netdev || ring->ch)
3411		return;
3412
3413	/* We only initialize XPS once, so as not to overwrite user settings */
3414	if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
3415		return;
3416
3417	cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
3418	netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
3419			    ring->queue_index);
3420}
3421
3422/**
3423 * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled
3424 * @ring: The Tx or Rx ring
3425 *
3426 * Returns the AF_XDP buffer pool or NULL.
3427 **/
3428static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring)
3429{
3430	bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
3431	int qid = ring->queue_index;
3432
3433	if (ring_is_xdp(ring))
3434		qid -= ring->vsi->alloc_queue_pairs;
3435
3436	if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps))
3437		return NULL;
3438
3439	return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
3440}
3441
3442/**
3443 * i40e_configure_tx_ring - Configure a transmit ring context and rest
3444 * @ring: The Tx ring to configure
3445 *
3446 * Configure the Tx descriptor ring in the HMC context.
3447 **/
3448static int i40e_configure_tx_ring(struct i40e_ring *ring)
3449{
3450	struct i40e_vsi *vsi = ring->vsi;
3451	u16 pf_q = vsi->base_queue + ring->queue_index;
3452	struct i40e_hw *hw = &vsi->back->hw;
3453	struct i40e_hmc_obj_txq tx_ctx;
3454	u32 qtx_ctl = 0;
3455	int err = 0;
3456
3457	if (ring_is_xdp(ring))
3458		ring->xsk_pool = i40e_xsk_pool(ring);
3459
3460	/* some ATR related tx ring init */
3461	if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
3462		ring->atr_sample_rate = vsi->back->atr_sample_rate;
3463		ring->atr_count = 0;
3464	} else {
3465		ring->atr_sample_rate = 0;
3466	}
3467
3468	/* configure XPS */
3469	i40e_config_xps_tx_ring(ring);
3470
3471	/* clear the context structure first */
3472	memset(&tx_ctx, 0, sizeof(tx_ctx));
3473
3474	tx_ctx.new_context = 1;
3475	tx_ctx.base = (ring->dma / 128);
3476	tx_ctx.qlen = ring->count;
3477	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
3478					       I40E_FLAG_FD_ATR_ENABLED));
3479	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
3480	/* FDIR VSI tx ring can still use RS bit and writebacks */
3481	if (vsi->type != I40E_VSI_FDIR)
3482		tx_ctx.head_wb_ena = 1;
3483	tx_ctx.head_wb_addr = ring->dma +
3484			      (ring->count * sizeof(struct i40e_tx_desc));
3485
3486	/* As part of VSI creation/update, FW allocates certain
3487	 * Tx arbitration queue sets for each TC enabled for
3488	 * the VSI. The FW returns the handles to these queue
3489	 * sets as part of the response buffer to Add VSI,
3490	 * Update VSI, etc. AQ commands. It is expected that
3491	 * these queue set handles be associated with the Tx
3492	 * queues by the driver as part of the TX queue context
3493	 * initialization. This has to be done regardless of
3494	 * DCB as by default everything is mapped to TC0.
3495	 */
3496
3497	if (ring->ch)
3498		tx_ctx.rdylist =
3499			le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
3500
3501	else
3502		tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
3503
3504	tx_ctx.rdylist_act = 0;
3505
3506	/* clear the context in the HMC */
3507	err = i40e_clear_lan_tx_queue_context(hw, pf_q);
3508	if (err) {
3509		dev_info(&vsi->back->pdev->dev,
3510			 "Failed to clear LAN Tx queue context on Tx ring %d (pf_q %d), error: %d\n",
3511			 ring->queue_index, pf_q, err);
3512		return -ENOMEM;
3513	}
3514
3515	/* set the context in the HMC */
3516	err = i40e_set_lan_tx_queue_context(hw, pf_q, &tx_ctx);
3517	if (err) {
3518		dev_info(&vsi->back->pdev->dev,
3519			 "Failed to set LAN Tx queue context on Tx ring %d (pf_q %d, error: %d\n",
3520			 ring->queue_index, pf_q, err);
3521		return -ENOMEM;
3522	}
3523
3524	/* Now associate this queue with this PCI function */
3525	if (ring->ch) {
3526		if (ring->ch->type == I40E_VSI_VMDQ2)
3527			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
3528		else
3529			return -EINVAL;
3530
3531		qtx_ctl |= (ring->ch->vsi_number <<
3532			    I40E_QTX_CTL_VFVM_INDX_SHIFT) &
3533			    I40E_QTX_CTL_VFVM_INDX_MASK;
3534	} else {
3535		if (vsi->type == I40E_VSI_VMDQ2) {
3536			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
3537			qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
3538				    I40E_QTX_CTL_VFVM_INDX_MASK;
3539		} else {
3540			qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
3541		}
3542	}
3543
3544	qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
3545		    I40E_QTX_CTL_PF_INDX_MASK);
3546	wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
3547	i40e_flush(hw);
3548
3549	/* cache tail off for easier writes later */
3550	ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
3551
3552	return 0;
3553}
3554
3555/**
3556 * i40e_rx_offset - Return expected offset into page to access data
3557 * @rx_ring: Ring we are requesting offset of
3558 *
3559 * Returns the offset value for ring into the data buffer.
3560 */
3561static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
3562{
3563	return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
3564}
3565
3566/**
3567 * i40e_configure_rx_ring - Configure a receive ring context
3568 * @ring: The Rx ring to configure
3569 *
3570 * Configure the Rx descriptor ring in the HMC context.
3571 **/
3572static int i40e_configure_rx_ring(struct i40e_ring *ring)
3573{
3574	struct i40e_vsi *vsi = ring->vsi;
3575	u32 chain_len = vsi->back->hw.func_caps.rx_buf_chain_len;
3576	u16 pf_q = vsi->base_queue + ring->queue_index;
3577	struct i40e_hw *hw = &vsi->back->hw;
3578	struct i40e_hmc_obj_rxq rx_ctx;
3579	int err = 0;
3580	bool ok;
3581
3582	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
3583
3584	/* clear the context structure first */
3585	memset(&rx_ctx, 0, sizeof(rx_ctx));
3586
3587	ring->rx_buf_len = vsi->rx_buf_len;
3588
3589	/* XDP RX-queue info only needed for RX rings exposed to XDP */
3590	if (ring->vsi->type != I40E_VSI_MAIN)
3591		goto skip;
3592
3593	if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
3594		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
3595					 ring->queue_index,
3596					 ring->q_vector->napi.napi_id,
3597					 ring->rx_buf_len);
3598		if (err)
3599			return err;
3600	}
3601
3602	ring->xsk_pool = i40e_xsk_pool(ring);
3603	if (ring->xsk_pool) {
3604		xdp_rxq_info_unreg(&ring->xdp_rxq);
3605		ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
3606		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
3607					 ring->queue_index,
3608					 ring->q_vector->napi.napi_id,
3609					 ring->rx_buf_len);
3610		if (err)
3611			return err;
3612		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
3613						 MEM_TYPE_XSK_BUFF_POOL,
3614						 NULL);
3615		if (err)
3616			return err;
3617		dev_info(&vsi->back->pdev->dev,
3618			 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
3619			 ring->queue_index);
3620
3621	} else {
3622		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
3623						 MEM_TYPE_PAGE_SHARED,
3624						 NULL);
3625		if (err)
3626			return err;
3627	}
3628
3629skip:
3630	xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
3631
3632	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
3633				    BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
3634
3635	rx_ctx.base = (ring->dma / 128);
3636	rx_ctx.qlen = ring->count;
3637
3638	/* use 16 byte descriptors */
3639	rx_ctx.dsize = 0;
3640
3641	/* descriptor type is always zero
3642	 * rx_ctx.dtype = 0;
3643	 */
3644	rx_ctx.hsplit_0 = 0;
3645
3646	rx_ctx.rxmax = min_t(u16, vsi->max_frame, chain_len * ring->rx_buf_len);
3647	if (hw->revision_id == 0)
3648		rx_ctx.lrxqthresh = 0;
3649	else
3650		rx_ctx.lrxqthresh = 1;
3651	rx_ctx.crcstrip = 1;
3652	rx_ctx.l2tsel = 1;
3653	/* this controls whether VLAN is stripped from inner headers */
3654	rx_ctx.showiv = 0;
3655	/* set the prefena field to 1 because the manual says to */
3656	rx_ctx.prefena = 1;
3657
3658	/* clear the context in the HMC */
3659	err = i40e_clear_lan_rx_queue_context(hw, pf_q);
3660	if (err) {
3661		dev_info(&vsi->back->pdev->dev,
3662			 "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
3663			 ring->queue_index, pf_q, err);
3664		return -ENOMEM;
3665	}
3666
3667	/* set the context in the HMC */
3668	err = i40e_set_lan_rx_queue_context(hw, pf_q, &rx_ctx);
3669	if (err) {
3670		dev_info(&vsi->back->pdev->dev,
3671			 "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
3672			 ring->queue_index, pf_q, err);
3673		return -ENOMEM;
3674	}
3675
3676	/* configure Rx buffer alignment */
3677	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
3678		if (I40E_2K_TOO_SMALL_WITH_PADDING) {
3679			dev_info(&vsi->back->pdev->dev,
3680				 "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
3681			return -EOPNOTSUPP;
3682		}
3683		clear_ring_build_skb_enabled(ring);
3684	} else {
3685		set_ring_build_skb_enabled(ring);
3686	}
3687
3688	ring->rx_offset = i40e_rx_offset(ring);
3689
3690	/* cache tail for quicker writes, and clear the reg before use */
3691	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
3692	writel(0, ring->tail);
3693
3694	if (ring->xsk_pool) {
3695		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
3696		ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring));
3697	} else {
3698		ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
3699	}
3700	if (!ok) {
3701		/* Log this in case the user has forgotten to give the kernel
3702		 * any buffers, even later in the application.
3703		 */
3704		dev_info(&vsi->back->pdev->dev,
3705			 "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
3706			 ring->xsk_pool ? "AF_XDP ZC enabled " : "",
3707			 ring->queue_index, pf_q);
3708	}
3709
3710	return 0;
3711}
3712
3713/**
3714 * i40e_vsi_configure_tx - Configure the VSI for Tx
3715 * @vsi: VSI structure describing this set of rings and resources
3716 *
3717 * Configure the Tx VSI for operation.
3718 **/
3719static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
3720{
3721	int err = 0;
3722	u16 i;
3723
3724	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
3725		err = i40e_configure_tx_ring(vsi->tx_rings[i]);
3726
3727	if (err || !i40e_enabled_xdp_vsi(vsi))
3728		return err;
3729
3730	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
3731		err = i40e_configure_tx_ring(vsi->xdp_rings[i]);
3732
3733	return err;
3734}
3735
3736/**
3737 * i40e_vsi_configure_rx - Configure the VSI for Rx
3738 * @vsi: the VSI being configured
3739 *
3740 * Configure the Rx VSI for operation.
3741 **/
3742static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
3743{
3744	int err = 0;
3745	u16 i;
3746
3747	vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
3748	vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
3749
3750#if (PAGE_SIZE < 8192)
3751	if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING &&
3752	    vsi->netdev->mtu <= ETH_DATA_LEN) {
3753		vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
3754		vsi->max_frame = vsi->rx_buf_len;
3755	}
3756#endif
3757
3758	/* set up individual rings */
3759	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3760		err = i40e_configure_rx_ring(vsi->rx_rings[i]);
3761
3762	return err;
3763}
3764
3765/**
3766 * i40e_vsi_config_dcb_rings - Update rings to reflect DCB TC
3767 * @vsi: ptr to the VSI
3768 **/
3769static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
3770{
3771	struct i40e_ring *tx_ring, *rx_ring;
3772	u16 qoffset, qcount;
3773	int i, n;
3774
3775	if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
3776		/* Reset the TC information */
3777		for (i = 0; i < vsi->num_queue_pairs; i++) {
3778			rx_ring = vsi->rx_rings[i];
3779			tx_ring = vsi->tx_rings[i];
3780			rx_ring->dcb_tc = 0;
3781			tx_ring->dcb_tc = 0;
3782		}
3783		return;
3784	}
3785
3786	for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
3787		if (!(vsi->tc_config.enabled_tc & BIT_ULL(n)))
3788			continue;
3789
3790		qoffset = vsi->tc_config.tc_info[n].qoffset;
3791		qcount = vsi->tc_config.tc_info[n].qcount;
3792		for (i = qoffset; i < (qoffset + qcount); i++) {
3793			rx_ring = vsi->rx_rings[i];
3794			tx_ring = vsi->tx_rings[i];
3795			rx_ring->dcb_tc = n;
3796			tx_ring->dcb_tc = n;
3797		}
3798	}
3799}
3800
3801/**
3802 * i40e_set_vsi_rx_mode - Call set_rx_mode on a VSI
3803 * @vsi: ptr to the VSI
3804 **/
3805static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
3806{
3807	if (vsi->netdev)
3808		i40e_set_rx_mode(vsi->netdev);
3809}
3810
3811/**
3812 * i40e_reset_fdir_filter_cnt - Reset flow director filter counters
3813 * @pf: Pointer to the targeted PF
3814 *
3815 * Set all flow director counters to 0.
3816 */
3817static void i40e_reset_fdir_filter_cnt(struct i40e_pf *pf)
3818{
3819	pf->fd_tcp4_filter_cnt = 0;
3820	pf->fd_udp4_filter_cnt = 0;
3821	pf->fd_sctp4_filter_cnt = 0;
3822	pf->fd_ip4_filter_cnt = 0;
3823	pf->fd_tcp6_filter_cnt = 0;
3824	pf->fd_udp6_filter_cnt = 0;
3825	pf->fd_sctp6_filter_cnt = 0;
3826	pf->fd_ip6_filter_cnt = 0;
3827}
3828
3829/**
3830 * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters
3831 * @vsi: Pointer to the targeted VSI
3832 *
3833 * This function replays the hlist on the hw where all the SB Flow Director
3834 * filters were saved.
3835 **/
3836static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
3837{
3838	struct i40e_fdir_filter *filter;
3839	struct i40e_pf *pf = vsi->back;
3840	struct hlist_node *node;
3841
3842	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
3843		return;
3844
3845	/* Reset FDir counters as we're replaying all existing filters */
3846	i40e_reset_fdir_filter_cnt(pf);
3847
3848	hlist_for_each_entry_safe(filter, node,
3849				  &pf->fdir_filter_list, fdir_node) {
3850		i40e_add_del_fdir(vsi, filter, true);
3851	}
3852}
3853
3854/**
3855 * i40e_vsi_configure - Set up the VSI for action
3856 * @vsi: the VSI being configured
3857 **/
3858static int i40e_vsi_configure(struct i40e_vsi *vsi)
3859{
3860	int err;
3861
3862	i40e_set_vsi_rx_mode(vsi);
3863	i40e_restore_vlan(vsi);
3864	i40e_vsi_config_dcb_rings(vsi);
3865	err = i40e_vsi_configure_tx(vsi);
3866	if (!err)
3867		err = i40e_vsi_configure_rx(vsi);
3868
3869	return err;
3870}
3871
3872/**
3873 * i40e_vsi_configure_msix - MSIX mode Interrupt Config in the HW
3874 * @vsi: the VSI being configured
3875 **/
3876static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
3877{
3878	bool has_xdp = i40e_enabled_xdp_vsi(vsi);
3879	struct i40e_pf *pf = vsi->back;
3880	struct i40e_hw *hw = &pf->hw;
3881	u16 vector;
3882	int i, q;
3883	u32 qp;
3884
3885	/* The interrupt indexing is offset by 1 in the PFINT_ITRn
3886	 * and PFINT_LNKLSTn registers, e.g.:
3887	 *   PFINT_ITRn[0..n-1] gets msix-1..msix-n  (qpair interrupts)
3888	 */
3889	qp = vsi->base_queue;
3890	vector = vsi->base_vector;
3891	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
3892		struct i40e_q_vector *q_vector = vsi->q_vectors[i];
3893
3894		q_vector->rx.next_update = jiffies + 1;
3895		q_vector->rx.target_itr =
3896			ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
3897		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
3898		     q_vector->rx.target_itr >> 1);
3899		q_vector->rx.current_itr = q_vector->rx.target_itr;
3900
3901		q_vector->tx.next_update = jiffies + 1;
3902		q_vector->tx.target_itr =
3903			ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
3904		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
3905		     q_vector->tx.target_itr >> 1);
3906		q_vector->tx.current_itr = q_vector->tx.target_itr;
3907
3908		wr32(hw, I40E_PFINT_RATEN(vector - 1),
3909		     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
3910
3911		/* begin of linked list for RX queue assigned to this vector */
3912		wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp);
3913		for (q = 0; q < q_vector->num_ringpairs; q++) {
3914			u32 nextqp = has_xdp ? qp + vsi->alloc_queue_pairs : qp;
3915			u32 val;
3916
3917			val = I40E_QINT_RQCTL_CAUSE_ENA_MASK |
3918			      (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
3919			      (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
3920			      (nextqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
3921			      (I40E_QUEUE_TYPE_TX <<
3922			       I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);
3923
3924			wr32(hw, I40E_QINT_RQCTL(qp), val);
3925
3926			if (has_xdp) {
3927				/* TX queue with next queue set to TX */
3928				val = I40E_QINT_TQCTL_CAUSE_ENA_MASK |
3929				      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
3930				      (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
3931				      (qp << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
3932				      (I40E_QUEUE_TYPE_TX <<
3933				       I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
3934
3935				wr32(hw, I40E_QINT_TQCTL(nextqp), val);
3936			}
3937			/* TX queue with next RX or end of linked list */
3938			val = I40E_QINT_TQCTL_CAUSE_ENA_MASK |
3939			      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
3940			      (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
3941			      ((qp + 1) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
3942			      (I40E_QUEUE_TYPE_RX <<
3943			       I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
3944
3945			/* Terminate the linked list */
3946			if (q == (q_vector->num_ringpairs - 1))
3947				val |= (I40E_QUEUE_END_OF_LIST <<
3948					I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT);
3949
3950			wr32(hw, I40E_QINT_TQCTL(qp), val);
3951			qp++;
3952		}
3953	}
3954
3955	i40e_flush(hw);
3956}
3957
3958/**
3959 * i40e_enable_misc_int_causes - enable the non-queue interrupts
3960 * @pf: pointer to private device data structure
3961 **/
3962static void i40e_enable_misc_int_causes(struct i40e_pf *pf)
3963{
3964	struct i40e_hw *hw = &pf->hw;
3965	u32 val;
3966
3967	/* clear things first */
3968	wr32(hw, I40E_PFINT_ICR0_ENA, 0);  /* disable all */
3969	rd32(hw, I40E_PFINT_ICR0);         /* read to clear */
3970
3971	val = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK       |
3972	      I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK    |
3973	      I40E_PFINT_ICR0_ENA_GRST_MASK          |
3974	      I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK |
3975	      I40E_PFINT_ICR0_ENA_GPIO_MASK          |
3976	      I40E_PFINT_ICR0_ENA_HMC_ERR_MASK       |
3977	      I40E_PFINT_ICR0_ENA_VFLR_MASK          |
3978	      I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
3979
3980	if (pf->flags & I40E_FLAG_IWARP_ENABLED)
3981		val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
3982
3983	if (pf->flags & I40E_FLAG_PTP)
3984		val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
3985
3986	wr32(hw, I40E_PFINT_ICR0_ENA, val);
3987
3988	/* SW_ITR_IDX = 0, but don't change INTENA */
3989	wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK |
3990					I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK);
3991
3992	/* OTHER_ITR_IDX = 0 */
3993	wr32(hw, I40E_PFINT_STAT_CTL0, 0);
3994}
3995
3996/**
3997 * i40e_configure_msi_and_legacy - Legacy mode interrupt config in the HW
3998 * @vsi: the VSI being configured
3999 **/
4000static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
4001{
4002	u32 nextqp = i40e_enabled_xdp_vsi(vsi) ? vsi->alloc_queue_pairs : 0;
4003	struct i40e_q_vector *q_vector = vsi->q_vectors[0];
4004	struct i40e_pf *pf = vsi->back;
4005	struct i40e_hw *hw = &pf->hw;
4006
4007	/* set the ITR configuration */
4008	q_vector->rx.next_update = jiffies + 1;
4009	q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
4010	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1);
4011	q_vector->rx.current_itr = q_vector->rx.target_itr;
4012	q_vector->tx.next_update = jiffies + 1;
4013	q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
4014	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1);
4015	q_vector->tx.current_itr = q_vector->tx.target_itr;
4016
4017	i40e_enable_misc_int_causes(pf);
4018
4019	/* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */
4020	wr32(hw, I40E_PFINT_LNKLST0, 0);
4021
4022	/* Associate the queue pair to the vector and enable the queue
4023	 * interrupt RX queue in linked list with next queue set to TX
4024	 */
4025	wr32(hw, I40E_QINT_RQCTL(0), I40E_QINT_RQCTL_VAL(nextqp, 0, TX));
4026
4027	if (i40e_enabled_xdp_vsi(vsi)) {
4028		/* TX queue in linked list with next queue set to TX */
4029		wr32(hw, I40E_QINT_TQCTL(nextqp),
4030		     I40E_QINT_TQCTL_VAL(nextqp, 0, TX));
4031	}
4032
4033	/* last TX queue so the next RX queue doesn't matter */
4034	wr32(hw, I40E_QINT_TQCTL(0),
4035	     I40E_QINT_TQCTL_VAL(I40E_QUEUE_END_OF_LIST, 0, RX));
4036	i40e_flush(hw);
4037}
4038
4039/**
4040 * i40e_irq_dynamic_disable_icr0 - Disable default interrupt generation for icr0
4041 * @pf: board private structure
4042 **/
4043void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf)
4044{
4045	struct i40e_hw *hw = &pf->hw;
4046
4047	wr32(hw, I40E_PFINT_DYN_CTL0,
4048	     I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
4049	i40e_flush(hw);
4050}
4051
4052/**
4053 * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
4054 * @pf: board private structure
4055 **/
4056void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
4057{
4058	struct i40e_hw *hw = &pf->hw;
4059	u32 val;
4060
4061	val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
4062	      I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
4063	      (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
4064
4065	wr32(hw, I40E_PFINT_DYN_CTL0, val);
4066	i40e_flush(hw);
4067}
4068
4069/**
4070 * i40e_msix_clean_rings - MSIX mode Interrupt Handler
4071 * @irq: interrupt number
4072 * @data: pointer to a q_vector
4073 **/
4074static irqreturn_t i40e_msix_clean_rings(int irq, void *data)
4075{
4076	struct i40e_q_vector *q_vector = data;
4077
4078	if (!q_vector->tx.ring && !q_vector->rx.ring)
4079		return IRQ_HANDLED;
4080
4081	napi_schedule_irqoff(&q_vector->napi);
4082
4083	return IRQ_HANDLED;
4084}
4085
4086/**
4087 * i40e_irq_affinity_notify - Callback for affinity changes
4088 * @notify: context as to what irq was changed
4089 * @mask: the new affinity mask
4090 *
4091 * This is a callback function used by the irq_set_affinity_notifier function
4092 * so that we may register to receive changes to the irq affinity masks.
4093 **/
4094static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify,
4095				     const cpumask_t *mask)
4096{
4097	struct i40e_q_vector *q_vector =
4098		container_of(notify, struct i40e_q_vector, affinity_notify);
4099
4100	cpumask_copy(&q_vector->affinity_mask, mask);
4101}
4102
4103/**
4104 * i40e_irq_affinity_release - Callback for affinity notifier release
4105 * @ref: internal core kernel usage
4106 *
4107 * This is a callback function used by the irq_set_affinity_notifier function
4108 * to inform the current notification subscriber that they will no longer
4109 * receive notifications.
4110 **/
4111static void i40e_irq_affinity_release(struct kref *ref) {}
4112
4113/**
4114 * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts
4115 * @vsi: the VSI being configured
4116 * @basename: name for the vector
4117 *
4118 * Allocates MSI-X vectors and requests interrupts from the kernel.
4119 **/
4120static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
4121{
4122	int q_vectors = vsi->num_q_vectors;
4123	struct i40e_pf *pf = vsi->back;
4124	int base = vsi->base_vector;
4125	int rx_int_idx = 0;
4126	int tx_int_idx = 0;
4127	int vector, err;
4128	int irq_num;
4129	int cpu;
4130
4131	for (vector = 0; vector < q_vectors; vector++) {
4132		struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
4133
4134		irq_num = pf->msix_entries[base + vector].vector;
4135
4136		if (q_vector->tx.ring && q_vector->rx.ring) {
4137			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
4138				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
4139			tx_int_idx++;
4140		} else if (q_vector->rx.ring) {
4141			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
4142				 "%s-%s-%d", basename, "rx", rx_int_idx++);
4143		} else if (q_vector->tx.ring) {
4144			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
4145				 "%s-%s-%d", basename, "tx", tx_int_idx++);
4146		} else {
4147			/* skip this unused q_vector */
4148			continue;
4149		}
4150		err = request_irq(irq_num,
4151				  vsi->irq_handler,
4152				  0,
4153				  q_vector->name,
4154				  q_vector);
4155		if (err) {
4156			dev_info(&pf->pdev->dev,
4157				 "MSIX request_irq failed, error: %d\n", err);
4158			goto free_queue_irqs;
4159		}
4160
4161		/* register for affinity change notifications */
4162		q_vector->irq_num = irq_num;
4163		q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
4164		q_vector->affinity_notify.release = i40e_irq_affinity_release;
4165		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
4166		/* Spread affinity hints out across online CPUs.
4167		 *
4168		 * get_cpu_mask returns a static constant mask with
4169		 * a permanent lifetime so it's ok to pass to
4170		 * irq_update_affinity_hint without making a copy.
4171		 */
4172		cpu = cpumask_local_spread(q_vector->v_idx, -1);
4173		irq_update_affinity_hint(irq_num, get_cpu_mask(cpu));
4174	}
4175
4176	vsi->irqs_ready = true;
4177	return 0;
4178
4179free_queue_irqs:
4180	while (vector) {
4181		vector--;
4182		irq_num = pf->msix_entries[base + vector].vector;
4183		irq_set_affinity_notifier(irq_num, NULL);
4184		irq_update_affinity_hint(irq_num, NULL);
4185		free_irq(irq_num, &vsi->q_vectors[vector]);
4186	}
4187	return err;
4188}
4189
4190/**
4191 * i40e_vsi_disable_irq - Mask off queue interrupt generation on the VSI
4192 * @vsi: the VSI being un-configured
4193 **/
4194static void i40e_vsi_disable_irq(struct i40e_vsi *vsi)
4195{
4196	struct i40e_pf *pf = vsi->back;
4197	struct i40e_hw *hw = &pf->hw;
4198	int base = vsi->base_vector;
4199	int i;
4200
4201	/* disable interrupt causation from each queue */
4202	for (i = 0; i < vsi->num_queue_pairs; i++) {
4203		u32 val;
4204
4205		val = rd32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx));
4206		val &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
4207		wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val);
4208
4209		val = rd32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx));
4210		val &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
4211		wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), val);
4212
4213		if (!i40e_enabled_xdp_vsi(vsi))
4214			continue;
4215		wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[i]->reg_idx), 0);
4216	}
4217
4218	/* disable each interrupt */
4219	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4220		for (i = vsi->base_vector;
4221		     i < (vsi->num_q_vectors + vsi->base_vector); i++)
4222			wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0);
4223
4224		i40e_flush(hw);
4225		for (i = 0; i < vsi->num_q_vectors; i++)
4226			synchronize_irq(pf->msix_entries[i + base].vector);
4227	} else {
4228		/* Legacy and MSI mode - this stops all interrupt handling */
4229		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
4230		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
4231		i40e_flush(hw);
4232		synchronize_irq(pf->pdev->irq);
4233	}
4234}
4235
4236/**
4237 * i40e_vsi_enable_irq - Enable IRQ for the given VSI
4238 * @vsi: the VSI being configured
4239 **/
4240static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
4241{
4242	struct i40e_pf *pf = vsi->back;
4243	int i;
4244
4245	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4246		for (i = 0; i < vsi->num_q_vectors; i++)
4247			i40e_irq_dynamic_enable(vsi, i);
4248	} else {
4249		i40e_irq_dynamic_enable_icr0(pf);
4250	}
4251
4252	i40e_flush(&pf->hw);
4253	return 0;
4254}
4255
4256/**
4257 * i40e_free_misc_vector - Free the vector that handles non-queue events
4258 * @pf: board private structure
4259 **/
4260static void i40e_free_misc_vector(struct i40e_pf *pf)
4261{
4262	/* Disable ICR 0 */
4263	wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
4264	i40e_flush(&pf->hw);
4265
4266	if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
4267		free_irq(pf->msix_entries[0].vector, pf);
4268		clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
4269	}
4270}
4271
4272/**
4273 * i40e_intr - MSI/Legacy and non-queue interrupt handler
4274 * @irq: interrupt number
4275 * @data: pointer to a q_vector
4276 *
4277 * This is the handler used for all MSI/Legacy interrupts, and deals
4278 * with both queue and non-queue interrupts.  This is also used in
4279 * MSIX mode to handle the non-queue interrupts.
4280 **/
4281static irqreturn_t i40e_intr(int irq, void *data)
4282{
4283	struct i40e_pf *pf = (struct i40e_pf *)data;
4284	struct i40e_hw *hw = &pf->hw;
4285	irqreturn_t ret = IRQ_NONE;
4286	u32 icr0, icr0_remaining;
4287	u32 val, ena_mask;
4288
4289	icr0 = rd32(hw, I40E_PFINT_ICR0);
4290	ena_mask = rd32(hw, I40E_PFINT_ICR0_ENA);
4291
4292	/* if sharing a legacy IRQ, we might get called w/o an intr pending */
4293	if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0)
4294		goto enable_intr;
4295
4296	/* if interrupt but no bits showing, must be SWINT */
4297	if (((icr0 & ~I40E_PFINT_ICR0_INTEVENT_MASK) == 0) ||
4298	    (icr0 & I40E_PFINT_ICR0_SWINT_MASK))
4299		pf->sw_int_count++;
4300
4301	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
4302	    (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) {
4303		ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
4304		dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n");
4305		set_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
4306	}
4307
4308	/* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
4309	if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) {
4310		struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
4311		struct i40e_q_vector *q_vector = vsi->q_vectors[0];
4312
4313		/* We do not have a way to disarm Queue causes while leaving
4314		 * interrupt enabled for all other causes, ideally
4315		 * interrupt should be disabled while we are in NAPI but
4316		 * this is not a performance path and napi_schedule()
4317		 * can deal with rescheduling.
4318		 */
4319		if (!test_bit(__I40E_DOWN, pf->state))
4320			napi_schedule_irqoff(&q_vector->napi);
4321	}
4322
4323	if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
4324		ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
4325		set_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state);
4326		i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n");
4327	}
4328
4329	if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) {
4330		ena_mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
4331		set_bit(__I40E_MDD_EVENT_PENDING, pf->state);
4332	}
4333
4334	if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
4335		/* disable any further VFLR event notifications */
4336		if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) {
4337			u32 reg = rd32(hw, I40E_PFINT_ICR0_ENA);
4338
4339			reg &= ~I40E_PFINT_ICR0_VFLR_MASK;
4340			wr32(hw, I40E_PFINT_ICR0_ENA, reg);
4341		} else {
4342			ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK;
4343			set_bit(__I40E_VFLR_EVENT_PENDING, pf->state);
4344		}
4345	}
4346
4347	if (icr0 & I40E_PFINT_ICR0_GRST_MASK) {
4348		if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
4349			set_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
4350		ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK;
4351		val = rd32(hw, I40E_GLGEN_RSTAT);
4352		val = (val & I40E_GLGEN_RSTAT_RESET_TYPE_MASK)
4353		       >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT;
4354		if (val == I40E_RESET_CORER) {
4355			pf->corer_count++;
4356		} else if (val == I40E_RESET_GLOBR) {
4357			pf->globr_count++;
4358		} else if (val == I40E_RESET_EMPR) {
4359			pf->empr_count++;
4360			set_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state);
4361		}
4362	}
4363
4364	if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) {
4365		icr0 &= ~I40E_PFINT_ICR0_HMC_ERR_MASK;
4366		dev_info(&pf->pdev->dev, "HMC error interrupt\n");
4367		dev_info(&pf->pdev->dev, "HMC error info 0x%x, HMC error data 0x%x\n",
4368			 rd32(hw, I40E_PFHMC_ERRORINFO),
4369			 rd32(hw, I40E_PFHMC_ERRORDATA));
4370	}
4371
4372	if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) {
4373		u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0);
4374
4375		if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK)
4376			schedule_work(&pf->ptp_extts0_work);
4377
4378		if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK)
4379			i40e_ptp_tx_hwtstamp(pf);
4380
4381		icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
4382	}
4383
4384	/* If a critical error is pending we have no choice but to reset the
4385	 * device.
4386	 * Report and mask out any remaining unexpected interrupts.
4387	 */
4388	icr0_remaining = icr0 & ena_mask;
4389	if (icr0_remaining) {
4390		dev_info(&pf->pdev->dev, "unhandled interrupt icr0=0x%08x\n",
4391			 icr0_remaining);
4392		if ((icr0_remaining & I40E_PFINT_ICR0_PE_CRITERR_MASK) ||
4393		    (icr0_remaining & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) ||
4394		    (icr0_remaining & I40E_PFINT_ICR0_ECC_ERR_MASK)) {
4395			dev_info(&pf->pdev->dev, "device will be reset\n");
4396			set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
4397			i40e_service_event_schedule(pf);
4398		}
4399		ena_mask &= ~icr0_remaining;
4400	}
4401	ret = IRQ_HANDLED;
4402
4403enable_intr:
4404	/* re-enable interrupt causes */
4405	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
4406	if (!test_bit(__I40E_DOWN, pf->state) ||
4407	    test_bit(__I40E_RECOVERY_MODE, pf->state)) {
4408		i40e_service_event_schedule(pf);
4409		i40e_irq_dynamic_enable_icr0(pf);
4410	}
4411
4412	return ret;
4413}
4414
4415/**
4416 * i40e_clean_fdir_tx_irq - Reclaim resources after transmit completes
4417 * @tx_ring:  tx ring to clean
4418 * @budget:   how many cleans we're allowed
4419 *
4420 * Returns true if there's any budget left (e.g. the clean is finished)
4421 **/
4422static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget)
4423{
4424	struct i40e_vsi *vsi = tx_ring->vsi;
4425	u16 i = tx_ring->next_to_clean;
4426	struct i40e_tx_buffer *tx_buf;
4427	struct i40e_tx_desc *tx_desc;
4428
4429	tx_buf = &tx_ring->tx_bi[i];
4430	tx_desc = I40E_TX_DESC(tx_ring, i);
4431	i -= tx_ring->count;
4432
4433	do {
4434		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
4435
4436		/* if next_to_watch is not set then there is no work pending */
4437		if (!eop_desc)
4438			break;
4439
4440		/* prevent any other reads prior to eop_desc */
4441		smp_rmb();
4442
4443		/* if the descriptor isn't done, no work yet to do */
4444		if (!(eop_desc->cmd_type_offset_bsz &
4445		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
4446			break;
4447
4448		/* clear next_to_watch to prevent false hangs */
4449		tx_buf->next_to_watch = NULL;
4450
4451		tx_desc->buffer_addr = 0;
4452		tx_desc->cmd_type_offset_bsz = 0;
4453		/* move past filter desc */
4454		tx_buf++;
4455		tx_desc++;
4456		i++;
4457		if (unlikely(!i)) {
4458			i -= tx_ring->count;
4459			tx_buf = tx_ring->tx_bi;
4460			tx_desc = I40E_TX_DESC(tx_ring, 0);
4461		}
4462		/* unmap skb header data */
4463		dma_unmap_single(tx_ring->dev,
4464				 dma_unmap_addr(tx_buf, dma),
4465				 dma_unmap_len(tx_buf, len),
4466				 DMA_TO_DEVICE);
4467		if (tx_buf->tx_flags & I40E_TX_FLAGS_FD_SB)
4468			kfree(tx_buf->raw_buf);
4469
4470		tx_buf->raw_buf = NULL;
4471		tx_buf->tx_flags = 0;
4472		tx_buf->next_to_watch = NULL;
4473		dma_unmap_len_set(tx_buf, len, 0);
4474		tx_desc->buffer_addr = 0;
4475		tx_desc->cmd_type_offset_bsz = 0;
4476
4477		/* move us past the eop_desc for start of next FD desc */
4478		tx_buf++;
4479		tx_desc++;
4480		i++;
4481		if (unlikely(!i)) {
4482			i -= tx_ring->count;
4483			tx_buf = tx_ring->tx_bi;
4484			tx_desc = I40E_TX_DESC(tx_ring, 0);
4485		}
4486
4487		/* update budget accounting */
4488		budget--;
4489	} while (likely(budget));
4490
4491	i += tx_ring->count;
4492	tx_ring->next_to_clean = i;
4493
4494	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED)
4495		i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx);
4496
4497	return budget > 0;
4498}
4499
4500/**
4501 * i40e_fdir_clean_ring - Interrupt Handler for FDIR SB ring
4502 * @irq: interrupt number
4503 * @data: pointer to a q_vector
4504 **/
4505static irqreturn_t i40e_fdir_clean_ring(int irq, void *data)
4506{
4507	struct i40e_q_vector *q_vector = data;
4508	struct i40e_vsi *vsi;
4509
4510	if (!q_vector->tx.ring)
4511		return IRQ_HANDLED;
4512
4513	vsi = q_vector->tx.ring->vsi;
4514	i40e_clean_fdir_tx_irq(q_vector->tx.ring, vsi->work_limit);
4515
4516	return IRQ_HANDLED;
4517}
4518
4519/**
4520 * i40e_map_vector_to_qp - Assigns the queue pair to the vector
4521 * @vsi: the VSI being configured
4522 * @v_idx: vector index
4523 * @qp_idx: queue pair index
4524 **/
4525static void i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx)
4526{
4527	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
4528	struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx];
4529	struct i40e_ring *rx_ring = vsi->rx_rings[qp_idx];
4530
4531	tx_ring->q_vector = q_vector;
4532	tx_ring->next = q_vector->tx.ring;
4533	q_vector->tx.ring = tx_ring;
4534	q_vector->tx.count++;
4535
4536	/* Place XDP Tx ring in the same q_vector ring list as regular Tx */
4537	if (i40e_enabled_xdp_vsi(vsi)) {
4538		struct i40e_ring *xdp_ring = vsi->xdp_rings[qp_idx];
4539
4540		xdp_ring->q_vector = q_vector;
4541		xdp_ring->next = q_vector->tx.ring;
4542		q_vector->tx.ring = xdp_ring;
4543		q_vector->tx.count++;
4544	}
4545
4546	rx_ring->q_vector = q_vector;
4547	rx_ring->next = q_vector->rx.ring;
4548	q_vector->rx.ring = rx_ring;
4549	q_vector->rx.count++;
4550}
4551
4552/**
4553 * i40e_vsi_map_rings_to_vectors - Maps descriptor rings to vectors
4554 * @vsi: the VSI being configured
4555 *
4556 * This function maps descriptor rings to the queue-specific vectors
4557 * we were allotted through the MSI-X enabling code.  Ideally, we'd have
4558 * one vector per queue pair, but on a constrained vector budget, we
4559 * group the queue pairs as "efficiently" as possible.
4560 **/
4561static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
4562{
4563	int qp_remaining = vsi->num_queue_pairs;
4564	int q_vectors = vsi->num_q_vectors;
4565	int num_ringpairs;
4566	int v_start = 0;
4567	int qp_idx = 0;
4568
4569	/* If we don't have enough vectors for a 1-to-1 mapping, we'll have to
4570	 * group them so there are multiple queues per vector.
4571	 * It is also important to go through all the vectors available to be
4572	 * sure that if we don't use all the vectors, that the remaining vectors
4573	 * are cleared. This is especially important when decreasing the
4574	 * number of queues in use.
4575	 */
4576	for (; v_start < q_vectors; v_start++) {
4577		struct i40e_q_vector *q_vector = vsi->q_vectors[v_start];
4578
4579		num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);
4580
4581		q_vector->num_ringpairs = num_ringpairs;
4582		q_vector->reg_idx = q_vector->v_idx + vsi->base_vector - 1;
4583
4584		q_vector->rx.count = 0;
4585		q_vector->tx.count = 0;
4586		q_vector->rx.ring = NULL;
4587		q_vector->tx.ring = NULL;
4588
4589		while (num_ringpairs--) {
4590			i40e_map_vector_to_qp(vsi, v_start, qp_idx);
4591			qp_idx++;
4592			qp_remaining--;
4593		}
4594	}
4595}
4596
4597/**
4598 * i40e_vsi_request_irq - Request IRQ from the OS
4599 * @vsi: the VSI being configured
4600 * @basename: name for the vector
4601 **/
4602static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename)
4603{
4604	struct i40e_pf *pf = vsi->back;
4605	int err;
4606
4607	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
4608		err = i40e_vsi_request_irq_msix(vsi, basename);
4609	else if (pf->flags & I40E_FLAG_MSI_ENABLED)
4610		err = request_irq(pf->pdev->irq, i40e_intr, 0,
4611				  pf->int_name, pf);
4612	else
4613		err = request_irq(pf->pdev->irq, i40e_intr, IRQF_SHARED,
4614				  pf->int_name, pf);
4615
4616	if (err)
4617		dev_info(&pf->pdev->dev, "request_irq failed, Error %d\n", err);
4618
4619	return err;
4620}
4621
4622#ifdef CONFIG_NET_POLL_CONTROLLER
4623/**
4624 * i40e_netpoll - A Polling 'interrupt' handler
4625 * @netdev: network interface device structure
4626 *
4627 * This is used by netconsole to send skbs without having to re-enable
4628 * interrupts.  It's not called while the normal interrupt routine is executing.
4629 **/
4630static void i40e_netpoll(struct net_device *netdev)
4631{
4632	struct i40e_netdev_priv *np = netdev_priv(netdev);
4633	struct i40e_vsi *vsi = np->vsi;
4634	struct i40e_pf *pf = vsi->back;
4635	int i;
4636
4637	/* if interface is down do nothing */
4638	if (test_bit(__I40E_VSI_DOWN, vsi->state))
4639		return;
4640
4641	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4642		for (i = 0; i < vsi->num_q_vectors; i++)
4643			i40e_msix_clean_rings(0, vsi->q_vectors[i]);
4644	} else {
4645		i40e_intr(pf->pdev->irq, netdev);
4646	}
4647}
4648#endif
4649
4650#define I40E_QTX_ENA_WAIT_COUNT 50
4651
4652/**
4653 * i40e_pf_txq_wait - Wait for a PF's Tx queue to be enabled or disabled
4654 * @pf: the PF being configured
4655 * @pf_q: the PF queue
4656 * @enable: enable or disable state of the queue
4657 *
4658 * This routine will wait for the given Tx queue of the PF to reach the
4659 * enabled or disabled state.
4660 * Returns -ETIMEDOUT in case of failing to reach the requested state after
4661 * multiple retries; else will return 0 in case of success.
4662 **/
4663static int i40e_pf_txq_wait(struct i40e_pf *pf, int pf_q, bool enable)
4664{
4665	int i;
4666	u32 tx_reg;
4667
4668	for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) {
4669		tx_reg = rd32(&pf->hw, I40E_QTX_ENA(pf_q));
4670		if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
4671			break;
4672
4673		usleep_range(10, 20);
4674	}
4675	if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT)
4676		return -ETIMEDOUT;
4677
4678	return 0;
4679}
4680
4681/**
4682 * i40e_control_tx_q - Start or stop a particular Tx queue
4683 * @pf: the PF structure
4684 * @pf_q: the PF queue to configure
4685 * @enable: start or stop the queue
4686 *
4687 * This function enables or disables a single queue. Note that any delay
4688 * required after the operation is expected to be handled by the caller of
4689 * this function.
4690 **/
4691static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable)
4692{
4693	struct i40e_hw *hw = &pf->hw;
4694	u32 tx_reg;
4695	int i;
4696
4697	/* warn the TX unit of coming changes */
4698	i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable);
4699	if (!enable)
4700		usleep_range(10, 20);
4701
4702	for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) {
4703		tx_reg = rd32(hw, I40E_QTX_ENA(pf_q));
4704		if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) ==
4705		    ((tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT) & 1))
4706			break;
4707		usleep_range(1000, 2000);
4708	}
4709
4710	/* Skip if the queue is already in the requested state */
4711	if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
4712		return;
4713
4714	/* turn on/off the queue */
4715	if (enable) {
4716		wr32(hw, I40E_QTX_HEAD(pf_q), 0);
4717		tx_reg |= I40E_QTX_ENA_QENA_REQ_MASK;
4718	} else {
4719		tx_reg &= ~I40E_QTX_ENA_QENA_REQ_MASK;
4720	}
4721
4722	wr32(hw, I40E_QTX_ENA(pf_q), tx_reg);
4723}
4724
4725/**
4726 * i40e_control_wait_tx_q - Start/stop Tx queue and wait for completion
4727 * @seid: VSI SEID
4728 * @pf: the PF structure
4729 * @pf_q: the PF queue to configure
4730 * @is_xdp: true if the queue is used for XDP
4731 * @enable: start or stop the queue
4732 **/
4733int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
4734			   bool is_xdp, bool enable)
4735{
4736	int ret;
4737
4738	i40e_control_tx_q(pf, pf_q, enable);
4739
4740	/* wait for the change to finish */
4741	ret = i40e_pf_txq_wait(pf, pf_q, enable);
4742	if (ret) {
4743		dev_info(&pf->pdev->dev,
4744			 "VSI seid %d %sTx ring %d %sable timeout\n",
4745			 seid, (is_xdp ? "XDP " : ""), pf_q,
4746			 (enable ? "en" : "dis"));
4747	}
4748
4749	return ret;
4750}
4751
4752/**
4753 * i40e_vsi_enable_tx - Start a VSI's rings
4754 * @vsi: the VSI being configured
4755 **/
4756static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
4757{
4758	struct i40e_pf *pf = vsi->back;
4759	int i, pf_q, ret = 0;
4760
4761	pf_q = vsi->base_queue;
4762	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
4763		ret = i40e_control_wait_tx_q(vsi->seid, pf,
4764					     pf_q,
4765					     false /*is xdp*/, true);
4766		if (ret)
4767			break;
4768
4769		if (!i40e_enabled_xdp_vsi(vsi))
4770			continue;
4771
4772		ret = i40e_control_wait_tx_q(vsi->seid, pf,
4773					     pf_q + vsi->alloc_queue_pairs,
4774					     true /*is xdp*/, true);
4775		if (ret)
4776			break;
4777	}
4778	return ret;
4779}
4780
4781/**
4782 * i40e_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
4783 * @pf: the PF being configured
4784 * @pf_q: the PF queue
4785 * @enable: enable or disable state of the queue
4786 *
4787 * This routine will wait for the given Rx queue of the PF to reach the
4788 * enabled or disabled state.
4789 * Returns -ETIMEDOUT in case of failing to reach the requested state after
4790 * multiple retries; else will return 0 in case of success.
4791 **/
4792static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable)
4793{
4794	int i;
4795	u32 rx_reg;
4796
4797	for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) {
4798		rx_reg = rd32(&pf->hw, I40E_QRX_ENA(pf_q));
4799		if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
4800			break;
4801
4802		usleep_range(10, 20);
4803	}
4804	if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT)
4805		return -ETIMEDOUT;
4806
4807	return 0;
4808}
4809
4810/**
4811 * i40e_control_rx_q - Start or stop a particular Rx queue
4812 * @pf: the PF structure
4813 * @pf_q: the PF queue to configure
4814 * @enable: start or stop the queue
4815 *
4816 * This function enables or disables a single queue. Note that
4817 * any delay required after the operation is expected to be
4818 * handled by the caller of this function.
4819 **/
4820static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
4821{
4822	struct i40e_hw *hw = &pf->hw;
4823	u32 rx_reg;
4824	int i;
4825
4826	for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) {
4827		rx_reg = rd32(hw, I40E_QRX_ENA(pf_q));
4828		if (((rx_reg >> I40E_QRX_ENA_QENA_REQ_SHIFT) & 1) ==
4829		    ((rx_reg >> I40E_QRX_ENA_QENA_STAT_SHIFT) & 1))
4830			break;
4831		usleep_range(1000, 2000);
4832	}
4833
4834	/* Skip if the queue is already in the requested state */
4835	if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
4836		return;
4837
4838	/* turn on/off the queue */
4839	if (enable)
4840		rx_reg |= I40E_QRX_ENA_QENA_REQ_MASK;
4841	else
4842		rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
4843
4844	wr32(hw, I40E_QRX_ENA(pf_q), rx_reg);
4845}
4846
4847/**
4848 * i40e_control_wait_rx_q
4849 * @pf: the PF structure
4850 * @pf_q: queue being configured
4851 * @enable: start or stop the rings
4852 *
4853 * This function enables or disables a single queue along with waiting
4854 * for the change to finish. The caller of this function should handle
4855 * the delays needed in the case of disabling queues.
4856 **/
4857int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
4858{
4859	int ret = 0;
4860
4861	i40e_control_rx_q(pf, pf_q, enable);
4862
4863	/* wait for the change to finish */
4864	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
4865	if (ret)
4866		return ret;
4867
4868	return ret;
4869}
4870
4871/**
4872 * i40e_vsi_enable_rx - Start a VSI's rings
4873 * @vsi: the VSI being configured
4874 **/
4875static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
4876{
4877	struct i40e_pf *pf = vsi->back;
4878	int i, pf_q, ret = 0;
4879
4880	pf_q = vsi->base_queue;
4881	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
4882		ret = i40e_control_wait_rx_q(pf, pf_q, true);
4883		if (ret) {
4884			dev_info(&pf->pdev->dev,
4885				 "VSI seid %d Rx ring %d enable timeout\n",
4886				 vsi->seid, pf_q);
4887			break;
4888		}
4889	}
4890
4891	return ret;
4892}
4893
4894/**
4895 * i40e_vsi_start_rings - Start a VSI's rings
4896 * @vsi: the VSI being configured
4897 **/
4898int i40e_vsi_start_rings(struct i40e_vsi *vsi)
4899{
4900	int ret = 0;
4901
4902	/* do rx first for enable and last for disable */
4903	ret = i40e_vsi_enable_rx(vsi);
4904	if (ret)
4905		return ret;
4906	ret = i40e_vsi_enable_tx(vsi);
4907
4908	return ret;
4909}
4910
4911#define I40E_DISABLE_TX_GAP_MSEC	50
4912
4913/**
4914 * i40e_vsi_stop_rings - Stop a VSI's rings
4915 * @vsi: the VSI being configured
4916 **/
4917void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
4918{
4919	struct i40e_pf *pf = vsi->back;
4920	int pf_q, err, q_end;
4921
4922	/* When port TX is suspended, don't wait */
4923	if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
4924		return i40e_vsi_stop_rings_no_wait(vsi);
4925
4926	q_end = vsi->base_queue + vsi->num_queue_pairs;
4927	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
4928		i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
4929
4930	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
4931		err = i40e_control_wait_rx_q(pf, pf_q, false);
4932		if (err)
4933			dev_info(&pf->pdev->dev,
4934				 "VSI seid %d Rx ring %d disable timeout\n",
4935				 vsi->seid, pf_q);
4936	}
4937
4938	msleep(I40E_DISABLE_TX_GAP_MSEC);
4939	pf_q = vsi->base_queue;
4940	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
4941		wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
4942
4943	i40e_vsi_wait_queues_disabled(vsi);
4944}
4945
4946/**
4947 * i40e_vsi_stop_rings_no_wait - Stop a VSI's rings and do not delay
4948 * @vsi: the VSI being shutdown
4949 *
4950 * This function stops all the rings for a VSI but does not delay to verify
4951 * that rings have been disabled. It is expected that the caller is shutting
4952 * down multiple VSIs at once and will delay together for all the VSIs after
4953 * initiating the shutdown. This is particularly useful for shutting down lots
4954 * of VFs together. Otherwise, a large delay can be incurred while configuring
4955 * each VSI in serial.
4956 **/
4957void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi)
4958{
4959	struct i40e_pf *pf = vsi->back;
4960	int i, pf_q;
4961
4962	pf_q = vsi->base_queue;
4963	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
4964		i40e_control_tx_q(pf, pf_q, false);
4965		i40e_control_rx_q(pf, pf_q, false);
4966	}
4967}
4968
4969/**
4970 * i40e_vsi_free_irq - Free the irq association with the OS
4971 * @vsi: the VSI being configured
4972 **/
4973static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
4974{
4975	struct i40e_pf *pf = vsi->back;
4976	struct i40e_hw *hw = &pf->hw;
4977	int base = vsi->base_vector;
4978	u32 val, qp;
4979	int i;
4980
4981	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4982		if (!vsi->q_vectors)
4983			return;
4984
4985		if (!vsi->irqs_ready)
4986			return;
4987
4988		vsi->irqs_ready = false;
4989		for (i = 0; i < vsi->num_q_vectors; i++) {
4990			int irq_num;
4991			u16 vector;
4992
4993			vector = i + base;
4994			irq_num = pf->msix_entries[vector].vector;
4995
4996			/* free only the irqs that were actually requested */
4997			if (!vsi->q_vectors[i] ||
4998			    !vsi->q_vectors[i]->num_ringpairs)
4999				continue;
5000
5001			/* clear the affinity notifier in the IRQ descriptor */
5002			irq_set_affinity_notifier(irq_num, NULL);
5003			/* remove our suggested affinity mask for this IRQ */
5004			irq_update_affinity_hint(irq_num, NULL);
5005			free_irq(irq_num, vsi->q_vectors[i]);
5006
5007			/* Tear down the interrupt queue link list
5008			 *
5009			 * We know that they come in pairs and always
5010			 * the Rx first, then the Tx.  To clear the
5011			 * link list, stick the EOL value into the
5012			 * next_q field of the registers.
5013			 */
5014			val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1));
5015			qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
5016				>> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
5017			val |= I40E_QUEUE_END_OF_LIST
5018				<< I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
5019			wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val);
5020
5021			while (qp != I40E_QUEUE_END_OF_LIST) {
5022				u32 next;
5023
5024				val = rd32(hw, I40E_QINT_RQCTL(qp));
5025
5026				val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
5027					 I40E_QINT_RQCTL_MSIX0_INDX_MASK |
5028					 I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
5029					 I40E_QINT_RQCTL_INTEVENT_MASK);
5030
5031				val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
5032					 I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
5033
5034				wr32(hw, I40E_QINT_RQCTL(qp), val);
5035
5036				val = rd32(hw, I40E_QINT_TQCTL(qp));
5037
5038				next = (val & I40E_QINT_TQCTL_NEXTQ_INDX_MASK)
5039					>> I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT;
5040
5041				val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
5042					 I40E_QINT_TQCTL_MSIX0_INDX_MASK |
5043					 I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
5044					 I40E_QINT_TQCTL_INTEVENT_MASK);
5045
5046				val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
5047					 I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
5048
5049				wr32(hw, I40E_QINT_TQCTL(qp), val);
5050				qp = next;
5051			}
5052		}
5053	} else {
5054		free_irq(pf->pdev->irq, pf);
5055
5056		val = rd32(hw, I40E_PFINT_LNKLST0);
5057		qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
5058			>> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
5059		val |= I40E_QUEUE_END_OF_LIST
5060			<< I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
5061		wr32(hw, I40E_PFINT_LNKLST0, val);
5062
5063		val = rd32(hw, I40E_QINT_RQCTL(qp));
5064		val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
5065			 I40E_QINT_RQCTL_MSIX0_INDX_MASK |
5066			 I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
5067			 I40E_QINT_RQCTL_INTEVENT_MASK);
5068
5069		val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
5070			I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
5071
5072		wr32(hw, I40E_QINT_RQCTL(qp), val);
5073
5074		val = rd32(hw, I40E_QINT_TQCTL(qp));
5075
5076		val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
5077			 I40E_QINT_TQCTL_MSIX0_INDX_MASK |
5078			 I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
5079			 I40E_QINT_TQCTL_INTEVENT_MASK);
5080
5081		val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
5082			I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
5083
5084		wr32(hw, I40E_QINT_TQCTL(qp), val);
5085	}
5086}
5087
5088/**
5089 * i40e_free_q_vector - Free memory allocated for specific interrupt vector
5090 * @vsi: the VSI being configured
5091 * @v_idx: Index of vector to be freed
5092 *
5093 * This function frees the memory allocated to the q_vector.  In addition if
5094 * NAPI is enabled it will delete any references to the NAPI struct prior
5095 * to freeing the q_vector.
5096 **/
5097static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx)
5098{
5099	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
5100	struct i40e_ring *ring;
5101
5102	if (!q_vector)
5103		return;
5104
5105	/* disassociate q_vector from rings */
5106	i40e_for_each_ring(ring, q_vector->tx)
5107		ring->q_vector = NULL;
5108
5109	i40e_for_each_ring(ring, q_vector->rx)
5110		ring->q_vector = NULL;
5111
5112	/* only VSI w/ an associated netdev is set up w/ NAPI */
5113	if (vsi->netdev)
5114		netif_napi_del(&q_vector->napi);
5115
5116	vsi->q_vectors[v_idx] = NULL;
5117
5118	kfree_rcu(q_vector, rcu);
5119}
5120
5121/**
5122 * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors
5123 * @vsi: the VSI being un-configured
5124 *
5125 * This frees the memory allocated to the q_vectors and
5126 * deletes references to the NAPI struct.
5127 **/
5128static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi)
5129{
5130	int v_idx;
5131
5132	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
5133		i40e_free_q_vector(vsi, v_idx);
5134}
5135
5136/**
5137 * i40e_reset_interrupt_capability - Disable interrupt setup in OS
5138 * @pf: board private structure
5139 **/
5140static void i40e_reset_interrupt_capability(struct i40e_pf *pf)
5141{
5142	/* If we're in Legacy mode, the interrupt was cleaned in vsi_close */
5143	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
5144		pci_disable_msix(pf->pdev);
5145		kfree(pf->msix_entries);
5146		pf->msix_entries = NULL;
5147		kfree(pf->irq_pile);
5148		pf->irq_pile = NULL;
5149	} else if (pf->flags & I40E_FLAG_MSI_ENABLED) {
5150		pci_disable_msi(pf->pdev);
5151	}
5152	pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
5153}
5154
5155/**
5156 * i40e_clear_interrupt_scheme - Clear the current interrupt scheme settings
5157 * @pf: board private structure
5158 *
5159 * We go through and clear interrupt specific resources and reset the structure
5160 * to pre-load conditions
5161 **/
5162static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
5163{
5164	int i;
5165
5166	if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state))
5167		i40e_free_misc_vector(pf);
5168
5169	i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
5170		      I40E_IWARP_IRQ_PILE_ID);
5171
5172	i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
5173	for (i = 0; i < pf->num_alloc_vsi; i++)
5174		if (pf->vsi[i])
5175			i40e_vsi_free_q_vectors(pf->vsi[i]);
5176	i40e_reset_interrupt_capability(pf);
5177}
5178
5179/**
5180 * i40e_napi_enable_all - Enable NAPI for all q_vectors in the VSI
5181 * @vsi: the VSI being configured
5182 **/
5183static void i40e_napi_enable_all(struct i40e_vsi *vsi)
5184{
5185	int q_idx;
5186
5187	if (!vsi->netdev)
5188		return;
5189
5190	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
5191		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
5192
5193		if (q_vector->rx.ring || q_vector->tx.ring)
5194			napi_enable(&q_vector->napi);
5195	}
5196}
5197
5198/**
5199 * i40e_napi_disable_all - Disable NAPI for all q_vectors in the VSI
5200 * @vsi: the VSI being configured
5201 **/
5202static void i40e_napi_disable_all(struct i40e_vsi *vsi)
5203{
5204	int q_idx;
5205
5206	if (!vsi->netdev)
5207		return;
5208
5209	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
5210		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
5211
5212		if (q_vector->rx.ring || q_vector->tx.ring)
5213			napi_disable(&q_vector->napi);
5214	}
5215}
5216
5217/**
5218 * i40e_vsi_close - Shut down a VSI
5219 * @vsi: the vsi to be quelled
5220 **/
5221static void i40e_vsi_close(struct i40e_vsi *vsi)
5222{
5223	struct i40e_pf *pf = vsi->back;
5224	if (!test_and_set_bit(__I40E_VSI_DOWN, vsi->state))
5225		i40e_down(vsi);
5226	i40e_vsi_free_irq(vsi);
5227	i40e_vsi_free_tx_resources(vsi);
5228	i40e_vsi_free_rx_resources(vsi);
5229	vsi->current_netdev_flags = 0;
5230	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
5231	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
5232		set_bit(__I40E_CLIENT_RESET, pf->state);
5233}
5234
5235/**
5236 * i40e_quiesce_vsi - Pause a given VSI
5237 * @vsi: the VSI being paused
5238 **/
5239static void i40e_quiesce_vsi(struct i40e_vsi *vsi)
5240{
5241	if (test_bit(__I40E_VSI_DOWN, vsi->state))
5242		return;
5243
5244	set_bit(__I40E_VSI_NEEDS_RESTART, vsi->state);
5245	if (vsi->netdev && netif_running(vsi->netdev))
5246		vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
5247	else
5248		i40e_vsi_close(vsi);
5249}
5250
5251/**
5252 * i40e_unquiesce_vsi - Resume a given VSI
5253 * @vsi: the VSI being resumed
5254 **/
5255static void i40e_unquiesce_vsi(struct i40e_vsi *vsi)
5256{
5257	if (!test_and_clear_bit(__I40E_VSI_NEEDS_RESTART, vsi->state))
5258		return;
5259
5260	if (vsi->netdev && netif_running(vsi->netdev))
5261		vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
5262	else
5263		i40e_vsi_open(vsi);   /* this clears the DOWN bit */
5264}
5265
5266/**
5267 * i40e_pf_quiesce_all_vsi - Pause all VSIs on a PF
5268 * @pf: the PF
5269 **/
5270static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf)
5271{
5272	int v;
5273
5274	for (v = 0; v < pf->num_alloc_vsi; v++) {
5275		if (pf->vsi[v])
5276			i40e_quiesce_vsi(pf->vsi[v]);
5277	}
5278}
5279
5280/**
5281 * i40e_pf_unquiesce_all_vsi - Resume all VSIs on a PF
5282 * @pf: the PF
5283 **/
5284static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
5285{
5286	int v;
5287
5288	for (v = 0; v < pf->num_alloc_vsi; v++) {
5289		if (pf->vsi[v])
5290			i40e_unquiesce_vsi(pf->vsi[v]);
5291	}
5292}
5293
5294/**
5295 * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled
5296 * @vsi: the VSI being configured
5297 *
5298 * Wait until all queues on a given VSI have been disabled.
5299 **/
5300int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
5301{
5302	struct i40e_pf *pf = vsi->back;
5303	int i, pf_q, ret;
5304
5305	pf_q = vsi->base_queue;
5306	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
5307		/* Check and wait for the Tx queue */
5308		ret = i40e_pf_txq_wait(pf, pf_q, false);
5309		if (ret) {
5310			dev_info(&pf->pdev->dev,
5311				 "VSI seid %d Tx ring %d disable timeout\n",
5312				 vsi->seid, pf_q);
5313			return ret;
5314		}
5315
5316		if (!i40e_enabled_xdp_vsi(vsi))
5317			goto wait_rx;
5318
5319		/* Check and wait for the XDP Tx queue */
5320		ret = i40e_pf_txq_wait(pf, pf_q + vsi->alloc_queue_pairs,
5321				       false);
5322		if (ret) {
5323			dev_info(&pf->pdev->dev,
5324				 "VSI seid %d XDP Tx ring %d disable timeout\n",
5325				 vsi->seid, pf_q);
5326			return ret;
5327		}
5328wait_rx:
5329		/* Check and wait for the Rx queue */
5330		ret = i40e_pf_rxq_wait(pf, pf_q, false);
5331		if (ret) {
5332			dev_info(&pf->pdev->dev,
5333				 "VSI seid %d Rx ring %d disable timeout\n",
5334				 vsi->seid, pf_q);
5335			return ret;
5336		}
5337	}
5338
5339	return 0;
5340}
5341
5342#ifdef CONFIG_I40E_DCB
5343/**
5344 * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled
5345 * @pf: the PF
5346 *
5347 * This function waits for the queues to be in disabled state for all the
5348 * VSIs that are managed by this PF.
5349 **/
5350static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
5351{
5352	int v, ret = 0;
5353
5354	for (v = 0; v < pf->num_alloc_vsi; v++) {
5355		if (pf->vsi[v]) {
5356			ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
5357			if (ret)
5358				break;
5359		}
5360	}
5361
5362	return ret;
5363}
5364
5365#endif
5366
5367/**
5368 * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP
5369 * @pf: pointer to PF
5370 *
5371 * Get TC map for ISCSI PF type that will include iSCSI TC
5372 * and LAN TC.
5373 **/
5374static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
5375{
5376	struct i40e_dcb_app_priority_table app;
5377	struct i40e_hw *hw = &pf->hw;
5378	u8 enabled_tc = 1; /* TC0 is always enabled */
5379	u8 tc, i;
5380	/* Get the iSCSI APP TLV */
5381	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
5382
5383	for (i = 0; i < dcbcfg->numapps; i++) {
5384		app = dcbcfg->app[i];
5385		if (app.selector == I40E_APP_SEL_TCPIP &&
5386		    app.protocolid == I40E_APP_PROTOID_ISCSI) {
5387			tc = dcbcfg->etscfg.prioritytable[app.priority];
5388			enabled_tc |= BIT(tc);
5389			break;
5390		}
5391	}
5392
5393	return enabled_tc;
5394}
5395
5396/**
5397 * i40e_dcb_get_num_tc -  Get the number of TCs from DCBx config
5398 * @dcbcfg: the corresponding DCBx configuration structure
5399 *
5400 * Return the number of TCs from given DCBx configuration
5401 **/
5402static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
5403{
5404	int i, tc_unused = 0;
5405	u8 num_tc = 0;
5406	u8 ret = 0;
5407
5408	/* Scan the ETS Config Priority Table to find
5409	 * traffic class enabled for a given priority
5410	 * and create a bitmask of enabled TCs
5411	 */
5412	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
5413		num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
5414
5415	/* Now scan the bitmask to check for
5416	 * contiguous TCs starting with TC0
5417	 */
5418	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5419		if (num_tc & BIT(i)) {
5420			if (!tc_unused) {
5421				ret++;
5422			} else {
5423				pr_err("Non-contiguous TC - Disabling DCB\n");
5424				return 1;
5425			}
5426		} else {
5427			tc_unused = 1;
5428		}
5429	}
5430
5431	/* There is always at least TC0 */
5432	if (!ret)
5433		ret = 1;
5434
5435	return ret;
5436}
5437
5438/**
5439 * i40e_dcb_get_enabled_tc - Get enabled traffic classes
5440 * @dcbcfg: the corresponding DCBx configuration structure
5441 *
5442 * Query the current DCB configuration and return the number of
5443 * traffic classes enabled from the given DCBX config
5444 **/
5445static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
5446{
5447	u8 num_tc = i40e_dcb_get_num_tc(dcbcfg);
5448	u8 enabled_tc = 1;
5449	u8 i;
5450
5451	for (i = 0; i < num_tc; i++)
5452		enabled_tc |= BIT(i);
5453
5454	return enabled_tc;
5455}
5456
5457/**
5458 * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
5459 * @pf: PF being queried
5460 *
5461 * Query the current MQPRIO configuration and return the number of
5462 * traffic classes enabled.
5463 **/
5464static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
5465{
5466	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
5467	u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
5468	u8 enabled_tc = 1, i;
5469
5470	for (i = 1; i < num_tc; i++)
5471		enabled_tc |= BIT(i);
5472	return enabled_tc;
5473}
5474
5475/**
5476 * i40e_pf_get_num_tc - Get enabled traffic classes for PF
5477 * @pf: PF being queried
5478 *
5479 * Return number of traffic classes enabled for the given PF
5480 **/
5481static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
5482{
5483	struct i40e_hw *hw = &pf->hw;
5484	u8 i, enabled_tc = 1;
5485	u8 num_tc = 0;
5486	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
5487
5488	if (i40e_is_tc_mqprio_enabled(pf))
5489		return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
5490
5491	/* If neither MQPRIO nor DCB is enabled, then always use single TC */
5492	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
5493		return 1;
5494
5495	/* SFP mode will be enabled for all TCs on port */
5496	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
5497		return i40e_dcb_get_num_tc(dcbcfg);
5498
5499	/* MFP mode return count of enabled TCs for this PF */
5500	if (pf->hw.func_caps.iscsi)
5501		enabled_tc =  i40e_get_iscsi_tc_map(pf);
5502	else
5503		return 1; /* Only TC0 */
5504
5505	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5506		if (enabled_tc & BIT(i))
5507			num_tc++;
5508	}
5509	return num_tc;
5510}
5511
5512/**
5513 * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes
5514 * @pf: PF being queried
5515 *
5516 * Return a bitmap for enabled traffic classes for this PF.
5517 **/
5518static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
5519{
5520	if (i40e_is_tc_mqprio_enabled(pf))
5521		return i40e_mqprio_get_enabled_tc(pf);
5522
5523	/* If neither MQPRIO nor DCB is enabled for this PF then just return
5524	 * default TC
5525	 */
5526	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
5527		return I40E_DEFAULT_TRAFFIC_CLASS;
5528
5529	/* SFP mode we want PF to be enabled for all TCs */
5530	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
5531		return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config);
5532
5533	/* MFP enabled and iSCSI PF type */
5534	if (pf->hw.func_caps.iscsi)
5535		return i40e_get_iscsi_tc_map(pf);
5536	else
5537		return I40E_DEFAULT_TRAFFIC_CLASS;
5538}
5539
5540/**
5541 * i40e_vsi_get_bw_info - Query VSI BW Information
5542 * @vsi: the VSI being queried
5543 *
5544 * Returns 0 on success, negative value on failure
5545 **/
5546static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
5547{
5548	struct i40e_aqc_query_vsi_ets_sla_config_resp bw_ets_config = {0};
5549	struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
5550	struct i40e_pf *pf = vsi->back;
5551	struct i40e_hw *hw = &pf->hw;
5552	u32 tc_bw_max;
5553	int ret;
5554	int i;
5555
5556	/* Get the VSI level BW configuration */
5557	ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL);
5558	if (ret) {
5559		dev_info(&pf->pdev->dev,
5560			 "couldn't get PF vsi bw config, err %pe aq_err %s\n",
5561			 ERR_PTR(ret),
5562			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
5563		return -EINVAL;
5564	}
5565
5566	/* Get the VSI level BW configuration per TC */
5567	ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &bw_ets_config,
5568					       NULL);
5569	if (ret) {
5570		dev_info(&pf->pdev->dev,
5571			 "couldn't get PF vsi ets bw config, err %pe aq_err %s\n",
5572			 ERR_PTR(ret),
5573			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
5574		return -EINVAL;
5575	}
5576
5577	if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) {
5578		dev_info(&pf->pdev->dev,
5579			 "Enabled TCs mismatch from querying VSI BW info 0x%08x 0x%08x\n",
5580			 bw_config.tc_valid_bits,
5581			 bw_ets_config.tc_valid_bits);
5582		/* Still continuing */
5583	}
5584
5585	vsi->bw_limit = le16_to_cpu(bw_config.port_bw_limit);
5586	vsi->bw_max_quanta = bw_config.max_bw;
5587	tc_bw_max = le16_to_cpu(bw_ets_config.tc_bw_max[0]) |
5588		    (le16_to_cpu(bw_ets_config.tc_bw_max[1]) << 16);
5589	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5590		vsi->bw_ets_share_credits[i] = bw_ets_config.share_credits[i];
5591		vsi->bw_ets_limit_credits[i] =
5592					le16_to_cpu(bw_ets_config.credits[i]);
5593		/* 3 bits out of 4 for each TC */
5594		vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7);
5595	}
5596
5597	return 0;
5598}
5599
5600/**
5601 * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC
5602 * @vsi: the VSI being configured
5603 * @enabled_tc: TC bitmap
5604 * @bw_share: BW shared credits per TC
5605 *
5606 * Returns 0 on success, negative value on failure
5607 **/
5608static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
5609				       u8 *bw_share)
5610{
5611	struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
5612	struct i40e_pf *pf = vsi->back;
5613	int ret;
5614	int i;
5615
5616	/* There is no need to reset BW when mqprio mode is on.  */
5617	if (i40e_is_tc_mqprio_enabled(pf))
5618		return 0;
5619	if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
5620		ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
5621		if (ret)
5622			dev_info(&pf->pdev->dev,
5623				 "Failed to reset tx rate for vsi->seid %u\n",
5624				 vsi->seid);
5625		return ret;
5626	}
5627	memset(&bw_data, 0, sizeof(bw_data));
5628	bw_data.tc_valid_bits = enabled_tc;
5629	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
5630		bw_data.tc_bw_credits[i] = bw_share[i];
5631
5632	ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL);
5633	if (ret) {
5634		dev_info(&pf->pdev->dev,
5635			 "AQ command Config VSI BW allocation per TC failed = %d\n",
5636			 pf->hw.aq.asq_last_status);
5637		return -EINVAL;
5638	}
5639
5640	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
5641		vsi->info.qs_handle[i] = bw_data.qs_handles[i];
5642
5643	return 0;
5644}
5645
5646/**
5647 * i40e_vsi_config_netdev_tc - Setup the netdev TC configuration
5648 * @vsi: the VSI being configured
5649 * @enabled_tc: TC map to be enabled
5650 *
5651 **/
5652static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
5653{
5654	struct net_device *netdev = vsi->netdev;
5655	struct i40e_pf *pf = vsi->back;
5656	struct i40e_hw *hw = &pf->hw;
5657	u8 netdev_tc = 0;
5658	int i;
5659	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
5660
5661	if (!netdev)
5662		return;
5663
5664	if (!enabled_tc) {
5665		netdev_reset_tc(netdev);
5666		return;
5667	}
5668
5669	/* Set up actual enabled TCs on the VSI */
5670	if (netdev_set_num_tc(netdev, vsi->tc_config.numtc))
5671		return;
5672
5673	/* set per TC queues for the VSI */
5674	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5675		/* Only set TC queues for enabled tcs
5676		 *
5677		 * e.g. For a VSI that has TC0 and TC3 enabled the
5678		 * enabled_tc bitmap would be 0x00001001; the driver
5679		 * will set the numtc for netdev as 2 that will be
5680		 * referenced by the netdev layer as TC 0 and 1.
5681		 */
5682		if (vsi->tc_config.enabled_tc & BIT(i))
5683			netdev_set_tc_queue(netdev,
5684					vsi->tc_config.tc_info[i].netdev_tc,
5685					vsi->tc_config.tc_info[i].qcount,
5686					vsi->tc_config.tc_info[i].qoffset);
5687	}
5688
5689	if (i40e_is_tc_mqprio_enabled(pf))
5690		return;
5691
5692	/* Assign UP2TC map for the VSI */
5693	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
5694		/* Get the actual TC# for the UP */
5695		u8 ets_tc = dcbcfg->etscfg.prioritytable[i];
5696		/* Get the mapped netdev TC# for the UP */
5697		netdev_tc =  vsi->tc_config.tc_info[ets_tc].netdev_tc;
5698		netdev_set_prio_tc_map(netdev, i, netdev_tc);
5699	}
5700}
5701
5702/**
5703 * i40e_vsi_update_queue_map - Update our copy of VSi info with new queue map
5704 * @vsi: the VSI being configured
5705 * @ctxt: the ctxt buffer returned from AQ VSI update param command
5706 **/
5707static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
5708				      struct i40e_vsi_context *ctxt)
5709{
5710	/* copy just the sections touched not the entire info
5711	 * since not all sections are valid as returned by
5712	 * update vsi params
5713	 */
5714	vsi->info.mapping_flags = ctxt->info.mapping_flags;
5715	memcpy(&vsi->info.queue_mapping,
5716	       &ctxt->info.queue_mapping, sizeof(vsi->info.queue_mapping));
5717	memcpy(&vsi->info.tc_mapping, ctxt->info.tc_mapping,
5718	       sizeof(vsi->info.tc_mapping));
5719}
5720
5721/**
5722 * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI
5723 * @vsi: the VSI being reconfigured
5724 * @vsi_offset: offset from main VF VSI
5725 */
5726int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset)
5727{
5728	struct i40e_vsi_context ctxt = {};
5729	struct i40e_pf *pf;
5730	struct i40e_hw *hw;
5731	int ret;
5732
5733	if (!vsi)
5734		return -EINVAL;
5735	pf = vsi->back;
5736	hw = &pf->hw;
5737
5738	ctxt.seid = vsi->seid;
5739	ctxt.pf_num = hw->pf_id;
5740	ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset;
5741	ctxt.uplink_seid = vsi->uplink_seid;
5742	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
5743	ctxt.flags = I40E_AQ_VSI_TYPE_VF;
5744	ctxt.info = vsi->info;
5745
5746	i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc,
5747				 false);
5748	if (vsi->reconfig_rss) {
5749		vsi->rss_size = min_t(int, pf->alloc_rss_size,
5750				      vsi->num_queue_pairs);
5751		ret = i40e_vsi_config_rss(vsi);
5752		if (ret) {
5753			dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n");
5754			return ret;
5755		}
5756		vsi->reconfig_rss = false;
5757	}
5758
5759	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
5760	if (ret) {
5761		dev_info(&pf->pdev->dev, "Update vsi config failed, err %pe aq_err %s\n",
5762			 ERR_PTR(ret),
5763			 i40e_aq_str(hw, hw->aq.asq_last_status));
5764		return ret;
5765	}
5766	/* update the local VSI info with updated queue map */
5767	i40e_vsi_update_queue_map(vsi, &ctxt);
5768	vsi->info.valid_sections = 0;
5769
5770	return ret;
5771}
5772
5773/**
5774 * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
5775 * @vsi: VSI to be configured
5776 * @enabled_tc: TC bitmap
5777 *
5778 * This configures a particular VSI for TCs that are mapped to the
5779 * given TC bitmap. It uses default bandwidth share for TCs across
5780 * VSIs to configure TC for a particular VSI.
5781 *
5782 * NOTE:
5783 * It is expected that the VSI queues have been quisced before calling
5784 * this function.
5785 **/
5786static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
5787{
5788	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
5789	struct i40e_pf *pf = vsi->back;
5790	struct i40e_hw *hw = &pf->hw;
5791	struct i40e_vsi_context ctxt;
5792	int ret = 0;
5793	int i;
5794
5795	/* Check if enabled_tc is same as existing or new TCs */
5796	if (vsi->tc_config.enabled_tc == enabled_tc &&
5797	    vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
5798		return ret;
5799
5800	/* Enable ETS TCs with equal BW Share for now across all VSIs */
5801	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5802		if (enabled_tc & BIT(i))
5803			bw_share[i] = 1;
5804	}
5805
5806	ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
5807	if (ret) {
5808		struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
5809
5810		dev_info(&pf->pdev->dev,
5811			 "Failed configuring TC map %d for VSI %d\n",
5812			 enabled_tc, vsi->seid);
5813		ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid,
5814						  &bw_config, NULL);
5815		if (ret) {
5816			dev_info(&pf->pdev->dev,
5817				 "Failed querying vsi bw info, err %pe aq_err %s\n",
5818				 ERR_PTR(ret),
5819				 i40e_aq_str(hw, hw->aq.asq_last_status));
5820			goto out;
5821		}
5822		if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) {
5823			u8 valid_tc = bw_config.tc_valid_bits & enabled_tc;
5824
5825			if (!valid_tc)
5826				valid_tc = bw_config.tc_valid_bits;
5827			/* Always enable TC0, no matter what */
5828			valid_tc |= 1;
5829			dev_info(&pf->pdev->dev,
5830				 "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n",
5831				 enabled_tc, bw_config.tc_valid_bits, valid_tc);
5832			enabled_tc = valid_tc;
5833		}
5834
5835		ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
5836		if (ret) {
5837			dev_err(&pf->pdev->dev,
5838				"Unable to  configure TC map %d for VSI %d\n",
5839				enabled_tc, vsi->seid);
5840			goto out;
5841		}
5842	}
5843
5844	/* Update Queue Pairs Mapping for currently enabled UPs */
5845	ctxt.seid = vsi->seid;
5846	ctxt.pf_num = vsi->back->hw.pf_id;
5847	ctxt.vf_num = 0;
5848	ctxt.uplink_seid = vsi->uplink_seid;
5849	ctxt.info = vsi->info;
5850	if (i40e_is_tc_mqprio_enabled(pf)) {
5851		ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
5852		if (ret)
5853			goto out;
5854	} else {
5855		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
5856	}
5857
5858	/* On destroying the qdisc, reset vsi->rss_size, as number of enabled
5859	 * queues changed.
5860	 */
5861	if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
5862		vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
5863				      vsi->num_queue_pairs);
5864		ret = i40e_vsi_config_rss(vsi);
5865		if (ret) {
5866			dev_info(&vsi->back->pdev->dev,
5867				 "Failed to reconfig rss for num_queues\n");
5868			return ret;
5869		}
5870		vsi->reconfig_rss = false;
5871	}
5872	if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
5873		ctxt.info.valid_sections |=
5874				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
5875		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
5876	}
5877
5878	/* Update the VSI after updating the VSI queue-mapping
5879	 * information
5880	 */
5881	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
5882	if (ret) {
5883		dev_info(&pf->pdev->dev,
5884			 "Update vsi tc config failed, err %pe aq_err %s\n",
5885			 ERR_PTR(ret),
5886			 i40e_aq_str(hw, hw->aq.asq_last_status));
5887		goto out;
5888	}
5889	/* update the local VSI info with updated queue map */
5890	i40e_vsi_update_queue_map(vsi, &ctxt);
5891	vsi->info.valid_sections = 0;
5892
5893	/* Update current VSI BW information */
5894	ret = i40e_vsi_get_bw_info(vsi);
5895	if (ret) {
5896		dev_info(&pf->pdev->dev,
5897			 "Failed updating vsi bw info, err %pe aq_err %s\n",
5898			 ERR_PTR(ret),
5899			 i40e_aq_str(hw, hw->aq.asq_last_status));
5900		goto out;
5901	}
5902
5903	/* Update the netdev TC setup */
5904	i40e_vsi_config_netdev_tc(vsi, enabled_tc);
5905out:
5906	return ret;
5907}
5908
5909/**
5910 * i40e_get_link_speed - Returns link speed for the interface
5911 * @vsi: VSI to be configured
5912 *
5913 **/
5914static int i40e_get_link_speed(struct i40e_vsi *vsi)
5915{
5916	struct i40e_pf *pf = vsi->back;
5917
5918	switch (pf->hw.phy.link_info.link_speed) {
5919	case I40E_LINK_SPEED_40GB:
5920		return 40000;
5921	case I40E_LINK_SPEED_25GB:
5922		return 25000;
5923	case I40E_LINK_SPEED_20GB:
5924		return 20000;
5925	case I40E_LINK_SPEED_10GB:
5926		return 10000;
5927	case I40E_LINK_SPEED_1GB:
5928		return 1000;
5929	default:
5930		return -EINVAL;
5931	}
5932}
5933
5934/**
5935 * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits
5936 * @vsi: Pointer to vsi structure
5937 * @max_tx_rate: max TX rate in bytes to be converted into Mbits
5938 *
5939 * Helper function to convert units before send to set BW limit
5940 **/
5941static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate)
5942{
5943	if (max_tx_rate < I40E_BW_MBPS_DIVISOR) {
5944		dev_warn(&vsi->back->pdev->dev,
5945			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
5946		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
5947	} else {
5948		do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
5949	}
5950
5951	return max_tx_rate;
5952}
5953
5954/**
5955 * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
5956 * @vsi: VSI to be configured
5957 * @seid: seid of the channel/VSI
5958 * @max_tx_rate: max TX rate to be configured as BW limit
5959 *
5960 * Helper function to set BW limit for a given VSI
5961 **/
5962int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
5963{
5964	struct i40e_pf *pf = vsi->back;
5965	u64 credits = 0;
5966	int speed = 0;
5967	int ret = 0;
5968
5969	speed = i40e_get_link_speed(vsi);
5970	if (max_tx_rate > speed) {
5971		dev_err(&pf->pdev->dev,
5972			"Invalid max tx rate %llu specified for VSI seid %d.",
5973			max_tx_rate, seid);
5974		return -EINVAL;
5975	}
5976	if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) {
5977		dev_warn(&pf->pdev->dev,
5978			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
5979		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
5980	}
5981
5982	/* Tx rate credits are in values of 50Mbps, 0 is disabled */
5983	credits = max_tx_rate;
5984	do_div(credits, I40E_BW_CREDIT_DIVISOR);
5985	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
5986					  I40E_MAX_BW_INACTIVE_ACCUM, NULL);
5987	if (ret)
5988		dev_err(&pf->pdev->dev,
5989			"Failed set tx rate (%llu Mbps) for vsi->seid %u, err %pe aq_err %s\n",
5990			max_tx_rate, seid, ERR_PTR(ret),
5991			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
5992	return ret;
5993}
5994
5995/**
5996 * i40e_remove_queue_channels - Remove queue channels for the TCs
5997 * @vsi: VSI to be configured
5998 *
5999 * Remove queue channels for the TCs
6000 **/
6001static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
6002{
6003	enum i40e_admin_queue_err last_aq_status;
6004	struct i40e_cloud_filter *cfilter;
6005	struct i40e_channel *ch, *ch_tmp;
6006	struct i40e_pf *pf = vsi->back;
6007	struct hlist_node *node;
6008	int ret, i;
6009
6010	/* Reset rss size that was stored when reconfiguring rss for
6011	 * channel VSIs with non-power-of-2 queue count.
6012	 */
6013	vsi->current_rss_size = 0;
6014
6015	/* perform cleanup for channels if they exist */
6016	if (list_empty(&vsi->ch_list))
6017		return;
6018
6019	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
6020		struct i40e_vsi *p_vsi;
6021
6022		list_del(&ch->list);
6023		p_vsi = ch->parent_vsi;
6024		if (!p_vsi || !ch->initialized) {
6025			kfree(ch);
6026			continue;
6027		}
6028		/* Reset queue contexts */
6029		for (i = 0; i < ch->num_queue_pairs; i++) {
6030			struct i40e_ring *tx_ring, *rx_ring;
6031			u16 pf_q;
6032
6033			pf_q = ch->base_queue + i;
6034			tx_ring = vsi->tx_rings[pf_q];
6035			tx_ring->ch = NULL;
6036
6037			rx_ring = vsi->rx_rings[pf_q];
6038			rx_ring->ch = NULL;
6039		}
6040
6041		/* Reset BW configured for this VSI via mqprio */
6042		ret = i40e_set_bw_limit(vsi, ch->seid, 0);
6043		if (ret)
6044			dev_info(&vsi->back->pdev->dev,
6045				 "Failed to reset tx rate for ch->seid %u\n",
6046				 ch->seid);
6047
6048		/* delete cloud filters associated with this channel */
6049		hlist_for_each_entry_safe(cfilter, node,
6050					  &pf->cloud_filter_list, cloud_node) {
6051			if (cfilter->seid != ch->seid)
6052				continue;
6053
6054			hash_del(&cfilter->cloud_node);
6055			if (cfilter->dst_port)
6056				ret = i40e_add_del_cloud_filter_big_buf(vsi,
6057									cfilter,
6058									false);
6059			else
6060				ret = i40e_add_del_cloud_filter(vsi, cfilter,
6061								false);
6062			last_aq_status = pf->hw.aq.asq_last_status;
6063			if (ret)
6064				dev_info(&pf->pdev->dev,
6065					 "Failed to delete cloud filter, err %pe aq_err %s\n",
6066					 ERR_PTR(ret),
6067					 i40e_aq_str(&pf->hw, last_aq_status));
6068			kfree(cfilter);
6069		}
6070
6071		/* delete VSI from FW */
6072		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
6073					     NULL);
6074		if (ret)
6075			dev_err(&vsi->back->pdev->dev,
6076				"unable to remove channel (%d) for parent VSI(%d)\n",
6077				ch->seid, p_vsi->seid);
6078		kfree(ch);
6079	}
6080	INIT_LIST_HEAD(&vsi->ch_list);
6081}
6082
6083/**
6084 * i40e_get_max_queues_for_channel
6085 * @vsi: ptr to VSI to which channels are associated with
6086 *
6087 * Helper function which returns max value among the queue counts set on the
6088 * channels/TCs created.
6089 **/
6090static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
6091{
6092	struct i40e_channel *ch, *ch_tmp;
6093	int max = 0;
6094
6095	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
6096		if (!ch->initialized)
6097			continue;
6098		if (ch->num_queue_pairs > max)
6099			max = ch->num_queue_pairs;
6100	}
6101
6102	return max;
6103}
6104
6105/**
6106 * i40e_validate_num_queues - validate num_queues w.r.t channel
6107 * @pf: ptr to PF device
6108 * @num_queues: number of queues
6109 * @vsi: the parent VSI
6110 * @reconfig_rss: indicates should the RSS be reconfigured or not
6111 *
6112 * This function validates number of queues in the context of new channel
6113 * which is being established and determines if RSS should be reconfigured
6114 * or not for parent VSI.
6115 **/
6116static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
6117				    struct i40e_vsi *vsi, bool *reconfig_rss)
6118{
6119	int max_ch_queues;
6120
6121	if (!reconfig_rss)
6122		return -EINVAL;
6123
6124	*reconfig_rss = false;
6125	if (vsi->current_rss_size) {
6126		if (num_queues > vsi->current_rss_size) {
6127			dev_dbg(&pf->pdev->dev,
6128				"Error: num_queues (%d) > vsi's current_size(%d)\n",
6129				num_queues, vsi->current_rss_size);
6130			return -EINVAL;
6131		} else if ((num_queues < vsi->current_rss_size) &&
6132			   (!is_power_of_2(num_queues))) {
6133			dev_dbg(&pf->pdev->dev,
6134				"Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
6135				num_queues, vsi->current_rss_size);
6136			return -EINVAL;
6137		}
6138	}
6139
6140	if (!is_power_of_2(num_queues)) {
6141		/* Find the max num_queues configured for channel if channel
6142		 * exist.
6143		 * if channel exist, then enforce 'num_queues' to be more than
6144		 * max ever queues configured for channel.
6145		 */
6146		max_ch_queues = i40e_get_max_queues_for_channel(vsi);
6147		if (num_queues < max_ch_queues) {
6148			dev_dbg(&pf->pdev->dev,
6149				"Error: num_queues (%d) < max queues configured for channel(%d)\n",
6150				num_queues, max_ch_queues);
6151			return -EINVAL;
6152		}
6153		*reconfig_rss = true;
6154	}
6155
6156	return 0;
6157}
6158
6159/**
6160 * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
6161 * @vsi: the VSI being setup
6162 * @rss_size: size of RSS, accordingly LUT gets reprogrammed
6163 *
6164 * This function reconfigures RSS by reprogramming LUTs using 'rss_size'
6165 **/
6166static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
6167{
6168	struct i40e_pf *pf = vsi->back;
6169	u8 seed[I40E_HKEY_ARRAY_SIZE];
6170	struct i40e_hw *hw = &pf->hw;
6171	int local_rss_size;
6172	u8 *lut;
6173	int ret;
6174
6175	if (!vsi->rss_size)
6176		return -EINVAL;
6177
6178	if (rss_size > vsi->rss_size)
6179		return -EINVAL;
6180
6181	local_rss_size = min_t(int, vsi->rss_size, rss_size);
6182	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
6183	if (!lut)
6184		return -ENOMEM;
6185
6186	/* Ignoring user configured lut if there is one */
6187	i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);
6188
6189	/* Use user configured hash key if there is one, otherwise
6190	 * use default.
6191	 */
6192	if (vsi->rss_hkey_user)
6193		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
6194	else
6195		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
6196
6197	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
6198	if (ret) {
6199		dev_info(&pf->pdev->dev,
6200			 "Cannot set RSS lut, err %pe aq_err %s\n",
6201			 ERR_PTR(ret),
6202			 i40e_aq_str(hw, hw->aq.asq_last_status));
6203		kfree(lut);
6204		return ret;
6205	}
6206	kfree(lut);
6207
6208	/* Do the update w.r.t. storing rss_size */
6209	if (!vsi->orig_rss_size)
6210		vsi->orig_rss_size = vsi->rss_size;
6211	vsi->current_rss_size = local_rss_size;
6212
6213	return ret;
6214}
6215
6216/**
6217 * i40e_channel_setup_queue_map - Setup a channel queue map
6218 * @pf: ptr to PF device
6219 * @ctxt: VSI context structure
6220 * @ch: ptr to channel structure
6221 *
6222 * Setup queue map for a specific channel
6223 **/
6224static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
6225					 struct i40e_vsi_context *ctxt,
6226					 struct i40e_channel *ch)
6227{
6228	u16 qcount, qmap, sections = 0;
6229	u8 offset = 0;
6230	int pow;
6231
6232	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
6233	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
6234
6235	qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
6236	ch->num_queue_pairs = qcount;
6237
6238	/* find the next higher power-of-2 of num queue pairs */
6239	pow = ilog2(qcount);
6240	if (!is_power_of_2(qcount))
6241		pow++;
6242
6243	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
6244		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
6245
6246	/* Setup queue TC[0].qmap for given VSI context */
6247	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
6248
6249	ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
6250	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
6251	ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
6252	ctxt->info.valid_sections |= cpu_to_le16(sections);
6253}
6254
6255/**
6256 * i40e_add_channel - add a channel by adding VSI
6257 * @pf: ptr to PF device
6258 * @uplink_seid: underlying HW switching element (VEB) ID
6259 * @ch: ptr to channel structure
6260 *
6261 * Add a channel (VSI) using add_vsi and queue_map
6262 **/
6263static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
6264			    struct i40e_channel *ch)
6265{
6266	struct i40e_hw *hw = &pf->hw;
6267	struct i40e_vsi_context ctxt;
6268	u8 enabled_tc = 0x1; /* TC0 enabled */
6269	int ret;
6270
6271	if (ch->type != I40E_VSI_VMDQ2) {
6272		dev_info(&pf->pdev->dev,
6273			 "add new vsi failed, ch->type %d\n", ch->type);
6274		return -EINVAL;
6275	}
6276
6277	memset(&ctxt, 0, sizeof(ctxt));
6278	ctxt.pf_num = hw->pf_id;
6279	ctxt.vf_num = 0;
6280	ctxt.uplink_seid = uplink_seid;
6281	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
6282	if (ch->type == I40E_VSI_VMDQ2)
6283		ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
6284
6285	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
6286		ctxt.info.valid_sections |=
6287		     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
6288		ctxt.info.switch_id =
6289		   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
6290	}
6291
6292	/* Set queue map for a given VSI context */
6293	i40e_channel_setup_queue_map(pf, &ctxt, ch);
6294
6295	/* Now time to create VSI */
6296	ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
6297	if (ret) {
6298		dev_info(&pf->pdev->dev,
6299			 "add new vsi failed, err %pe aq_err %s\n",
6300			 ERR_PTR(ret),
6301			 i40e_aq_str(&pf->hw,
6302				     pf->hw.aq.asq_last_status));
6303		return -ENOENT;
6304	}
6305
6306	/* Success, update channel, set enabled_tc only if the channel
6307	 * is not a macvlan
6308	 */
6309	ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
6310	ch->seid = ctxt.seid;
6311	ch->vsi_number = ctxt.vsi_number;
6312	ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx);
6313
6314	/* copy just the sections touched not the entire info
6315	 * since not all sections are valid as returned by
6316	 * update vsi params
6317	 */
6318	ch->info.mapping_flags = ctxt.info.mapping_flags;
6319	memcpy(&ch->info.queue_mapping,
6320	       &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
6321	memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
6322	       sizeof(ctxt.info.tc_mapping));
6323
6324	return 0;
6325}
6326
6327static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
6328				  u8 *bw_share)
6329{
6330	struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
6331	int ret;
6332	int i;
6333
6334	memset(&bw_data, 0, sizeof(bw_data));
6335	bw_data.tc_valid_bits = ch->enabled_tc;
6336	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
6337		bw_data.tc_bw_credits[i] = bw_share[i];
6338
6339	ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
6340				       &bw_data, NULL);
6341	if (ret) {
6342		dev_info(&vsi->back->pdev->dev,
6343			 "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
6344			 vsi->back->hw.aq.asq_last_status, ch->seid);
6345		return -EINVAL;
6346	}
6347
6348	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
6349		ch->info.qs_handle[i] = bw_data.qs_handles[i];
6350
6351	return 0;
6352}
6353
6354/**
6355 * i40e_channel_config_tx_ring - config TX ring associated with new channel
6356 * @pf: ptr to PF device
6357 * @vsi: the VSI being setup
6358 * @ch: ptr to channel structure
6359 *
6360 * Configure TX rings associated with channel (VSI) since queues are being
6361 * from parent VSI.
6362 **/
6363static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
6364				       struct i40e_vsi *vsi,
6365				       struct i40e_channel *ch)
6366{
6367	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
6368	int ret;
6369	int i;
6370
6371	/* Enable ETS TCs with equal BW Share for now across all VSIs */
6372	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
6373		if (ch->enabled_tc & BIT(i))
6374			bw_share[i] = 1;
6375	}
6376
6377	/* configure BW for new VSI */
6378	ret = i40e_channel_config_bw(vsi, ch, bw_share);
6379	if (ret) {
6380		dev_info(&vsi->back->pdev->dev,
6381			 "Failed configuring TC map %d for channel (seid %u)\n",
6382			 ch->enabled_tc, ch->seid);
6383		return ret;
6384	}
6385
6386	for (i = 0; i < ch->num_queue_pairs; i++) {
6387		struct i40e_ring *tx_ring, *rx_ring;
6388		u16 pf_q;
6389
6390		pf_q = ch->base_queue + i;
6391
6392		/* Get to TX ring ptr of main VSI, for re-setup TX queue
6393		 * context
6394		 */
6395		tx_ring = vsi->tx_rings[pf_q];
6396		tx_ring->ch = ch;
6397
6398		/* Get the RX ring ptr */
6399		rx_ring = vsi->rx_rings[pf_q];
6400		rx_ring->ch = ch;
6401	}
6402
6403	return 0;
6404}
6405
6406/**
6407 * i40e_setup_hw_channel - setup new channel
6408 * @pf: ptr to PF device
6409 * @vsi: the VSI being setup
6410 * @ch: ptr to channel structure
6411 * @uplink_seid: underlying HW switching element (VEB) ID
6412 * @type: type of channel to be created (VMDq2/VF)
6413 *
6414 * Setup new channel (VSI) based on specified type (VMDq2/VF)
6415 * and configures TX rings accordingly
6416 **/
6417static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
6418					struct i40e_vsi *vsi,
6419					struct i40e_channel *ch,
6420					u16 uplink_seid, u8 type)
6421{
6422	int ret;
6423
6424	ch->initialized = false;
6425	ch->base_queue = vsi->next_base_queue;
6426	ch->type = type;
6427
6428	/* Proceed with creation of channel (VMDq2) VSI */
6429	ret = i40e_add_channel(pf, uplink_seid, ch);
6430	if (ret) {
6431		dev_info(&pf->pdev->dev,
6432			 "failed to add_channel using uplink_seid %u\n",
6433			 uplink_seid);
6434		return ret;
6435	}
6436
6437	/* Mark the successful creation of channel */
6438	ch->initialized = true;
6439
6440	/* Reconfigure TX queues using QTX_CTL register */
6441	ret = i40e_channel_config_tx_ring(pf, vsi, ch);
6442	if (ret) {
6443		dev_info(&pf->pdev->dev,
6444			 "failed to configure TX rings for channel %u\n",
6445			 ch->seid);
6446		return ret;
6447	}
6448
6449	/* update 'next_base_queue' */
6450	vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs;
6451	dev_dbg(&pf->pdev->dev,
6452		"Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
6453		ch->seid, ch->vsi_number, ch->stat_counter_idx,
6454		ch->num_queue_pairs,
6455		vsi->next_base_queue);
6456	return ret;
6457}
6458
6459/**
6460 * i40e_setup_channel - setup new channel using uplink element
6461 * @pf: ptr to PF device
6462 * @vsi: pointer to the VSI to set up the channel within
6463 * @ch: ptr to channel structure
6464 *
6465 * Setup new channel (VSI) based on specified type (VMDq2/VF)
6466 * and uplink switching element (uplink_seid)
6467 **/
6468static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
6469			       struct i40e_channel *ch)
6470{
6471	u8 vsi_type;
6472	u16 seid;
6473	int ret;
6474
6475	if (vsi->type == I40E_VSI_MAIN) {
6476		vsi_type = I40E_VSI_VMDQ2;
6477	} else {
6478		dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
6479			vsi->type);
6480		return false;
6481	}
6482
6483	/* underlying switching element */
6484	seid = pf->vsi[pf->lan_vsi]->uplink_seid;
6485
6486	/* create channel (VSI), configure TX rings */
6487	ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type);
6488	if (ret) {
6489		dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
6490		return false;
6491	}
6492
6493	return ch->initialized ? true : false;
6494}
6495
6496/**
6497 * i40e_validate_and_set_switch_mode - sets up switch mode correctly
6498 * @vsi: ptr to VSI which has PF backing
6499 *
6500 * Sets up switch mode correctly if it needs to be changed and perform
6501 * what are allowed modes.
6502 **/
6503static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
6504{
6505	u8 mode;
6506	struct i40e_pf *pf = vsi->back;
6507	struct i40e_hw *hw = &pf->hw;
6508	int ret;
6509
6510	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities);
6511	if (ret)
6512		return -EINVAL;
6513
6514	if (hw->dev_caps.switch_mode) {
6515		/* if switch mode is set, support mode2 (non-tunneled for
6516		 * cloud filter) for now
6517		 */
6518		u32 switch_mode = hw->dev_caps.switch_mode &
6519				  I40E_SWITCH_MODE_MASK;
6520		if (switch_mode >= I40E_CLOUD_FILTER_MODE1) {
6521			if (switch_mode == I40E_CLOUD_FILTER_MODE2)
6522				return 0;
6523			dev_err(&pf->pdev->dev,
6524				"Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
6525				hw->dev_caps.switch_mode);
6526			return -EINVAL;
6527		}
6528	}
6529
6530	/* Set Bit 7 to be valid */
6531	mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
6532
6533	/* Set L4type for TCP support */
6534	mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP;
6535
6536	/* Set cloud filter mode */
6537	mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
6538
6539	/* Prep mode field for set_switch_config */
6540	ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
6541					pf->last_sw_conf_valid_flags,
6542					mode, NULL);
6543	if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
6544		dev_err(&pf->pdev->dev,
6545			"couldn't set switch config bits, err %pe aq_err %s\n",
6546			ERR_PTR(ret),
6547			i40e_aq_str(hw,
6548				    hw->aq.asq_last_status));
6549
6550	return ret;
6551}
6552
6553/**
6554 * i40e_create_queue_channel - function to create channel
6555 * @vsi: VSI to be configured
6556 * @ch: ptr to channel (it contains channel specific params)
6557 *
6558 * This function creates channel (VSI) using num_queues specified by user,
6559 * reconfigs RSS if needed.
6560 **/
6561int i40e_create_queue_channel(struct i40e_vsi *vsi,
6562			      struct i40e_channel *ch)
6563{
6564	struct i40e_pf *pf = vsi->back;
6565	bool reconfig_rss;
6566	int err;
6567
6568	if (!ch)
6569		return -EINVAL;
6570
6571	if (!ch->num_queue_pairs) {
6572		dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
6573			ch->num_queue_pairs);
6574		return -EINVAL;
6575	}
6576
6577	/* validate user requested num_queues for channel */
6578	err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
6579				       &reconfig_rss);
6580	if (err) {
6581		dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
6582			 ch->num_queue_pairs);
6583		return -EINVAL;
6584	}
6585
6586	/* By default we are in VEPA mode, if this is the first VF/VMDq
6587	 * VSI to be added switch to VEB mode.
6588	 */
6589
6590	if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
6591		pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
6592
6593		if (vsi->type == I40E_VSI_MAIN) {
6594			if (i40e_is_tc_mqprio_enabled(pf))
6595				i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
6596			else
6597				i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
6598		}
6599		/* now onwards for main VSI, number of queues will be value
6600		 * of TC0's queue count
6601		 */
6602	}
6603
6604	/* By this time, vsi->cnt_q_avail shall be set to non-zero and
6605	 * it should be more than num_queues
6606	 */
6607	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
6608		dev_dbg(&pf->pdev->dev,
6609			"Error: cnt_q_avail (%u) less than num_queues %d\n",
6610			vsi->cnt_q_avail, ch->num_queue_pairs);
6611		return -EINVAL;
6612	}
6613
6614	/* reconfig_rss only if vsi type is MAIN_VSI */
6615	if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
6616		err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
6617		if (err) {
6618			dev_info(&pf->pdev->dev,
6619				 "Error: unable to reconfig rss for num_queues (%u)\n",
6620				 ch->num_queue_pairs);
6621			return -EINVAL;
6622		}
6623	}
6624
6625	if (!i40e_setup_channel(pf, vsi, ch)) {
6626		dev_info(&pf->pdev->dev, "Failed to setup channel\n");
6627		return -EINVAL;
6628	}
6629
6630	dev_info(&pf->pdev->dev,
6631		 "Setup channel (id:%u) utilizing num_queues %d\n",
6632		 ch->seid, ch->num_queue_pairs);
6633
6634	/* configure VSI for BW limit */
6635	if (ch->max_tx_rate) {
6636		u64 credits = ch->max_tx_rate;
6637
6638		if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
6639			return -EINVAL;
6640
6641		do_div(credits, I40E_BW_CREDIT_DIVISOR);
6642		dev_dbg(&pf->pdev->dev,
6643			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
6644			ch->max_tx_rate,
6645			credits,
6646			ch->seid);
6647	}
6648
6649	/* in case of VF, this will be main SRIOV VSI */
6650	ch->parent_vsi = vsi;
6651
6652	/* and update main_vsi's count for queue_available to use */
6653	vsi->cnt_q_avail -= ch->num_queue_pairs;
6654
6655	return 0;
6656}
6657
6658/**
6659 * i40e_configure_queue_channels - Add queue channel for the given TCs
6660 * @vsi: VSI to be configured
6661 *
6662 * Configures queue channel mapping to the given TCs
6663 **/
6664static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
6665{
6666	struct i40e_channel *ch;
6667	u64 max_rate = 0;
6668	int ret = 0, i;
6669
6670	/* Create app vsi with the TCs. Main VSI with TC0 is already set up */
6671	vsi->tc_seid_map[0] = vsi->seid;
6672	for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) {
6673		if (vsi->tc_config.enabled_tc & BIT(i)) {
6674			ch = kzalloc(sizeof(*ch), GFP_KERNEL);
6675			if (!ch) {
6676				ret = -ENOMEM;
6677				goto err_free;
6678			}
6679
6680			INIT_LIST_HEAD(&ch->list);
6681			ch->num_queue_pairs =
6682				vsi->tc_config.tc_info[i].qcount;
6683			ch->base_queue =
6684				vsi->tc_config.tc_info[i].qoffset;
6685
6686			/* Bandwidth limit through tc interface is in bytes/s,
6687			 * change to Mbit/s
6688			 */
6689			max_rate = vsi->mqprio_qopt.max_rate[i];
6690			do_div(max_rate, I40E_BW_MBPS_DIVISOR);
6691			ch->max_tx_rate = max_rate;
6692
6693			list_add_tail(&ch->list, &vsi->ch_list);
6694
6695			ret = i40e_create_queue_channel(vsi, ch);
6696			if (ret) {
6697				dev_err(&vsi->back->pdev->dev,
6698					"Failed creating queue channel with TC%d: queues %d\n",
6699					i, ch->num_queue_pairs);
6700				goto err_free;
6701			}
6702			vsi->tc_seid_map[i] = ch->seid;
6703		}
6704	}
6705
6706	/* reset to reconfigure TX queue contexts */
6707	i40e_do_reset(vsi->back, I40E_PF_RESET_FLAG, true);
6708	return ret;
6709
6710err_free:
6711	i40e_remove_queue_channels(vsi);
6712	return ret;
6713}
6714
6715/**
6716 * i40e_veb_config_tc - Configure TCs for given VEB
6717 * @veb: given VEB
6718 * @enabled_tc: TC bitmap
6719 *
6720 * Configures given TC bitmap for VEB (switching) element
6721 **/
6722int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
6723{
6724	struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
6725	struct i40e_pf *pf = veb->pf;
6726	int ret = 0;
6727	int i;
6728
6729	/* No TCs or already enabled TCs just return */
6730	if (!enabled_tc || veb->enabled_tc == enabled_tc)
6731		return ret;
6732
6733	bw_data.tc_valid_bits = enabled_tc;
6734	/* bw_data.absolute_credits is not set (relative) */
6735
6736	/* Enable ETS TCs with equal BW Share for now */
6737	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
6738		if (enabled_tc & BIT(i))
6739			bw_data.tc_bw_share_credits[i] = 1;
6740	}
6741
6742	ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
6743						   &bw_data, NULL);
6744	if (ret) {
6745		dev_info(&pf->pdev->dev,
6746			 "VEB bw config failed, err %pe aq_err %s\n",
6747			 ERR_PTR(ret),
6748			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6749		goto out;
6750	}
6751
6752	/* Update the BW information */
6753	ret = i40e_veb_get_bw_info(veb);
6754	if (ret) {
6755		dev_info(&pf->pdev->dev,
6756			 "Failed getting veb bw config, err %pe aq_err %s\n",
6757			 ERR_PTR(ret),
6758			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6759	}
6760
6761out:
6762	return ret;
6763}
6764
6765#ifdef CONFIG_I40E_DCB
6766/**
6767 * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
6768 * @pf: PF struct
6769 *
6770 * Reconfigure VEB/VSIs on a given PF; it is assumed that
6771 * the caller would've quiesce all the VSIs before calling
6772 * this function
6773 **/
6774static void i40e_dcb_reconfigure(struct i40e_pf *pf)
6775{
6776	u8 tc_map = 0;
6777	int ret;
6778	u8 v;
6779
6780	/* Enable the TCs available on PF to all VEBs */
6781	tc_map = i40e_pf_get_tc_map(pf);
6782	if (tc_map == I40E_DEFAULT_TRAFFIC_CLASS)
6783		return;
6784
6785	for (v = 0; v < I40E_MAX_VEB; v++) {
6786		if (!pf->veb[v])
6787			continue;
6788		ret = i40e_veb_config_tc(pf->veb[v], tc_map);
6789		if (ret) {
6790			dev_info(&pf->pdev->dev,
6791				 "Failed configuring TC for VEB seid=%d\n",
6792				 pf->veb[v]->seid);
6793			/* Will try to configure as many components */
6794		}
6795	}
6796
6797	/* Update each VSI */
6798	for (v = 0; v < pf->num_alloc_vsi; v++) {
6799		if (!pf->vsi[v])
6800			continue;
6801
6802		/* - Enable all TCs for the LAN VSI
6803		 * - For all others keep them at TC0 for now
6804		 */
6805		if (v == pf->lan_vsi)
6806			tc_map = i40e_pf_get_tc_map(pf);
6807		else
6808			tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
6809
6810		ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
6811		if (ret) {
6812			dev_info(&pf->pdev->dev,
6813				 "Failed configuring TC for VSI seid=%d\n",
6814				 pf->vsi[v]->seid);
6815			/* Will try to configure as many components */
6816		} else {
6817			/* Re-configure VSI vectors based on updated TC map */
6818			i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
6819			if (pf->vsi[v]->netdev)
6820				i40e_dcbnl_set_all(pf->vsi[v]);
6821		}
6822	}
6823}
6824
6825/**
6826 * i40e_resume_port_tx - Resume port Tx
6827 * @pf: PF struct
6828 *
6829 * Resume a port's Tx and issue a PF reset in case of failure to
6830 * resume.
6831 **/
6832static int i40e_resume_port_tx(struct i40e_pf *pf)
6833{
6834	struct i40e_hw *hw = &pf->hw;
6835	int ret;
6836
6837	ret = i40e_aq_resume_port_tx(hw, NULL);
6838	if (ret) {
6839		dev_info(&pf->pdev->dev,
6840			 "Resume Port Tx failed, err %pe aq_err %s\n",
6841			  ERR_PTR(ret),
6842			  i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6843		/* Schedule PF reset to recover */
6844		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
6845		i40e_service_event_schedule(pf);
6846	}
6847
6848	return ret;
6849}
6850
6851/**
6852 * i40e_suspend_port_tx - Suspend port Tx
6853 * @pf: PF struct
6854 *
6855 * Suspend a port's Tx and issue a PF reset in case of failure.
6856 **/
6857static int i40e_suspend_port_tx(struct i40e_pf *pf)
6858{
6859	struct i40e_hw *hw = &pf->hw;
6860	int ret;
6861
6862	ret = i40e_aq_suspend_port_tx(hw, pf->mac_seid, NULL);
6863	if (ret) {
6864		dev_info(&pf->pdev->dev,
6865			 "Suspend Port Tx failed, err %pe aq_err %s\n",
6866			 ERR_PTR(ret),
6867			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6868		/* Schedule PF reset to recover */
6869		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
6870		i40e_service_event_schedule(pf);
6871	}
6872
6873	return ret;
6874}
6875
6876/**
6877 * i40e_hw_set_dcb_config - Program new DCBX settings into HW
6878 * @pf: PF being configured
6879 * @new_cfg: New DCBX configuration
6880 *
6881 * Program DCB settings into HW and reconfigure VEB/VSIs on
6882 * given PF. Uses "Set LLDP MIB" AQC to program the hardware.
6883 **/
6884static int i40e_hw_set_dcb_config(struct i40e_pf *pf,
6885				  struct i40e_dcbx_config *new_cfg)
6886{
6887	struct i40e_dcbx_config *old_cfg = &pf->hw.local_dcbx_config;
6888	int ret;
6889
6890	/* Check if need reconfiguration */
6891	if (!memcmp(&new_cfg, &old_cfg, sizeof(new_cfg))) {
6892		dev_dbg(&pf->pdev->dev, "No Change in DCB Config required.\n");
6893		return 0;
6894	}
6895
6896	/* Config change disable all VSIs */
6897	i40e_pf_quiesce_all_vsi(pf);
6898
6899	/* Copy the new config to the current config */
6900	*old_cfg = *new_cfg;
6901	old_cfg->etsrec = old_cfg->etscfg;
6902	ret = i40e_set_dcb_config(&pf->hw);
6903	if (ret) {
6904		dev_info(&pf->pdev->dev,
6905			 "Set DCB Config failed, err %pe aq_err %s\n",
6906			 ERR_PTR(ret),
6907			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6908		goto out;
6909	}
6910
6911	/* Changes in configuration update VEB/VSI */
6912	i40e_dcb_reconfigure(pf);
6913out:
6914	/* In case of reset do not try to resume anything */
6915	if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) {
6916		/* Re-start the VSIs if disabled */
6917		ret = i40e_resume_port_tx(pf);
6918		/* In case of error no point in resuming VSIs */
6919		if (ret)
6920			goto err;
6921		i40e_pf_unquiesce_all_vsi(pf);
6922	}
6923err:
6924	return ret;
6925}
6926
6927/**
6928 * i40e_hw_dcb_config - Program new DCBX settings into HW
6929 * @pf: PF being configured
6930 * @new_cfg: New DCBX configuration
6931 *
6932 * Program DCB settings into HW and reconfigure VEB/VSIs on
6933 * given PF
6934 **/
6935int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg)
6936{
6937	struct i40e_aqc_configure_switching_comp_ets_data ets_data;
6938	u8 prio_type[I40E_MAX_TRAFFIC_CLASS] = {0};
6939	u32 mfs_tc[I40E_MAX_TRAFFIC_CLASS];
6940	struct i40e_dcbx_config *old_cfg;
6941	u8 mode[I40E_MAX_TRAFFIC_CLASS];
6942	struct i40e_rx_pb_config pb_cfg;
6943	struct i40e_hw *hw = &pf->hw;
6944	u8 num_ports = hw->num_ports;
6945	bool need_reconfig;
6946	int ret = -EINVAL;
6947	u8 lltc_map = 0;
6948	u8 tc_map = 0;
6949	u8 new_numtc;
6950	u8 i;
6951
6952	dev_dbg(&pf->pdev->dev, "Configuring DCB registers directly\n");
6953	/* Un-pack information to Program ETS HW via shared API
6954	 * numtc, tcmap
6955	 * LLTC map
6956	 * ETS/NON-ETS arbiter mode
6957	 * max exponent (credit refills)
6958	 * Total number of ports
6959	 * PFC priority bit-map
6960	 * Priority Table
6961	 * BW % per TC
6962	 * Arbiter mode between UPs sharing same TC
6963	 * TSA table (ETS or non-ETS)
6964	 * EEE enabled or not
6965	 * MFS TC table
6966	 */
6967
6968	new_numtc = i40e_dcb_get_num_tc(new_cfg);
6969
6970	memset(&ets_data, 0, sizeof(ets_data));
6971	for (i = 0; i < new_numtc; i++) {
6972		tc_map |= BIT(i);
6973		switch (new_cfg->etscfg.tsatable[i]) {
6974		case I40E_IEEE_TSA_ETS:
6975			prio_type[i] = I40E_DCB_PRIO_TYPE_ETS;
6976			ets_data.tc_bw_share_credits[i] =
6977					new_cfg->etscfg.tcbwtable[i];
6978			break;
6979		case I40E_IEEE_TSA_STRICT:
6980			prio_type[i] = I40E_DCB_PRIO_TYPE_STRICT;
6981			lltc_map |= BIT(i);
6982			ets_data.tc_bw_share_credits[i] =
6983					I40E_DCB_STRICT_PRIO_CREDITS;
6984			break;
6985		default:
6986			/* Invalid TSA type */
6987			need_reconfig = false;
6988			goto out;
6989		}
6990	}
6991
6992	old_cfg = &hw->local_dcbx_config;
6993	/* Check if need reconfiguration */
6994	need_reconfig = i40e_dcb_need_reconfig(pf, old_cfg, new_cfg);
6995
6996	/* If needed, enable/disable frame tagging, disable all VSIs
6997	 * and suspend port tx
6998	 */
6999	if (need_reconfig) {
7000		/* Enable DCB tagging only when more than one TC */
7001		if (new_numtc > 1)
7002			pf->flags |= I40E_FLAG_DCB_ENABLED;
7003		else
7004			pf->flags &= ~I40E_FLAG_DCB_ENABLED;
7005
7006		set_bit(__I40E_PORT_SUSPENDED, pf->state);
7007		/* Reconfiguration needed quiesce all VSIs */
7008		i40e_pf_quiesce_all_vsi(pf);
7009		ret = i40e_suspend_port_tx(pf);
7010		if (ret)
7011			goto err;
7012	}
7013
7014	/* Configure Port ETS Tx Scheduler */
7015	ets_data.tc_valid_bits = tc_map;
7016	ets_data.tc_strict_priority_flags = lltc_map;
7017	ret = i40e_aq_config_switch_comp_ets
7018		(hw, pf->mac_seid, &ets_data,
7019		 i40e_aqc_opc_modify_switching_comp_ets, NULL);
7020	if (ret) {
7021		dev_info(&pf->pdev->dev,
7022			 "Modify Port ETS failed, err %pe aq_err %s\n",
7023			 ERR_PTR(ret),
7024			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
7025		goto out;
7026	}
7027
7028	/* Configure Rx ETS HW */
7029	memset(&mode, I40E_DCB_ARB_MODE_ROUND_ROBIN, sizeof(mode));
7030	i40e_dcb_hw_set_num_tc(hw, new_numtc);
7031	i40e_dcb_hw_rx_fifo_config(hw, I40E_DCB_ARB_MODE_ROUND_ROBIN,
7032				   I40E_DCB_ARB_MODE_STRICT_PRIORITY,
7033				   I40E_DCB_DEFAULT_MAX_EXPONENT,
7034				   lltc_map);
7035	i40e_dcb_hw_rx_cmd_monitor_config(hw, new_numtc, num_ports);
7036	i40e_dcb_hw_rx_ets_bw_config(hw, new_cfg->etscfg.tcbwtable, mode,
7037				     prio_type);
7038	i40e_dcb_hw_pfc_config(hw, new_cfg->pfc.pfcenable,
7039			       new_cfg->etscfg.prioritytable);
7040	i40e_dcb_hw_rx_up2tc_config(hw, new_cfg->etscfg.prioritytable);
7041
7042	/* Configure Rx Packet Buffers in HW */
7043	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
7044		mfs_tc[i] = pf->vsi[pf->lan_vsi]->netdev->mtu;
7045		mfs_tc[i] += I40E_PACKET_HDR_PAD;
7046	}
7047
7048	i40e_dcb_hw_calculate_pool_sizes(hw, num_ports,
7049					 false, new_cfg->pfc.pfcenable,
7050					 mfs_tc, &pb_cfg);
7051	i40e_dcb_hw_rx_pb_config(hw, &pf->pb_cfg, &pb_cfg);
7052
7053	/* Update the local Rx Packet buffer config */
7054	pf->pb_cfg = pb_cfg;
7055
7056	/* Inform the FW about changes to DCB configuration */
7057	ret = i40e_aq_dcb_updated(&pf->hw, NULL);
7058	if (ret) {
7059		dev_info(&pf->pdev->dev,
7060			 "DCB Updated failed, err %pe aq_err %s\n",
7061			 ERR_PTR(ret),
7062			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
7063		goto out;
7064	}
7065
7066	/* Update the port DCBx configuration */
7067	*old_cfg = *new_cfg;
7068
7069	/* Changes in configuration update VEB/VSI */
7070	i40e_dcb_reconfigure(pf);
7071out:
7072	/* Re-start the VSIs if disabled */
7073	if (need_reconfig) {
7074		ret = i40e_resume_port_tx(pf);
7075
7076		clear_bit(__I40E_PORT_SUSPENDED, pf->state);
7077		/* In case of error no point in resuming VSIs */
7078		if (ret)
7079			goto err;
7080
7081		/* Wait for the PF's queues to be disabled */
7082		ret = i40e_pf_wait_queues_disabled(pf);
7083		if (ret) {
7084			/* Schedule PF reset to recover */
7085			set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
7086			i40e_service_event_schedule(pf);
7087			goto err;
7088		} else {
7089			i40e_pf_unquiesce_all_vsi(pf);
7090			set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
7091			set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
7092		}
7093		/* registers are set, lets apply */
7094		if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB)
7095			ret = i40e_hw_set_dcb_config(pf, new_cfg);
7096	}
7097
7098err:
7099	return ret;
7100}
7101
7102/**
7103 * i40e_dcb_sw_default_config - Set default DCB configuration when DCB in SW
7104 * @pf: PF being queried
7105 *
7106 * Set default DCB configuration in case DCB is to be done in SW.
7107 **/
7108int i40e_dcb_sw_default_config(struct i40e_pf *pf)
7109{
7110	struct i40e_dcbx_config *dcb_cfg = &pf->hw.local_dcbx_config;
7111	struct i40e_aqc_configure_switching_comp_ets_data ets_data;
7112	struct i40e_hw *hw = &pf->hw;
7113	int err;
7114
7115	if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) {
7116		/* Update the local cached instance with TC0 ETS */
7117		memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config));
7118		pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING;
7119		pf->tmp_cfg.etscfg.maxtcs = 0;
7120		pf->tmp_cfg.etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW;
7121		pf->tmp_cfg.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS;
7122		pf->tmp_cfg.pfc.willing = I40E_IEEE_DEFAULT_PFC_WILLING;
7123		pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
7124		/* FW needs one App to configure HW */
7125		pf->tmp_cfg.numapps = I40E_IEEE_DEFAULT_NUM_APPS;
7126		pf->tmp_cfg.app[0].selector = I40E_APP_SEL_ETHTYPE;
7127		pf->tmp_cfg.app[0].priority = I40E_IEEE_DEFAULT_APP_PRIO;
7128		pf->tmp_cfg.app[0].protocolid = I40E_APP_PROTOID_FCOE;
7129
7130		return i40e_hw_set_dcb_config(pf, &pf->tmp_cfg);
7131	}
7132
7133	memset(&ets_data, 0, sizeof(ets_data));
7134	ets_data.tc_valid_bits = I40E_DEFAULT_TRAFFIC_CLASS; /* TC0 only */
7135	ets_data.tc_strict_priority_flags = 0; /* ETS */
7136	ets_data.tc_bw_share_credits[0] = I40E_IEEE_DEFAULT_ETS_TCBW; /* 100% to TC0 */
7137
7138	/* Enable ETS on the Physical port */
7139	err = i40e_aq_config_switch_comp_ets
7140		(hw, pf->mac_seid, &ets_data,
7141		 i40e_aqc_opc_enable_switching_comp_ets, NULL);
7142	if (err) {
7143		dev_info(&pf->pdev->dev,
7144			 "Enable Port ETS failed, err %pe aq_err %s\n",
7145			 ERR_PTR(err),
7146			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
7147		err = -ENOENT;
7148		goto out;
7149	}
7150
7151	/* Update the local cached instance with TC0 ETS */
7152	dcb_cfg->etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING;
7153	dcb_cfg->etscfg.cbs = 0;
7154	dcb_cfg->etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS;
7155	dcb_cfg->etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW;
7156
7157out:
7158	return err;
7159}
7160
7161/**
7162 * i40e_init_pf_dcb - Initialize DCB configuration
7163 * @pf: PF being configured
7164 *
7165 * Query the current DCB configuration and cache it
7166 * in the hardware structure
7167 **/
7168static int i40e_init_pf_dcb(struct i40e_pf *pf)
7169{
7170	struct i40e_hw *hw = &pf->hw;
7171	int err;
7172
7173	/* Do not enable DCB for SW1 and SW2 images even if the FW is capable
7174	 * Also do not enable DCBx if FW LLDP agent is disabled
7175	 */
7176	if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT) {
7177		dev_info(&pf->pdev->dev, "DCB is not supported.\n");
7178		err = -EOPNOTSUPP;
7179		goto out;
7180	}
7181	if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
7182		dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n");
7183		err = i40e_dcb_sw_default_config(pf);
7184		if (err) {
7185			dev_info(&pf->pdev->dev, "Could not initialize SW DCB\n");
7186			goto out;
7187		}
7188		dev_info(&pf->pdev->dev, "SW DCB initialization succeeded.\n");
7189		pf->dcbx_cap = DCB_CAP_DCBX_HOST |
7190			       DCB_CAP_DCBX_VER_IEEE;
7191		/* at init capable but disabled */
7192		pf->flags |= I40E_FLAG_DCB_CAPABLE;
7193		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
7194		goto out;
7195	}
7196	err = i40e_init_dcb(hw, true);
7197	if (!err) {
7198		/* Device/Function is not DCBX capable */
7199		if ((!hw->func_caps.dcb) ||
7200		    (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
7201			dev_info(&pf->pdev->dev,
7202				 "DCBX offload is not supported or is disabled for this PF.\n");
7203		} else {
7204			/* When status is not DISABLED then DCBX in FW */
7205			pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
7206				       DCB_CAP_DCBX_VER_IEEE;
7207
7208			pf->flags |= I40E_FLAG_DCB_CAPABLE;
7209			/* Enable DCB tagging only when more than one TC
7210			 * or explicitly disable if only one TC
7211			 */
7212			if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
7213				pf->flags |= I40E_FLAG_DCB_ENABLED;
7214			else
7215				pf->flags &= ~I40E_FLAG_DCB_ENABLED;
7216			dev_dbg(&pf->pdev->dev,
7217				"DCBX offload is supported for this PF.\n");
7218		}
7219	} else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) {
7220		dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n");
7221		pf->flags |= I40E_FLAG_DISABLE_FW_LLDP;
7222	} else {
7223		dev_info(&pf->pdev->dev,
7224			 "Query for DCB configuration failed, err %pe aq_err %s\n",
7225			 ERR_PTR(err),
7226			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
7227	}
7228
7229out:
7230	return err;
7231}
7232#endif /* CONFIG_I40E_DCB */
7233
7234/**
7235 * i40e_print_link_message - print link up or down
7236 * @vsi: the VSI for which link needs a message
7237 * @isup: true of link is up, false otherwise
7238 */
7239void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
7240{
7241	enum i40e_aq_link_speed new_speed;
7242	struct i40e_pf *pf = vsi->back;
7243	char *speed = "Unknown";
7244	char *fc = "Unknown";
7245	char *fec = "";
7246	char *req_fec = "";
7247	char *an = "";
7248
7249	if (isup)
7250		new_speed = pf->hw.phy.link_info.link_speed;
7251	else
7252		new_speed = I40E_LINK_SPEED_UNKNOWN;
7253
7254	if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
7255		return;
7256	vsi->current_isup = isup;
7257	vsi->current_speed = new_speed;
7258	if (!isup) {
7259		netdev_info(vsi->netdev, "NIC Link is Down\n");
7260		return;
7261	}
7262
7263	/* Warn user if link speed on NPAR enabled partition is not at
7264	 * least 10GB
7265	 */
7266	if (pf->hw.func_caps.npar_enable &&
7267	    (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
7268	     pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
7269		netdev_warn(vsi->netdev,
7270			    "The partition detected link speed that is less than 10Gbps\n");
7271
7272	switch (pf->hw.phy.link_info.link_speed) {
7273	case I40E_LINK_SPEED_40GB:
7274		speed = "40 G";
7275		break;
7276	case I40E_LINK_SPEED_20GB:
7277		speed = "20 G";
7278		break;
7279	case I40E_LINK_SPEED_25GB:
7280		speed = "25 G";
7281		break;
7282	case I40E_LINK_SPEED_10GB:
7283		speed = "10 G";
7284		break;
7285	case I40E_LINK_SPEED_5GB:
7286		speed = "5 G";
7287		break;
7288	case I40E_LINK_SPEED_2_5GB:
7289		speed = "2.5 G";
7290		break;
7291	case I40E_LINK_SPEED_1GB:
7292		speed = "1000 M";
7293		break;
7294	case I40E_LINK_SPEED_100MB:
7295		speed = "100 M";
7296		break;
7297	default:
7298		break;
7299	}
7300
7301	switch (pf->hw.fc.current_mode) {
7302	case I40E_FC_FULL:
7303		fc = "RX/TX";
7304		break;
7305	case I40E_FC_TX_PAUSE:
7306		fc = "TX";
7307		break;
7308	case I40E_FC_RX_PAUSE:
7309		fc = "RX";
7310		break;
7311	default:
7312		fc = "None";
7313		break;
7314	}
7315
7316	if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
7317		req_fec = "None";
7318		fec = "None";
7319		an = "False";
7320
7321		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
7322			an = "True";
7323
7324		if (pf->hw.phy.link_info.fec_info &
7325		    I40E_AQ_CONFIG_FEC_KR_ENA)
7326			fec = "CL74 FC-FEC/BASE-R";
7327		else if (pf->hw.phy.link_info.fec_info &
7328			 I40E_AQ_CONFIG_FEC_RS_ENA)
7329			fec = "CL108 RS-FEC";
7330
7331		/* 'CL108 RS-FEC' should be displayed when RS is requested, or
7332		 * both RS and FC are requested
7333		 */
7334		if (vsi->back->hw.phy.link_info.req_fec_info &
7335		    (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
7336			if (vsi->back->hw.phy.link_info.req_fec_info &
7337			    I40E_AQ_REQUEST_FEC_RS)
7338				req_fec = "CL108 RS-FEC";
7339			else
7340				req_fec = "CL74 FC-FEC/BASE-R";
7341		}
7342		netdev_info(vsi->netdev,
7343			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7344			    speed, req_fec, fec, an, fc);
7345	} else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) {
7346		req_fec = "None";
7347		fec = "None";
7348		an = "False";
7349
7350		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
7351			an = "True";
7352
7353		if (pf->hw.phy.link_info.fec_info &
7354		    I40E_AQ_CONFIG_FEC_KR_ENA)
7355			fec = "CL74 FC-FEC/BASE-R";
7356
7357		if (pf->hw.phy.link_info.req_fec_info &
7358		    I40E_AQ_REQUEST_FEC_KR)
7359			req_fec = "CL74 FC-FEC/BASE-R";
7360
7361		netdev_info(vsi->netdev,
7362			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
7363			    speed, req_fec, fec, an, fc);
7364	} else {
7365		netdev_info(vsi->netdev,
7366			    "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n",
7367			    speed, fc);
7368	}
7369
7370}
7371
7372/**
7373 * i40e_up_complete - Finish the last steps of bringing up a connection
7374 * @vsi: the VSI being configured
7375 **/
7376static int i40e_up_complete(struct i40e_vsi *vsi)
7377{
7378	struct i40e_pf *pf = vsi->back;
7379	int err;
7380
7381	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
7382		i40e_vsi_configure_msix(vsi);
7383	else
7384		i40e_configure_msi_and_legacy(vsi);
7385
7386	/* start rings */
7387	err = i40e_vsi_start_rings(vsi);
7388	if (err)
7389		return err;
7390
7391	clear_bit(__I40E_VSI_DOWN, vsi->state);
7392	i40e_napi_enable_all(vsi);
7393	i40e_vsi_enable_irq(vsi);
7394
7395	if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
7396	    (vsi->netdev)) {
7397		i40e_print_link_message(vsi, true);
7398		netif_tx_start_all_queues(vsi->netdev);
7399		netif_carrier_on(vsi->netdev);
7400	}
7401
7402	/* replay FDIR SB filters */
7403	if (vsi->type == I40E_VSI_FDIR) {
7404		/* reset fd counters */
7405		pf->fd_add_err = 0;
7406		pf->fd_atr_cnt = 0;
7407		i40e_fdir_filter_restore(vsi);
7408	}
7409
7410	/* On the next run of the service_task, notify any clients of the new
7411	 * opened netdev
7412	 */
7413	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
7414	i40e_service_event_schedule(pf);
7415
7416	return 0;
7417}
7418
7419/**
7420 * i40e_vsi_reinit_locked - Reset the VSI
7421 * @vsi: the VSI being configured
7422 *
7423 * Rebuild the ring structs after some configuration
7424 * has changed, e.g. MTU size.
7425 **/
7426static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
7427{
7428	struct i40e_pf *pf = vsi->back;
7429
7430	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
7431		usleep_range(1000, 2000);
7432	i40e_down(vsi);
7433
7434	i40e_up(vsi);
7435	clear_bit(__I40E_CONFIG_BUSY, pf->state);
7436}
7437
7438/**
7439 * i40e_force_link_state - Force the link status
7440 * @pf: board private structure
7441 * @is_up: whether the link state should be forced up or down
7442 **/
7443static int i40e_force_link_state(struct i40e_pf *pf, bool is_up)
7444{
7445	struct i40e_aq_get_phy_abilities_resp abilities;
7446	struct i40e_aq_set_phy_config config = {0};
7447	bool non_zero_phy_type = is_up;
7448	struct i40e_hw *hw = &pf->hw;
7449	u64 mask;
7450	u8 speed;
7451	int err;
7452
7453	/* Card might've been put in an unstable state by other drivers
7454	 * and applications, which causes incorrect speed values being
7455	 * set on startup. In order to clear speed registers, we call
7456	 * get_phy_capabilities twice, once to get initial state of
7457	 * available speeds, and once to get current PHY config.
7458	 */
7459	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities,
7460					   NULL);
7461	if (err) {
7462		dev_err(&pf->pdev->dev,
7463			"failed to get phy cap., ret =  %pe last_status =  %s\n",
7464			ERR_PTR(err),
7465			i40e_aq_str(hw, hw->aq.asq_last_status));
7466		return err;
7467	}
7468	speed = abilities.link_speed;
7469
7470	/* Get the current phy config */
7471	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
7472					   NULL);
7473	if (err) {
7474		dev_err(&pf->pdev->dev,
7475			"failed to get phy cap., ret =  %pe last_status =  %s\n",
7476			ERR_PTR(err),
7477			i40e_aq_str(hw, hw->aq.asq_last_status));
7478		return err;
7479	}
7480
7481	/* If link needs to go up, but was not forced to go down,
7482	 * and its speed values are OK, no need for a flap
7483	 * if non_zero_phy_type was set, still need to force up
7484	 */
7485	if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)
7486		non_zero_phy_type = true;
7487	else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0)
7488		return 0;
7489
7490	/* To force link we need to set bits for all supported PHY types,
7491	 * but there are now more than 32, so we need to split the bitmap
7492	 * across two fields.
7493	 */
7494	mask = I40E_PHY_TYPES_BITMASK;
7495	config.phy_type =
7496		non_zero_phy_type ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
7497	config.phy_type_ext =
7498		non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0;
7499	/* Copy the old settings, except of phy_type */
7500	config.abilities = abilities.abilities;
7501	if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) {
7502		if (is_up)
7503			config.abilities |= I40E_AQ_PHY_ENABLE_LINK;
7504		else
7505			config.abilities &= ~(I40E_AQ_PHY_ENABLE_LINK);
7506	}
7507	if (abilities.link_speed != 0)
7508		config.link_speed = abilities.link_speed;
7509	else
7510		config.link_speed = speed;
7511	config.eee_capability = abilities.eee_capability;
7512	config.eeer = abilities.eeer_val;
7513	config.low_power_ctrl = abilities.d3_lpan;
7514	config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
7515			    I40E_AQ_PHY_FEC_CONFIG_MASK;
7516	err = i40e_aq_set_phy_config(hw, &config, NULL);
7517
7518	if (err) {
7519		dev_err(&pf->pdev->dev,
7520			"set phy config ret =  %pe last_status =  %s\n",
7521			ERR_PTR(err),
7522			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
7523		return err;
7524	}
7525
7526	/* Update the link info */
7527	err = i40e_update_link_info(hw);
7528	if (err) {
7529		/* Wait a little bit (on 40G cards it sometimes takes a really
7530		 * long time for link to come back from the atomic reset)
7531		 * and try once more
7532		 */
7533		msleep(1000);
7534		i40e_update_link_info(hw);
7535	}
7536
7537	i40e_aq_set_link_restart_an(hw, is_up, NULL);
7538
7539	return 0;
7540}
7541
7542/**
7543 * i40e_up - Bring the connection back up after being down
7544 * @vsi: the VSI being configured
7545 **/
7546int i40e_up(struct i40e_vsi *vsi)
7547{
7548	int err;
7549
7550	if (vsi->type == I40E_VSI_MAIN &&
7551	    (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
7552	     vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
7553		i40e_force_link_state(vsi->back, true);
7554
7555	err = i40e_vsi_configure(vsi);
7556	if (!err)
7557		err = i40e_up_complete(vsi);
7558
7559	return err;
7560}
7561
7562/**
7563 * i40e_down - Shutdown the connection processing
7564 * @vsi: the VSI being stopped
7565 **/
7566void i40e_down(struct i40e_vsi *vsi)
7567{
7568	int i;
7569
7570	/* It is assumed that the caller of this function
7571	 * sets the vsi->state __I40E_VSI_DOWN bit.
7572	 */
7573	if (vsi->netdev) {
7574		netif_carrier_off(vsi->netdev);
7575		netif_tx_disable(vsi->netdev);
7576	}
7577	i40e_vsi_disable_irq(vsi);
7578	i40e_vsi_stop_rings(vsi);
7579	if (vsi->type == I40E_VSI_MAIN &&
7580	   (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
7581	    vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
7582		i40e_force_link_state(vsi->back, false);
7583	i40e_napi_disable_all(vsi);
7584
7585	for (i = 0; i < vsi->num_queue_pairs; i++) {
7586		i40e_clean_tx_ring(vsi->tx_rings[i]);
7587		if (i40e_enabled_xdp_vsi(vsi)) {
7588			/* Make sure that in-progress ndo_xdp_xmit and
7589			 * ndo_xsk_wakeup calls are completed.
7590			 */
7591			synchronize_rcu();
7592			i40e_clean_tx_ring(vsi->xdp_rings[i]);
7593		}
7594		i40e_clean_rx_ring(vsi->rx_rings[i]);
7595	}
7596
7597}
7598
7599/**
7600 * i40e_validate_mqprio_qopt- validate queue mapping info
7601 * @vsi: the VSI being configured
7602 * @mqprio_qopt: queue parametrs
7603 **/
7604static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
7605				     struct tc_mqprio_qopt_offload *mqprio_qopt)
7606{
7607	u64 sum_max_rate = 0;
7608	u64 max_rate = 0;
7609	int i;
7610
7611	if (mqprio_qopt->qopt.offset[0] != 0 ||
7612	    mqprio_qopt->qopt.num_tc < 1 ||
7613	    mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
7614		return -EINVAL;
7615	for (i = 0; ; i++) {
7616		if (!mqprio_qopt->qopt.count[i])
7617			return -EINVAL;
7618		if (mqprio_qopt->min_rate[i]) {
7619			dev_err(&vsi->back->pdev->dev,
7620				"Invalid min tx rate (greater than 0) specified\n");
7621			return -EINVAL;
7622		}
7623		max_rate = mqprio_qopt->max_rate[i];
7624		do_div(max_rate, I40E_BW_MBPS_DIVISOR);
7625		sum_max_rate += max_rate;
7626
7627		if (i >= mqprio_qopt->qopt.num_tc - 1)
7628			break;
7629		if (mqprio_qopt->qopt.offset[i + 1] !=
7630		    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
7631			return -EINVAL;
7632	}
7633	if (vsi->num_queue_pairs <
7634	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
7635		dev_err(&vsi->back->pdev->dev,
7636			"Failed to create traffic channel, insufficient number of queues.\n");
7637		return -EINVAL;
7638	}
7639	if (sum_max_rate > i40e_get_link_speed(vsi)) {
7640		dev_err(&vsi->back->pdev->dev,
7641			"Invalid max tx rate specified\n");
7642		return -EINVAL;
7643	}
7644	return 0;
7645}
7646
7647/**
7648 * i40e_vsi_set_default_tc_config - set default values for tc configuration
7649 * @vsi: the VSI being configured
7650 **/
7651static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
7652{
7653	u16 qcount;
7654	int i;
7655
7656	/* Only TC0 is enabled */
7657	vsi->tc_config.numtc = 1;
7658	vsi->tc_config.enabled_tc = 1;
7659	qcount = min_t(int, vsi->alloc_queue_pairs,
7660		       i40e_pf_get_max_q_per_tc(vsi->back));
7661	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
7662		/* For the TC that is not enabled set the offset to default
7663		 * queue and allocate one queue for the given TC.
7664		 */
7665		vsi->tc_config.tc_info[i].qoffset = 0;
7666		if (i == 0)
7667			vsi->tc_config.tc_info[i].qcount = qcount;
7668		else
7669			vsi->tc_config.tc_info[i].qcount = 1;
7670		vsi->tc_config.tc_info[i].netdev_tc = 0;
7671	}
7672}
7673
7674/**
7675 * i40e_del_macvlan_filter
7676 * @hw: pointer to the HW structure
7677 * @seid: seid of the channel VSI
7678 * @macaddr: the mac address to apply as a filter
7679 * @aq_err: store the admin Q error
7680 *
7681 * This function deletes a mac filter on the channel VSI which serves as the
7682 * macvlan. Returns 0 on success.
7683 **/
7684static int i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
7685				   const u8 *macaddr, int *aq_err)
7686{
7687	struct i40e_aqc_remove_macvlan_element_data element;
7688	int status;
7689
7690	memset(&element, 0, sizeof(element));
7691	ether_addr_copy(element.mac_addr, macaddr);
7692	element.vlan_tag = 0;
7693	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
7694	status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL);
7695	*aq_err = hw->aq.asq_last_status;
7696
7697	return status;
7698}
7699
7700/**
7701 * i40e_add_macvlan_filter
7702 * @hw: pointer to the HW structure
7703 * @seid: seid of the channel VSI
7704 * @macaddr: the mac address to apply as a filter
7705 * @aq_err: store the admin Q error
7706 *
7707 * This function adds a mac filter on the channel VSI which serves as the
7708 * macvlan. Returns 0 on success.
7709 **/
7710static int i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
7711				   const u8 *macaddr, int *aq_err)
7712{
7713	struct i40e_aqc_add_macvlan_element_data element;
7714	u16 cmd_flags = 0;
7715	int status;
7716
7717	ether_addr_copy(element.mac_addr, macaddr);
7718	element.vlan_tag = 0;
7719	element.queue_number = 0;
7720	element.match_method = I40E_AQC_MM_ERR_NO_RES;
7721	cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
7722	element.flags = cpu_to_le16(cmd_flags);
7723	status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL);
7724	*aq_err = hw->aq.asq_last_status;
7725
7726	return status;
7727}
7728
7729/**
7730 * i40e_reset_ch_rings - Reset the queue contexts in a channel
7731 * @vsi: the VSI we want to access
7732 * @ch: the channel we want to access
7733 */
7734static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch)
7735{
7736	struct i40e_ring *tx_ring, *rx_ring;
7737	u16 pf_q;
7738	int i;
7739
7740	for (i = 0; i < ch->num_queue_pairs; i++) {
7741		pf_q = ch->base_queue + i;
7742		tx_ring = vsi->tx_rings[pf_q];
7743		tx_ring->ch = NULL;
7744		rx_ring = vsi->rx_rings[pf_q];
7745		rx_ring->ch = NULL;
7746	}
7747}
7748
7749/**
7750 * i40e_free_macvlan_channels
7751 * @vsi: the VSI we want to access
7752 *
7753 * This function frees the Qs of the channel VSI from
7754 * the stack and also deletes the channel VSIs which
7755 * serve as macvlans.
7756 */
7757static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
7758{
7759	struct i40e_channel *ch, *ch_tmp;
7760	int ret;
7761
7762	if (list_empty(&vsi->macvlan_list))
7763		return;
7764
7765	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
7766		struct i40e_vsi *parent_vsi;
7767
7768		if (i40e_is_channel_macvlan(ch)) {
7769			i40e_reset_ch_rings(vsi, ch);
7770			clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
7771			netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev);
7772			netdev_set_sb_channel(ch->fwd->netdev, 0);
7773			kfree(ch->fwd);
7774			ch->fwd = NULL;
7775		}
7776
7777		list_del(&ch->list);
7778		parent_vsi = ch->parent_vsi;
7779		if (!parent_vsi || !ch->initialized) {
7780			kfree(ch);
7781			continue;
7782		}
7783
7784		/* remove the VSI */
7785		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
7786					     NULL);
7787		if (ret)
7788			dev_err(&vsi->back->pdev->dev,
7789				"unable to remove channel (%d) for parent VSI(%d)\n",
7790				ch->seid, parent_vsi->seid);
7791		kfree(ch);
7792	}
7793	vsi->macvlan_cnt = 0;
7794}
7795
7796/**
7797 * i40e_fwd_ring_up - bring the macvlan device up
7798 * @vsi: the VSI we want to access
7799 * @vdev: macvlan netdevice
7800 * @fwd: the private fwd structure
7801 */
7802static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
7803			    struct i40e_fwd_adapter *fwd)
7804{
7805	struct i40e_channel *ch = NULL, *ch_tmp, *iter;
7806	int ret = 0, num_tc = 1,  i, aq_err;
7807	struct i40e_pf *pf = vsi->back;
7808	struct i40e_hw *hw = &pf->hw;
7809
7810	/* Go through the list and find an available channel */
7811	list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) {
7812		if (!i40e_is_channel_macvlan(iter)) {
7813			iter->fwd = fwd;
7814			/* record configuration for macvlan interface in vdev */
7815			for (i = 0; i < num_tc; i++)
7816				netdev_bind_sb_channel_queue(vsi->netdev, vdev,
7817							     i,
7818							     iter->num_queue_pairs,
7819							     iter->base_queue);
7820			for (i = 0; i < iter->num_queue_pairs; i++) {
7821				struct i40e_ring *tx_ring, *rx_ring;
7822				u16 pf_q;
7823
7824				pf_q = iter->base_queue + i;
7825
7826				/* Get to TX ring ptr */
7827				tx_ring = vsi->tx_rings[pf_q];
7828				tx_ring->ch = iter;
7829
7830				/* Get the RX ring ptr */
7831				rx_ring = vsi->rx_rings[pf_q];
7832				rx_ring->ch = iter;
7833			}
7834			ch = iter;
7835			break;
7836		}
7837	}
7838
7839	if (!ch)
7840		return -EINVAL;
7841
7842	/* Guarantee all rings are updated before we update the
7843	 * MAC address filter.
7844	 */
7845	wmb();
7846
7847	/* Add a mac filter */
7848	ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err);
7849	if (ret) {
7850		/* if we cannot add the MAC rule then disable the offload */
7851		macvlan_release_l2fw_offload(vdev);
7852		for (i = 0; i < ch->num_queue_pairs; i++) {
7853			struct i40e_ring *rx_ring;
7854			u16 pf_q;
7855
7856			pf_q = ch->base_queue + i;
7857			rx_ring = vsi->rx_rings[pf_q];
7858			rx_ring->netdev = NULL;
7859		}
7860		dev_info(&pf->pdev->dev,
7861			 "Error adding mac filter on macvlan err %pe, aq_err %s\n",
7862			  ERR_PTR(ret),
7863			  i40e_aq_str(hw, aq_err));
7864		netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n");
7865	}
7866
7867	return ret;
7868}
7869
7870/**
7871 * i40e_setup_macvlans - create the channels which will be macvlans
7872 * @vsi: the VSI we want to access
7873 * @macvlan_cnt: no. of macvlans to be setup
7874 * @qcnt: no. of Qs per macvlan
7875 * @vdev: macvlan netdevice
7876 */
7877static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
7878			       struct net_device *vdev)
7879{
7880	struct i40e_pf *pf = vsi->back;
7881	struct i40e_hw *hw = &pf->hw;
7882	struct i40e_vsi_context ctxt;
7883	u16 sections, qmap, num_qps;
7884	struct i40e_channel *ch;
7885	int i, pow, ret = 0;
7886	u8 offset = 0;
7887
7888	if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt)
7889		return -EINVAL;
7890
7891	num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt);
7892
7893	/* find the next higher power-of-2 of num queue pairs */
7894	pow = fls(roundup_pow_of_two(num_qps) - 1);
7895
7896	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
7897		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
7898
7899	/* Setup context bits for the main VSI */
7900	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
7901	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
7902	memset(&ctxt, 0, sizeof(ctxt));
7903	ctxt.seid = vsi->seid;
7904	ctxt.pf_num = vsi->back->hw.pf_id;
7905	ctxt.vf_num = 0;
7906	ctxt.uplink_seid = vsi->uplink_seid;
7907	ctxt.info = vsi->info;
7908	ctxt.info.tc_mapping[0] = cpu_to_le16(qmap);
7909	ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
7910	ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
7911	ctxt.info.valid_sections |= cpu_to_le16(sections);
7912
7913	/* Reconfigure RSS for main VSI with new max queue count */
7914	vsi->rss_size = max_t(u16, num_qps, qcnt);
7915	ret = i40e_vsi_config_rss(vsi);
7916	if (ret) {
7917		dev_info(&pf->pdev->dev,
7918			 "Failed to reconfig RSS for num_queues (%u)\n",
7919			 vsi->rss_size);
7920		return ret;
7921	}
7922	vsi->reconfig_rss = true;
7923	dev_dbg(&vsi->back->pdev->dev,
7924		"Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
7925	vsi->next_base_queue = num_qps;
7926	vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
7927
7928	/* Update the VSI after updating the VSI queue-mapping
7929	 * information
7930	 */
7931	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
7932	if (ret) {
7933		dev_info(&pf->pdev->dev,
7934			 "Update vsi tc config failed, err %pe aq_err %s\n",
7935			 ERR_PTR(ret),
7936			 i40e_aq_str(hw, hw->aq.asq_last_status));
7937		return ret;
7938	}
7939	/* update the local VSI info with updated queue map */
7940	i40e_vsi_update_queue_map(vsi, &ctxt);
7941	vsi->info.valid_sections = 0;
7942
7943	/* Create channels for macvlans */
7944	INIT_LIST_HEAD(&vsi->macvlan_list);
7945	for (i = 0; i < macvlan_cnt; i++) {
7946		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
7947		if (!ch) {
7948			ret = -ENOMEM;
7949			goto err_free;
7950		}
7951		INIT_LIST_HEAD(&ch->list);
7952		ch->num_queue_pairs = qcnt;
7953		if (!i40e_setup_channel(pf, vsi, ch)) {
7954			ret = -EINVAL;
7955			kfree(ch);
7956			goto err_free;
7957		}
7958		ch->parent_vsi = vsi;
7959		vsi->cnt_q_avail -= ch->num_queue_pairs;
7960		vsi->macvlan_cnt++;
7961		list_add_tail(&ch->list, &vsi->macvlan_list);
7962	}
7963
7964	return ret;
7965
7966err_free:
7967	dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
7968	i40e_free_macvlan_channels(vsi);
7969
7970	return ret;
7971}
7972
7973/**
7974 * i40e_fwd_add - configure macvlans
7975 * @netdev: net device to configure
7976 * @vdev: macvlan netdevice
7977 **/
7978static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
7979{
7980	struct i40e_netdev_priv *np = netdev_priv(netdev);
7981	u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors;
7982	struct i40e_vsi *vsi = np->vsi;
7983	struct i40e_pf *pf = vsi->back;
7984	struct i40e_fwd_adapter *fwd;
7985	int avail_macvlan, ret;
7986
7987	if ((pf->flags & I40E_FLAG_DCB_ENABLED)) {
7988		netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
7989		return ERR_PTR(-EINVAL);
7990	}
7991	if (i40e_is_tc_mqprio_enabled(pf)) {
7992		netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n");
7993		return ERR_PTR(-EINVAL);
7994	}
7995	if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) {
7996		netdev_info(netdev, "Not enough vectors available to support macvlans\n");
7997		return ERR_PTR(-EINVAL);
7998	}
7999
8000	/* The macvlan device has to be a single Q device so that the
8001	 * tc_to_txq field can be reused to pick the tx queue.
8002	 */
8003	if (netif_is_multiqueue(vdev))
8004		return ERR_PTR(-ERANGE);
8005
8006	if (!vsi->macvlan_cnt) {
8007		/* reserve bit 0 for the pf device */
8008		set_bit(0, vsi->fwd_bitmask);
8009
8010		/* Try to reserve as many queues as possible for macvlans. First
8011		 * reserve 3/4th of max vectors, then half, then quarter and
8012		 * calculate Qs per macvlan as you go
8013		 */
8014		vectors = pf->num_lan_msix;
8015		if (vectors <= I40E_MAX_MACVLANS && vectors > 64) {
8016			/* allocate 4 Qs per macvlan and 32 Qs to the PF*/
8017			q_per_macvlan = 4;
8018			macvlan_cnt = (vectors - 32) / 4;
8019		} else if (vectors <= 64 && vectors > 32) {
8020			/* allocate 2 Qs per macvlan and 16 Qs to the PF*/
8021			q_per_macvlan = 2;
8022			macvlan_cnt = (vectors - 16) / 2;
8023		} else if (vectors <= 32 && vectors > 16) {
8024			/* allocate 1 Q per macvlan and 16 Qs to the PF*/
8025			q_per_macvlan = 1;
8026			macvlan_cnt = vectors - 16;
8027		} else if (vectors <= 16 && vectors > 8) {
8028			/* allocate 1 Q per macvlan and 8 Qs to the PF */
8029			q_per_macvlan = 1;
8030			macvlan_cnt = vectors - 8;
8031		} else {
8032			/* allocate 1 Q per macvlan and 1 Q to the PF */
8033			q_per_macvlan = 1;
8034			macvlan_cnt = vectors - 1;
8035		}
8036
8037		if (macvlan_cnt == 0)
8038			return ERR_PTR(-EBUSY);
8039
8040		/* Quiesce VSI queues */
8041		i40e_quiesce_vsi(vsi);
8042
8043		/* sets up the macvlans but does not "enable" them */
8044		ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan,
8045					  vdev);
8046		if (ret)
8047			return ERR_PTR(ret);
8048
8049		/* Unquiesce VSI */
8050		i40e_unquiesce_vsi(vsi);
8051	}
8052	avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask,
8053					    vsi->macvlan_cnt);
8054	if (avail_macvlan >= I40E_MAX_MACVLANS)
8055		return ERR_PTR(-EBUSY);
8056
8057	/* create the fwd struct */
8058	fwd = kzalloc(sizeof(*fwd), GFP_KERNEL);
8059	if (!fwd)
8060		return ERR_PTR(-ENOMEM);
8061
8062	set_bit(avail_macvlan, vsi->fwd_bitmask);
8063	fwd->bit_no = avail_macvlan;
8064	netdev_set_sb_channel(vdev, avail_macvlan);
8065	fwd->netdev = vdev;
8066
8067	if (!netif_running(netdev))
8068		return fwd;
8069
8070	/* Set fwd ring up */
8071	ret = i40e_fwd_ring_up(vsi, vdev, fwd);
8072	if (ret) {
8073		/* unbind the queues and drop the subordinate channel config */
8074		netdev_unbind_sb_channel(netdev, vdev);
8075		netdev_set_sb_channel(vdev, 0);
8076
8077		kfree(fwd);
8078		return ERR_PTR(-EINVAL);
8079	}
8080
8081	return fwd;
8082}
8083
8084/**
8085 * i40e_del_all_macvlans - Delete all the mac filters on the channels
8086 * @vsi: the VSI we want to access
8087 */
8088static void i40e_del_all_macvlans(struct i40e_vsi *vsi)
8089{
8090	struct i40e_channel *ch, *ch_tmp;
8091	struct i40e_pf *pf = vsi->back;
8092	struct i40e_hw *hw = &pf->hw;
8093	int aq_err, ret = 0;
8094
8095	if (list_empty(&vsi->macvlan_list))
8096		return;
8097
8098	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
8099		if (i40e_is_channel_macvlan(ch)) {
8100			ret = i40e_del_macvlan_filter(hw, ch->seid,
8101						      i40e_channel_mac(ch),
8102						      &aq_err);
8103			if (!ret) {
8104				/* Reset queue contexts */
8105				i40e_reset_ch_rings(vsi, ch);
8106				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
8107				netdev_unbind_sb_channel(vsi->netdev,
8108							 ch->fwd->netdev);
8109				netdev_set_sb_channel(ch->fwd->netdev, 0);
8110				kfree(ch->fwd);
8111				ch->fwd = NULL;
8112			}
8113		}
8114	}
8115}
8116
8117/**
8118 * i40e_fwd_del - delete macvlan interfaces
8119 * @netdev: net device to configure
8120 * @vdev: macvlan netdevice
8121 */
8122static void i40e_fwd_del(struct net_device *netdev, void *vdev)
8123{
8124	struct i40e_netdev_priv *np = netdev_priv(netdev);
8125	struct i40e_fwd_adapter *fwd = vdev;
8126	struct i40e_channel *ch, *ch_tmp;
8127	struct i40e_vsi *vsi = np->vsi;
8128	struct i40e_pf *pf = vsi->back;
8129	struct i40e_hw *hw = &pf->hw;
8130	int aq_err, ret = 0;
8131
8132	/* Find the channel associated with the macvlan and del mac filter */
8133	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
8134		if (i40e_is_channel_macvlan(ch) &&
8135		    ether_addr_equal(i40e_channel_mac(ch),
8136				     fwd->netdev->dev_addr)) {
8137			ret = i40e_del_macvlan_filter(hw, ch->seid,
8138						      i40e_channel_mac(ch),
8139						      &aq_err);
8140			if (!ret) {
8141				/* Reset queue contexts */
8142				i40e_reset_ch_rings(vsi, ch);
8143				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
8144				netdev_unbind_sb_channel(netdev, fwd->netdev);
8145				netdev_set_sb_channel(fwd->netdev, 0);
8146				kfree(ch->fwd);
8147				ch->fwd = NULL;
8148			} else {
8149				dev_info(&pf->pdev->dev,
8150					 "Error deleting mac filter on macvlan err %pe, aq_err %s\n",
8151					  ERR_PTR(ret),
8152					  i40e_aq_str(hw, aq_err));
8153			}
8154			break;
8155		}
8156	}
8157}
8158
8159/**
8160 * i40e_setup_tc - configure multiple traffic classes
8161 * @netdev: net device to configure
8162 * @type_data: tc offload data
8163 **/
8164static int i40e_setup_tc(struct net_device *netdev, void *type_data)
8165{
8166	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
8167	struct i40e_netdev_priv *np = netdev_priv(netdev);
8168	struct i40e_vsi *vsi = np->vsi;
8169	struct i40e_pf *pf = vsi->back;
8170	u8 enabled_tc = 0, num_tc, hw;
8171	bool need_reset = false;
8172	int old_queue_pairs;
8173	int ret = -EINVAL;
8174	u16 mode;
8175	int i;
8176
8177	old_queue_pairs = vsi->num_queue_pairs;
8178	num_tc = mqprio_qopt->qopt.num_tc;
8179	hw = mqprio_qopt->qopt.hw;
8180	mode = mqprio_qopt->mode;
8181	if (!hw) {
8182		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
8183		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
8184		goto config_tc;
8185	}
8186
8187	/* Check if MFP enabled */
8188	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
8189		netdev_info(netdev,
8190			    "Configuring TC not supported in MFP mode\n");
8191		return ret;
8192	}
8193	switch (mode) {
8194	case TC_MQPRIO_MODE_DCB:
8195		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
8196
8197		/* Check if DCB enabled to continue */
8198		if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
8199			netdev_info(netdev,
8200				    "DCB is not enabled for adapter\n");
8201			return ret;
8202		}
8203
8204		/* Check whether tc count is within enabled limit */
8205		if (num_tc > i40e_pf_get_num_tc(pf)) {
8206			netdev_info(netdev,
8207				    "TC count greater than enabled on link for adapter\n");
8208			return ret;
8209		}
8210		break;
8211	case TC_MQPRIO_MODE_CHANNEL:
8212		if (pf->flags & I40E_FLAG_DCB_ENABLED) {
8213			netdev_info(netdev,
8214				    "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
8215			return ret;
8216		}
8217		if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
8218			return ret;
8219		ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
8220		if (ret)
8221			return ret;
8222		memcpy(&vsi->mqprio_qopt, mqprio_qopt,
8223		       sizeof(*mqprio_qopt));
8224		pf->flags |= I40E_FLAG_TC_MQPRIO;
8225		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
8226		break;
8227	default:
8228		return -EINVAL;
8229	}
8230
8231config_tc:
8232	/* Generate TC map for number of tc requested */
8233	for (i = 0; i < num_tc; i++)
8234		enabled_tc |= BIT(i);
8235
8236	/* Requesting same TC configuration as already enabled */
8237	if (enabled_tc == vsi->tc_config.enabled_tc &&
8238	    mode != TC_MQPRIO_MODE_CHANNEL)
8239		return 0;
8240
8241	/* Quiesce VSI queues */
8242	i40e_quiesce_vsi(vsi);
8243
8244	if (!hw && !i40e_is_tc_mqprio_enabled(pf))
8245		i40e_remove_queue_channels(vsi);
8246
8247	/* Configure VSI for enabled TCs */
8248	ret = i40e_vsi_config_tc(vsi, enabled_tc);
8249	if (ret) {
8250		netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
8251			    vsi->seid);
8252		need_reset = true;
8253		goto exit;
8254	} else if (enabled_tc &&
8255		   (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
8256		netdev_info(netdev,
8257			    "Failed to create channel. Override queues (%u) not power of 2\n",
8258			    vsi->tc_config.tc_info[0].qcount);
8259		ret = -EINVAL;
8260		need_reset = true;
8261		goto exit;
8262	}
8263
8264	dev_info(&vsi->back->pdev->dev,
8265		 "Setup channel (id:%u) utilizing num_queues %d\n",
8266		 vsi->seid, vsi->tc_config.tc_info[0].qcount);
8267
8268	if (i40e_is_tc_mqprio_enabled(pf)) {
8269		if (vsi->mqprio_qopt.max_rate[0]) {
8270			u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
8271						  vsi->mqprio_qopt.max_rate[0]);
8272
8273			ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
8274			if (!ret) {
8275				u64 credits = max_tx_rate;
8276
8277				do_div(credits, I40E_BW_CREDIT_DIVISOR);
8278				dev_dbg(&vsi->back->pdev->dev,
8279					"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
8280					max_tx_rate,
8281					credits,
8282					vsi->seid);
8283			} else {
8284				need_reset = true;
8285				goto exit;
8286			}
8287		}
8288		ret = i40e_configure_queue_channels(vsi);
8289		if (ret) {
8290			vsi->num_queue_pairs = old_queue_pairs;
8291			netdev_info(netdev,
8292				    "Failed configuring queue channels\n");
8293			need_reset = true;
8294			goto exit;
8295		}
8296	}
8297
8298exit:
8299	/* Reset the configuration data to defaults, only TC0 is enabled */
8300	if (need_reset) {
8301		i40e_vsi_set_default_tc_config(vsi);
8302		need_reset = false;
8303	}
8304
8305	/* Unquiesce VSI */
8306	i40e_unquiesce_vsi(vsi);
8307	return ret;
8308}
8309
8310/**
8311 * i40e_set_cld_element - sets cloud filter element data
8312 * @filter: cloud filter rule
8313 * @cld: ptr to cloud filter element data
8314 *
8315 * This is helper function to copy data into cloud filter element
8316 **/
8317static inline void
8318i40e_set_cld_element(struct i40e_cloud_filter *filter,
8319		     struct i40e_aqc_cloud_filters_element_data *cld)
8320{
8321	u32 ipa;
8322	int i;
8323
8324	memset(cld, 0, sizeof(*cld));
8325	ether_addr_copy(cld->outer_mac, filter->dst_mac);
8326	ether_addr_copy(cld->inner_mac, filter->src_mac);
8327
8328	if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
8329		return;
8330
8331	if (filter->n_proto == ETH_P_IPV6) {
8332#define IPV6_MAX_INDEX	(ARRAY_SIZE(filter->dst_ipv6) - 1)
8333		for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) {
8334			ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
8335
8336			*(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa);
8337		}
8338	} else {
8339		ipa = be32_to_cpu(filter->dst_ipv4);
8340
8341		memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
8342	}
8343
8344	cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
8345
8346	/* tenant_id is not supported by FW now, once the support is enabled
8347	 * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id)
8348	 */
8349	if (filter->tenant_id)
8350		return;
8351}
8352
8353/**
8354 * i40e_add_del_cloud_filter - Add/del cloud filter
8355 * @vsi: pointer to VSI
8356 * @filter: cloud filter rule
8357 * @add: if true, add, if false, delete
8358 *
8359 * Add or delete a cloud filter for a specific flow spec.
8360 * Returns 0 if the filter were successfully added.
8361 **/
8362int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
8363			      struct i40e_cloud_filter *filter, bool add)
8364{
8365	struct i40e_aqc_cloud_filters_element_data cld_filter;
8366	struct i40e_pf *pf = vsi->back;
8367	int ret;
8368	static const u16 flag_table[128] = {
8369		[I40E_CLOUD_FILTER_FLAGS_OMAC]  =
8370			I40E_AQC_ADD_CLOUD_FILTER_OMAC,
8371		[I40E_CLOUD_FILTER_FLAGS_IMAC]  =
8372			I40E_AQC_ADD_CLOUD_FILTER_IMAC,
8373		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN]  =
8374			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
8375		[I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
8376			I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
8377		[I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
8378			I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
8379		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
8380			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
8381		[I40E_CLOUD_FILTER_FLAGS_IIP] =
8382			I40E_AQC_ADD_CLOUD_FILTER_IIP,
8383	};
8384
8385	if (filter->flags >= ARRAY_SIZE(flag_table))
8386		return -EIO;
8387
8388	memset(&cld_filter, 0, sizeof(cld_filter));
8389
8390	/* copy element needed to add cloud filter from filter */
8391	i40e_set_cld_element(filter, &cld_filter);
8392
8393	if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
8394		cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
8395					     I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
8396
8397	if (filter->n_proto == ETH_P_IPV6)
8398		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
8399						I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
8400	else
8401		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
8402						I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
8403
8404	if (add)
8405		ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
8406						&cld_filter, 1);
8407	else
8408		ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
8409						&cld_filter, 1);
8410	if (ret)
8411		dev_dbg(&pf->pdev->dev,
8412			"Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
8413			add ? "add" : "delete", filter->dst_port, ret,
8414			pf->hw.aq.asq_last_status);
8415	else
8416		dev_info(&pf->pdev->dev,
8417			 "%s cloud filter for VSI: %d\n",
8418			 add ? "Added" : "Deleted", filter->seid);
8419	return ret;
8420}
8421
8422/**
8423 * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
8424 * @vsi: pointer to VSI
8425 * @filter: cloud filter rule
8426 * @add: if true, add, if false, delete
8427 *
8428 * Add or delete a cloud filter for a specific flow spec using big buffer.
8429 * Returns 0 if the filter were successfully added.
8430 **/
8431int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
8432				      struct i40e_cloud_filter *filter,
8433				      bool add)
8434{
8435	struct i40e_aqc_cloud_filters_element_bb cld_filter;
8436	struct i40e_pf *pf = vsi->back;
8437	int ret;
8438
8439	/* Both (src/dst) valid mac_addr are not supported */
8440	if ((is_valid_ether_addr(filter->dst_mac) &&
8441	     is_valid_ether_addr(filter->src_mac)) ||
8442	    (is_multicast_ether_addr(filter->dst_mac) &&
8443	     is_multicast_ether_addr(filter->src_mac)))
8444		return -EOPNOTSUPP;
8445
8446	/* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
8447	 * ports are not supported via big buffer now.
8448	 */
8449	if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP)
8450		return -EOPNOTSUPP;
8451
8452	/* adding filter using src_port/src_ip is not supported at this stage */
8453	if (filter->src_port ||
8454	    (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) ||
8455	    !ipv6_addr_any(&filter->ip.v6.src_ip6))
8456		return -EOPNOTSUPP;
8457
8458	memset(&cld_filter, 0, sizeof(cld_filter));
8459
8460	/* copy element needed to add cloud filter from filter */
8461	i40e_set_cld_element(filter, &cld_filter.element);
8462
8463	if (is_valid_ether_addr(filter->dst_mac) ||
8464	    is_valid_ether_addr(filter->src_mac) ||
8465	    is_multicast_ether_addr(filter->dst_mac) ||
8466	    is_multicast_ether_addr(filter->src_mac)) {
8467		/* MAC + IP : unsupported mode */
8468		if (filter->dst_ipv4)
8469			return -EOPNOTSUPP;
8470
8471		/* since we validated that L4 port must be valid before
8472		 * we get here, start with respective "flags" value
8473		 * and update if vlan is present or not
8474		 */
8475		cld_filter.element.flags =
8476			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
8477
8478		if (filter->vlan_id) {
8479			cld_filter.element.flags =
8480			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
8481		}
8482
8483	} else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) ||
8484		   !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
8485		cld_filter.element.flags =
8486				cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
8487		if (filter->n_proto == ETH_P_IPV6)
8488			cld_filter.element.flags |=
8489				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
8490		else
8491			cld_filter.element.flags |=
8492				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
8493	} else {
8494		dev_err(&pf->pdev->dev,
8495			"either mac or ip has to be valid for cloud filter\n");
8496		return -EINVAL;
8497	}
8498
8499	/* Now copy L4 port in Byte 6..7 in general fields */
8500	cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
8501						be16_to_cpu(filter->dst_port);
8502
8503	if (add) {
8504		/* Validate current device switch mode, change if necessary */
8505		ret = i40e_validate_and_set_switch_mode(vsi);
8506		if (ret) {
8507			dev_err(&pf->pdev->dev,
8508				"failed to set switch mode, ret %d\n",
8509				ret);
8510			return ret;
8511		}
8512
8513		ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
8514						   &cld_filter, 1);
8515	} else {
8516		ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
8517						   &cld_filter, 1);
8518	}
8519
8520	if (ret)
8521		dev_dbg(&pf->pdev->dev,
8522			"Failed to %s cloud filter(big buffer) err %d aq_err %d\n",
8523			add ? "add" : "delete", ret, pf->hw.aq.asq_last_status);
8524	else
8525		dev_info(&pf->pdev->dev,
8526			 "%s cloud filter for VSI: %d, L4 port: %d\n",
8527			 add ? "add" : "delete", filter->seid,
8528			 ntohs(filter->dst_port));
8529	return ret;
8530}
8531
8532/**
8533 * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
8534 * @vsi: Pointer to VSI
8535 * @f: Pointer to struct flow_cls_offload
8536 * @filter: Pointer to cloud filter structure
8537 *
8538 **/
8539static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
8540				 struct flow_cls_offload *f,
8541				 struct i40e_cloud_filter *filter)
8542{
8543	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
8544	struct flow_dissector *dissector = rule->match.dissector;
8545	u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
8546	struct i40e_pf *pf = vsi->back;
8547	u8 field_flags = 0;
8548
8549	if (dissector->used_keys &
8550	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
8551	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
8552	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
8553	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
8554	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
8555	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
8556	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
8557	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
8558		dev_err(&pf->pdev->dev, "Unsupported key used: 0x%llx\n",
8559			dissector->used_keys);
8560		return -EOPNOTSUPP;
8561	}
8562
8563	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
8564		struct flow_match_enc_keyid match;
8565
8566		flow_rule_match_enc_keyid(rule, &match);
8567		if (match.mask->keyid != 0)
8568			field_flags |= I40E_CLOUD_FIELD_TEN_ID;
8569
8570		filter->tenant_id = be32_to_cpu(match.key->keyid);
8571	}
8572
8573	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
8574		struct flow_match_basic match;
8575
8576		flow_rule_match_basic(rule, &match);
8577		n_proto_key = ntohs(match.key->n_proto);
8578		n_proto_mask = ntohs(match.mask->n_proto);
8579
8580		if (n_proto_key == ETH_P_ALL) {
8581			n_proto_key = 0;
8582			n_proto_mask = 0;
8583		}
8584		filter->n_proto = n_proto_key & n_proto_mask;
8585		filter->ip_proto = match.key->ip_proto;
8586	}
8587
8588	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
8589		struct flow_match_eth_addrs match;
8590
8591		flow_rule_match_eth_addrs(rule, &match);
8592
8593		/* use is_broadcast and is_zero to check for all 0xf or 0 */
8594		if (!is_zero_ether_addr(match.mask->dst)) {
8595			if (is_broadcast_ether_addr(match.mask->dst)) {
8596				field_flags |= I40E_CLOUD_FIELD_OMAC;
8597			} else {
8598				dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
8599					match.mask->dst);
8600				return -EIO;
8601			}
8602		}
8603
8604		if (!is_zero_ether_addr(match.mask->src)) {
8605			if (is_broadcast_ether_addr(match.mask->src)) {
8606				field_flags |= I40E_CLOUD_FIELD_IMAC;
8607			} else {
8608				dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
8609					match.mask->src);
8610				return -EIO;
8611			}
8612		}
8613		ether_addr_copy(filter->dst_mac, match.key->dst);
8614		ether_addr_copy(filter->src_mac, match.key->src);
8615	}
8616
8617	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
8618		struct flow_match_vlan match;
8619
8620		flow_rule_match_vlan(rule, &match);
8621		if (match.mask->vlan_id) {
8622			if (match.mask->vlan_id == VLAN_VID_MASK) {
8623				field_flags |= I40E_CLOUD_FIELD_IVLAN;
8624
8625			} else {
8626				dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
8627					match.mask->vlan_id);
8628				return -EIO;
8629			}
8630		}
8631
8632		filter->vlan_id = cpu_to_be16(match.key->vlan_id);
8633	}
8634
8635	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
8636		struct flow_match_control match;
8637
8638		flow_rule_match_control(rule, &match);
8639		addr_type = match.key->addr_type;
8640	}
8641
8642	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
8643		struct flow_match_ipv4_addrs match;
8644
8645		flow_rule_match_ipv4_addrs(rule, &match);
8646		if (match.mask->dst) {
8647			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
8648				field_flags |= I40E_CLOUD_FIELD_IIP;
8649			} else {
8650				dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
8651					&match.mask->dst);
8652				return -EIO;
8653			}
8654		}
8655
8656		if (match.mask->src) {
8657			if (match.mask->src == cpu_to_be32(0xffffffff)) {
8658				field_flags |= I40E_CLOUD_FIELD_IIP;
8659			} else {
8660				dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
8661					&match.mask->src);
8662				return -EIO;
8663			}
8664		}
8665
8666		if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
8667			dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
8668			return -EIO;
8669		}
8670		filter->dst_ipv4 = match.key->dst;
8671		filter->src_ipv4 = match.key->src;
8672	}
8673
8674	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
8675		struct flow_match_ipv6_addrs match;
8676
8677		flow_rule_match_ipv6_addrs(rule, &match);
8678
8679		/* src and dest IPV6 address should not be LOOPBACK
8680		 * (0:0:0:0:0:0:0:1), which can be represented as ::1
8681		 */
8682		if (ipv6_addr_loopback(&match.key->dst) ||
8683		    ipv6_addr_loopback(&match.key->src)) {
8684			dev_err(&pf->pdev->dev,
8685				"Bad ipv6, addr is LOOPBACK\n");
8686			return -EIO;
8687		}
8688		if (!ipv6_addr_any(&match.mask->dst) ||
8689		    !ipv6_addr_any(&match.mask->src))
8690			field_flags |= I40E_CLOUD_FIELD_IIP;
8691
8692		memcpy(&filter->src_ipv6, &match.key->src.s6_addr32,
8693		       sizeof(filter->src_ipv6));
8694		memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32,
8695		       sizeof(filter->dst_ipv6));
8696	}
8697
8698	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
8699		struct flow_match_ports match;
8700
8701		flow_rule_match_ports(rule, &match);
8702		if (match.mask->src) {
8703			if (match.mask->src == cpu_to_be16(0xffff)) {
8704				field_flags |= I40E_CLOUD_FIELD_IIP;
8705			} else {
8706				dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
8707					be16_to_cpu(match.mask->src));
8708				return -EIO;
8709			}
8710		}
8711
8712		if (match.mask->dst) {
8713			if (match.mask->dst == cpu_to_be16(0xffff)) {
8714				field_flags |= I40E_CLOUD_FIELD_IIP;
8715			} else {
8716				dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
8717					be16_to_cpu(match.mask->dst));
8718				return -EIO;
8719			}
8720		}
8721
8722		filter->dst_port = match.key->dst;
8723		filter->src_port = match.key->src;
8724
8725		switch (filter->ip_proto) {
8726		case IPPROTO_TCP:
8727		case IPPROTO_UDP:
8728			break;
8729		default:
8730			dev_err(&pf->pdev->dev,
8731				"Only UDP and TCP transport are supported\n");
8732			return -EINVAL;
8733		}
8734	}
8735	filter->flags = field_flags;
8736	return 0;
8737}
8738
8739/**
8740 * i40e_handle_tclass: Forward to a traffic class on the device
8741 * @vsi: Pointer to VSI
8742 * @tc: traffic class index on the device
8743 * @filter: Pointer to cloud filter structure
8744 *
8745 **/
8746static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
8747			      struct i40e_cloud_filter *filter)
8748{
8749	struct i40e_channel *ch, *ch_tmp;
8750
8751	/* direct to a traffic class on the same device */
8752	if (tc == 0) {
8753		filter->seid = vsi->seid;
8754		return 0;
8755	} else if (vsi->tc_config.enabled_tc & BIT(tc)) {
8756		if (!filter->dst_port) {
8757			dev_err(&vsi->back->pdev->dev,
8758				"Specify destination port to direct to traffic class that is not default\n");
8759			return -EINVAL;
8760		}
8761		if (list_empty(&vsi->ch_list))
8762			return -EINVAL;
8763		list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list,
8764					 list) {
8765			if (ch->seid == vsi->tc_seid_map[tc])
8766				filter->seid = ch->seid;
8767		}
8768		return 0;
8769	}
8770	dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
8771	return -EINVAL;
8772}
8773
8774/**
8775 * i40e_configure_clsflower - Configure tc flower filters
8776 * @vsi: Pointer to VSI
8777 * @cls_flower: Pointer to struct flow_cls_offload
8778 *
8779 **/
8780static int i40e_configure_clsflower(struct i40e_vsi *vsi,
8781				    struct flow_cls_offload *cls_flower)
8782{
8783	int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
8784	struct i40e_cloud_filter *filter = NULL;
8785	struct i40e_pf *pf = vsi->back;
8786	int err = 0;
8787
8788	if (tc < 0) {
8789		dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
8790		return -EOPNOTSUPP;
8791	}
8792
8793	if (!tc) {
8794		dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
8795		return -EINVAL;
8796	}
8797
8798	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
8799	    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
8800		return -EBUSY;
8801
8802	if (pf->fdir_pf_active_filters ||
8803	    (!hlist_empty(&pf->fdir_filter_list))) {
8804		dev_err(&vsi->back->pdev->dev,
8805			"Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n");
8806		return -EINVAL;
8807	}
8808
8809	if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
8810		dev_err(&vsi->back->pdev->dev,
8811			"Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
8812		vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
8813		vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
8814	}
8815
8816	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
8817	if (!filter)
8818		return -ENOMEM;
8819
8820	filter->cookie = cls_flower->cookie;
8821
8822	err = i40e_parse_cls_flower(vsi, cls_flower, filter);
8823	if (err < 0)
8824		goto err;
8825
8826	err = i40e_handle_tclass(vsi, tc, filter);
8827	if (err < 0)
8828		goto err;
8829
8830	/* Add cloud filter */
8831	if (filter->dst_port)
8832		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
8833	else
8834		err = i40e_add_del_cloud_filter(vsi, filter, true);
8835
8836	if (err) {
8837		dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n",
8838			err);
8839		goto err;
8840	}
8841
8842	/* add filter to the ordered list */
8843	INIT_HLIST_NODE(&filter->cloud_node);
8844
8845	hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
8846
8847	pf->num_cloud_filters++;
8848
8849	return err;
8850err:
8851	kfree(filter);
8852	return err;
8853}
8854
8855/**
8856 * i40e_find_cloud_filter - Find the could filter in the list
8857 * @vsi: Pointer to VSI
8858 * @cookie: filter specific cookie
8859 *
8860 **/
8861static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
8862							unsigned long *cookie)
8863{
8864	struct i40e_cloud_filter *filter = NULL;
8865	struct hlist_node *node2;
8866
8867	hlist_for_each_entry_safe(filter, node2,
8868				  &vsi->back->cloud_filter_list, cloud_node)
8869		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
8870			return filter;
8871	return NULL;
8872}
8873
8874/**
8875 * i40e_delete_clsflower - Remove tc flower filters
8876 * @vsi: Pointer to VSI
8877 * @cls_flower: Pointer to struct flow_cls_offload
8878 *
8879 **/
8880static int i40e_delete_clsflower(struct i40e_vsi *vsi,
8881				 struct flow_cls_offload *cls_flower)
8882{
8883	struct i40e_cloud_filter *filter = NULL;
8884	struct i40e_pf *pf = vsi->back;
8885	int err = 0;
8886
8887	filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
8888
8889	if (!filter)
8890		return -EINVAL;
8891
8892	hash_del(&filter->cloud_node);
8893
8894	if (filter->dst_port)
8895		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
8896	else
8897		err = i40e_add_del_cloud_filter(vsi, filter, false);
8898
8899	kfree(filter);
8900	if (err) {
8901		dev_err(&pf->pdev->dev,
8902			"Failed to delete cloud filter, err %pe\n",
8903			ERR_PTR(err));
8904		return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
8905	}
8906
8907	pf->num_cloud_filters--;
8908	if (!pf->num_cloud_filters)
8909		if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
8910		    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
8911			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
8912			pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
8913			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
8914		}
8915	return 0;
8916}
8917
8918/**
8919 * i40e_setup_tc_cls_flower - flower classifier offloads
8920 * @np: net device to configure
8921 * @cls_flower: offload data
8922 **/
8923static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
8924				    struct flow_cls_offload *cls_flower)
8925{
8926	struct i40e_vsi *vsi = np->vsi;
8927
8928	switch (cls_flower->command) {
8929	case FLOW_CLS_REPLACE:
8930		return i40e_configure_clsflower(vsi, cls_flower);
8931	case FLOW_CLS_DESTROY:
8932		return i40e_delete_clsflower(vsi, cls_flower);
8933	case FLOW_CLS_STATS:
8934		return -EOPNOTSUPP;
8935	default:
8936		return -EOPNOTSUPP;
8937	}
8938}
8939
8940static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
8941				  void *cb_priv)
8942{
8943	struct i40e_netdev_priv *np = cb_priv;
8944
8945	if (!tc_cls_can_offload_and_chain0(np->vsi->netdev, type_data))
8946		return -EOPNOTSUPP;
8947
8948	switch (type) {
8949	case TC_SETUP_CLSFLOWER:
8950		return i40e_setup_tc_cls_flower(np, type_data);
8951
8952	default:
8953		return -EOPNOTSUPP;
8954	}
8955}
8956
8957static LIST_HEAD(i40e_block_cb_list);
8958
8959static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
8960			   void *type_data)
8961{
8962	struct i40e_netdev_priv *np = netdev_priv(netdev);
8963
8964	switch (type) {
8965	case TC_SETUP_QDISC_MQPRIO:
8966		return i40e_setup_tc(netdev, type_data);
8967	case TC_SETUP_BLOCK:
8968		return flow_block_cb_setup_simple(type_data,
8969						  &i40e_block_cb_list,
8970						  i40e_setup_tc_block_cb,
8971						  np, np, true);
8972	default:
8973		return -EOPNOTSUPP;
8974	}
8975}
8976
8977/**
8978 * i40e_open - Called when a network interface is made active
8979 * @netdev: network interface device structure
8980 *
8981 * The open entry point is called when a network interface is made
8982 * active by the system (IFF_UP).  At this point all resources needed
8983 * for transmit and receive operations are allocated, the interrupt
8984 * handler is registered with the OS, the netdev watchdog subtask is
8985 * enabled, and the stack is notified that the interface is ready.
8986 *
8987 * Returns 0 on success, negative value on failure
8988 **/
8989int i40e_open(struct net_device *netdev)
8990{
8991	struct i40e_netdev_priv *np = netdev_priv(netdev);
8992	struct i40e_vsi *vsi = np->vsi;
8993	struct i40e_pf *pf = vsi->back;
8994	int err;
8995
8996	/* disallow open during test or if eeprom is broken */
8997	if (test_bit(__I40E_TESTING, pf->state) ||
8998	    test_bit(__I40E_BAD_EEPROM, pf->state))
8999		return -EBUSY;
9000
9001	netif_carrier_off(netdev);
9002
9003	if (i40e_force_link_state(pf, true))
9004		return -EAGAIN;
9005
9006	err = i40e_vsi_open(vsi);
9007	if (err)
9008		return err;
9009
9010	/* configure global TSO hardware offload settings */
9011	wr32(&pf->hw, I40E_GLLAN_TSOMSK_F, be32_to_cpu(TCP_FLAG_PSH |
9012						       TCP_FLAG_FIN) >> 16);
9013	wr32(&pf->hw, I40E_GLLAN_TSOMSK_M, be32_to_cpu(TCP_FLAG_PSH |
9014						       TCP_FLAG_FIN |
9015						       TCP_FLAG_CWR) >> 16);
9016	wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16);
9017	udp_tunnel_get_rx_info(netdev);
9018
9019	return 0;
9020}
9021
9022/**
9023 * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues
9024 * @vsi: vsi structure
9025 *
9026 * This updates netdev's number of tx/rx queues
9027 *
9028 * Returns status of setting tx/rx queues
9029 **/
9030static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi)
9031{
9032	int ret;
9033
9034	ret = netif_set_real_num_rx_queues(vsi->netdev,
9035					   vsi->num_queue_pairs);
9036	if (ret)
9037		return ret;
9038
9039	return netif_set_real_num_tx_queues(vsi->netdev,
9040					    vsi->num_queue_pairs);
9041}
9042
9043/**
9044 * i40e_vsi_open -
9045 * @vsi: the VSI to open
9046 *
9047 * Finish initialization of the VSI.
9048 *
9049 * Returns 0 on success, negative value on failure
9050 *
9051 * Note: expects to be called while under rtnl_lock()
9052 **/
9053int i40e_vsi_open(struct i40e_vsi *vsi)
9054{
9055	struct i40e_pf *pf = vsi->back;
9056	char int_name[I40E_INT_NAME_STR_LEN];
9057	int err;
9058
9059	/* allocate descriptors */
9060	err = i40e_vsi_setup_tx_resources(vsi);
9061	if (err)
9062		goto err_setup_tx;
9063	err = i40e_vsi_setup_rx_resources(vsi);
9064	if (err)
9065		goto err_setup_rx;
9066
9067	err = i40e_vsi_configure(vsi);
9068	if (err)
9069		goto err_setup_rx;
9070
9071	if (vsi->netdev) {
9072		snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
9073			 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
9074		err = i40e_vsi_request_irq(vsi, int_name);
9075		if (err)
9076			goto err_setup_rx;
9077
9078		/* Notify the stack of the actual queue counts. */
9079		err = i40e_netif_set_realnum_tx_rx_queues(vsi);
9080		if (err)
9081			goto err_set_queues;
9082
9083	} else if (vsi->type == I40E_VSI_FDIR) {
9084		snprintf(int_name, sizeof(int_name) - 1, "%s-%s:fdir",
9085			 dev_driver_string(&pf->pdev->dev),
9086			 dev_name(&pf->pdev->dev));
9087		err = i40e_vsi_request_irq(vsi, int_name);
9088		if (err)
9089			goto err_setup_rx;
9090
9091	} else {
9092		err = -EINVAL;
9093		goto err_setup_rx;
9094	}
9095
9096	err = i40e_up_complete(vsi);
9097	if (err)
9098		goto err_up_complete;
9099
9100	return 0;
9101
9102err_up_complete:
9103	i40e_down(vsi);
9104err_set_queues:
9105	i40e_vsi_free_irq(vsi);
9106err_setup_rx:
9107	i40e_vsi_free_rx_resources(vsi);
9108err_setup_tx:
9109	i40e_vsi_free_tx_resources(vsi);
9110	if (vsi == pf->vsi[pf->lan_vsi])
9111		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
9112
9113	return err;
9114}
9115
9116/**
9117 * i40e_fdir_filter_exit - Cleans up the Flow Director accounting
9118 * @pf: Pointer to PF
9119 *
9120 * This function destroys the hlist where all the Flow Director
9121 * filters were saved.
9122 **/
9123static void i40e_fdir_filter_exit(struct i40e_pf *pf)
9124{
9125	struct i40e_fdir_filter *filter;
9126	struct i40e_flex_pit *pit_entry, *tmp;
9127	struct hlist_node *node2;
9128
9129	hlist_for_each_entry_safe(filter, node2,
9130				  &pf->fdir_filter_list, fdir_node) {
9131		hlist_del(&filter->fdir_node);
9132		kfree(filter);
9133	}
9134
9135	list_for_each_entry_safe(pit_entry, tmp, &pf->l3_flex_pit_list, list) {
9136		list_del(&pit_entry->list);
9137		kfree(pit_entry);
9138	}
9139	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
9140
9141	list_for_each_entry_safe(pit_entry, tmp, &pf->l4_flex_pit_list, list) {
9142		list_del(&pit_entry->list);
9143		kfree(pit_entry);
9144	}
9145	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
9146
9147	pf->fdir_pf_active_filters = 0;
9148	i40e_reset_fdir_filter_cnt(pf);
9149
9150	/* Reprogram the default input set for TCP/IPv4 */
9151	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
9152				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
9153				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9154
9155	/* Reprogram the default input set for TCP/IPv6 */
9156	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_TCP,
9157				I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
9158				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9159
9160	/* Reprogram the default input set for UDP/IPv4 */
9161	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP,
9162				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
9163				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9164
9165	/* Reprogram the default input set for UDP/IPv6 */
9166	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_UDP,
9167				I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
9168				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9169
9170	/* Reprogram the default input set for SCTP/IPv4 */
9171	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP,
9172				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
9173				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9174
9175	/* Reprogram the default input set for SCTP/IPv6 */
9176	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_SCTP,
9177				I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
9178				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9179
9180	/* Reprogram the default input set for Other/IPv4 */
9181	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
9182				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
9183
9184	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4,
9185				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
9186
9187	/* Reprogram the default input set for Other/IPv6 */
9188	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_OTHER,
9189				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
9190
9191	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV6,
9192				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
9193}
9194
9195/**
9196 * i40e_cloud_filter_exit - Cleans up the cloud filters
9197 * @pf: Pointer to PF
9198 *
9199 * This function destroys the hlist where all the cloud filters
9200 * were saved.
9201 **/
9202static void i40e_cloud_filter_exit(struct i40e_pf *pf)
9203{
9204	struct i40e_cloud_filter *cfilter;
9205	struct hlist_node *node;
9206
9207	hlist_for_each_entry_safe(cfilter, node,
9208				  &pf->cloud_filter_list, cloud_node) {
9209		hlist_del(&cfilter->cloud_node);
9210		kfree(cfilter);
9211	}
9212	pf->num_cloud_filters = 0;
9213
9214	if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
9215	    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
9216		pf->flags |= I40E_FLAG_FD_SB_ENABLED;
9217		pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
9218		pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
9219	}
9220}
9221
9222/**
9223 * i40e_close - Disables a network interface
9224 * @netdev: network interface device structure
9225 *
9226 * The close entry point is called when an interface is de-activated
9227 * by the OS.  The hardware is still under the driver's control, but
9228 * this netdev interface is disabled.
9229 *
9230 * Returns 0, this is not allowed to fail
9231 **/
9232int i40e_close(struct net_device *netdev)
9233{
9234	struct i40e_netdev_priv *np = netdev_priv(netdev);
9235	struct i40e_vsi *vsi = np->vsi;
9236
9237	i40e_vsi_close(vsi);
9238
9239	return 0;
9240}
9241
9242/**
9243 * i40e_do_reset - Start a PF or Core Reset sequence
9244 * @pf: board private structure
9245 * @reset_flags: which reset is requested
9246 * @lock_acquired: indicates whether or not the lock has been acquired
9247 * before this function was called.
9248 *
9249 * The essential difference in resets is that the PF Reset
9250 * doesn't clear the packet buffers, doesn't reset the PE
9251 * firmware, and doesn't bother the other PFs on the chip.
9252 **/
9253void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
9254{
9255	u32 val;
9256
9257	/* do the biggest reset indicated */
9258	if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) {
9259
9260		/* Request a Global Reset
9261		 *
9262		 * This will start the chip's countdown to the actual full
9263		 * chip reset event, and a warning interrupt to be sent
9264		 * to all PFs, including the requestor.  Our handler
9265		 * for the warning interrupt will deal with the shutdown
9266		 * and recovery of the switch setup.
9267		 */
9268		dev_dbg(&pf->pdev->dev, "GlobalR requested\n");
9269		val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
9270		val |= I40E_GLGEN_RTRIG_GLOBR_MASK;
9271		wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
9272
9273	} else if (reset_flags & BIT_ULL(__I40E_CORE_RESET_REQUESTED)) {
9274
9275		/* Request a Core Reset
9276		 *
9277		 * Same as Global Reset, except does *not* include the MAC/PHY
9278		 */
9279		dev_dbg(&pf->pdev->dev, "CoreR requested\n");
9280		val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
9281		val |= I40E_GLGEN_RTRIG_CORER_MASK;
9282		wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
9283		i40e_flush(&pf->hw);
9284
9285	} else if (reset_flags & I40E_PF_RESET_FLAG) {
9286
9287		/* Request a PF Reset
9288		 *
9289		 * Resets only the PF-specific registers
9290		 *
9291		 * This goes directly to the tear-down and rebuild of
9292		 * the switch, since we need to do all the recovery as
9293		 * for the Core Reset.
9294		 */
9295		dev_dbg(&pf->pdev->dev, "PFR requested\n");
9296		i40e_handle_reset_warning(pf, lock_acquired);
9297
9298	} else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) {
9299		/* Request a PF Reset
9300		 *
9301		 * Resets PF and reinitializes PFs VSI.
9302		 */
9303		i40e_prep_for_reset(pf);
9304		i40e_reset_and_rebuild(pf, true, lock_acquired);
9305		dev_info(&pf->pdev->dev,
9306			 pf->flags & I40E_FLAG_DISABLE_FW_LLDP ?
9307			 "FW LLDP is disabled\n" :
9308			 "FW LLDP is enabled\n");
9309
9310	} else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
9311		int v;
9312
9313		/* Find the VSI(s) that requested a re-init */
9314		dev_info(&pf->pdev->dev,
9315			 "VSI reinit requested\n");
9316		for (v = 0; v < pf->num_alloc_vsi; v++) {
9317			struct i40e_vsi *vsi = pf->vsi[v];
9318
9319			if (vsi != NULL &&
9320			    test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED,
9321					       vsi->state))
9322				i40e_vsi_reinit_locked(pf->vsi[v]);
9323		}
9324	} else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) {
9325		int v;
9326
9327		/* Find the VSI(s) that needs to be brought down */
9328		dev_info(&pf->pdev->dev, "VSI down requested\n");
9329		for (v = 0; v < pf->num_alloc_vsi; v++) {
9330			struct i40e_vsi *vsi = pf->vsi[v];
9331
9332			if (vsi != NULL &&
9333			    test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED,
9334					       vsi->state)) {
9335				set_bit(__I40E_VSI_DOWN, vsi->state);
9336				i40e_down(vsi);
9337			}
9338		}
9339	} else {
9340		dev_info(&pf->pdev->dev,
9341			 "bad reset request 0x%08x\n", reset_flags);
9342	}
9343}
9344
9345#ifdef CONFIG_I40E_DCB
9346/**
9347 * i40e_dcb_need_reconfig - Check if DCB needs reconfig
9348 * @pf: board private structure
9349 * @old_cfg: current DCB config
9350 * @new_cfg: new DCB config
9351 **/
9352bool i40e_dcb_need_reconfig(struct i40e_pf *pf,
9353			    struct i40e_dcbx_config *old_cfg,
9354			    struct i40e_dcbx_config *new_cfg)
9355{
9356	bool need_reconfig = false;
9357
9358	/* Check if ETS configuration has changed */
9359	if (memcmp(&new_cfg->etscfg,
9360		   &old_cfg->etscfg,
9361		   sizeof(new_cfg->etscfg))) {
9362		/* If Priority Table has changed reconfig is needed */
9363		if (memcmp(&new_cfg->etscfg.prioritytable,
9364			   &old_cfg->etscfg.prioritytable,
9365			   sizeof(new_cfg->etscfg.prioritytable))) {
9366			need_reconfig = true;
9367			dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
9368		}
9369
9370		if (memcmp(&new_cfg->etscfg.tcbwtable,
9371			   &old_cfg->etscfg.tcbwtable,
9372			   sizeof(new_cfg->etscfg.tcbwtable)))
9373			dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
9374
9375		if (memcmp(&new_cfg->etscfg.tsatable,
9376			   &old_cfg->etscfg.tsatable,
9377			   sizeof(new_cfg->etscfg.tsatable)))
9378			dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
9379	}
9380
9381	/* Check if PFC configuration has changed */
9382	if (memcmp(&new_cfg->pfc,
9383		   &old_cfg->pfc,
9384		   sizeof(new_cfg->pfc))) {
9385		need_reconfig = true;
9386		dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
9387	}
9388
9389	/* Check if APP Table has changed */
9390	if (memcmp(&new_cfg->app,
9391		   &old_cfg->app,
9392		   sizeof(new_cfg->app))) {
9393		need_reconfig = true;
9394		dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
9395	}
9396
9397	dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
9398	return need_reconfig;
9399}
9400
9401/**
9402 * i40e_handle_lldp_event - Handle LLDP Change MIB event
9403 * @pf: board private structure
9404 * @e: event info posted on ARQ
9405 **/
9406static int i40e_handle_lldp_event(struct i40e_pf *pf,
9407				  struct i40e_arq_event_info *e)
9408{
9409	struct i40e_aqc_lldp_get_mib *mib =
9410		(struct i40e_aqc_lldp_get_mib *)&e->desc.params.raw;
9411	struct i40e_hw *hw = &pf->hw;
9412	struct i40e_dcbx_config tmp_dcbx_cfg;
9413	bool need_reconfig = false;
9414	int ret = 0;
9415	u8 type;
9416
9417	/* X710-T*L 2.5G and 5G speeds don't support DCB */
9418	if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
9419	    (hw->phy.link_info.link_speed &
9420	     ~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) &&
9421	     !(pf->flags & I40E_FLAG_DCB_CAPABLE))
9422		/* let firmware decide if the DCB should be disabled */
9423		pf->flags |= I40E_FLAG_DCB_CAPABLE;
9424
9425	/* Not DCB capable or capability disabled */
9426	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
9427		return ret;
9428
9429	/* Ignore if event is not for Nearest Bridge */
9430	type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
9431		& I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
9432	dev_dbg(&pf->pdev->dev, "LLDP event mib bridge type 0x%x\n", type);
9433	if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE)
9434		return ret;
9435
9436	/* Check MIB Type and return if event for Remote MIB update */
9437	type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK;
9438	dev_dbg(&pf->pdev->dev,
9439		"LLDP event mib type %s\n", type ? "remote" : "local");
9440	if (type == I40E_AQ_LLDP_MIB_REMOTE) {
9441		/* Update the remote cached instance and return */
9442		ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
9443				I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
9444				&hw->remote_dcbx_config);
9445		goto exit;
9446	}
9447
9448	/* Store the old configuration */
9449	tmp_dcbx_cfg = hw->local_dcbx_config;
9450
9451	/* Reset the old DCBx configuration data */
9452	memset(&hw->local_dcbx_config, 0, sizeof(hw->local_dcbx_config));
9453	/* Get updated DCBX data from firmware */
9454	ret = i40e_get_dcb_config(&pf->hw);
9455	if (ret) {
9456		/* X710-T*L 2.5G and 5G speeds don't support DCB */
9457		if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
9458		    (hw->phy.link_info.link_speed &
9459		     (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) {
9460			dev_warn(&pf->pdev->dev,
9461				 "DCB is not supported for X710-T*L 2.5/5G speeds\n");
9462			pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
9463		} else {
9464			dev_info(&pf->pdev->dev,
9465				 "Failed querying DCB configuration data from firmware, err %pe aq_err %s\n",
9466				 ERR_PTR(ret),
9467				 i40e_aq_str(&pf->hw,
9468					     pf->hw.aq.asq_last_status));
9469		}
9470		goto exit;
9471	}
9472
9473	/* No change detected in DCBX configs */
9474	if (!memcmp(&tmp_dcbx_cfg, &hw->local_dcbx_config,
9475		    sizeof(tmp_dcbx_cfg))) {
9476		dev_dbg(&pf->pdev->dev, "No change detected in DCBX configuration.\n");
9477		goto exit;
9478	}
9479
9480	need_reconfig = i40e_dcb_need_reconfig(pf, &tmp_dcbx_cfg,
9481					       &hw->local_dcbx_config);
9482
9483	i40e_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &hw->local_dcbx_config);
9484
9485	if (!need_reconfig)
9486		goto exit;
9487
9488	/* Enable DCB tagging only when more than one TC */
9489	if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
9490		pf->flags |= I40E_FLAG_DCB_ENABLED;
9491	else
9492		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
9493
9494	set_bit(__I40E_PORT_SUSPENDED, pf->state);
9495	/* Reconfiguration needed quiesce all VSIs */
9496	i40e_pf_quiesce_all_vsi(pf);
9497
9498	/* Changes in configuration update VEB/VSI */
9499	i40e_dcb_reconfigure(pf);
9500
9501	ret = i40e_resume_port_tx(pf);
9502
9503	clear_bit(__I40E_PORT_SUSPENDED, pf->state);
9504	/* In case of error no point in resuming VSIs */
9505	if (ret)
9506		goto exit;
9507
9508	/* Wait for the PF's queues to be disabled */
9509	ret = i40e_pf_wait_queues_disabled(pf);
9510	if (ret) {
9511		/* Schedule PF reset to recover */
9512		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
9513		i40e_service_event_schedule(pf);
9514	} else {
9515		i40e_pf_unquiesce_all_vsi(pf);
9516		set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
9517		set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
9518	}
9519
9520exit:
9521	return ret;
9522}
9523#endif /* CONFIG_I40E_DCB */
9524
9525/**
9526 * i40e_do_reset_safe - Protected reset path for userland calls.
9527 * @pf: board private structure
9528 * @reset_flags: which reset is requested
9529 *
9530 **/
9531void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags)
9532{
9533	rtnl_lock();
9534	i40e_do_reset(pf, reset_flags, true);
9535	rtnl_unlock();
9536}
9537
9538/**
9539 * i40e_handle_lan_overflow_event - Handler for LAN queue overflow event
9540 * @pf: board private structure
9541 * @e: event info posted on ARQ
9542 *
9543 * Handler for LAN Queue Overflow Event generated by the firmware for PF
9544 * and VF queues
9545 **/
9546static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
9547					   struct i40e_arq_event_info *e)
9548{
9549	struct i40e_aqc_lan_overflow *data =
9550		(struct i40e_aqc_lan_overflow *)&e->desc.params.raw;
9551	u32 queue = le32_to_cpu(data->prtdcb_rupto);
9552	u32 qtx_ctl = le32_to_cpu(data->otx_ctl);
9553	struct i40e_hw *hw = &pf->hw;
9554	struct i40e_vf *vf;
9555	u16 vf_id;
9556
9557	dev_dbg(&pf->pdev->dev, "overflow Rx Queue Number = %d QTX_CTL=0x%08x\n",
9558		queue, qtx_ctl);
9559
9560	/* Queue belongs to VF, find the VF and issue VF reset */
9561	if (((qtx_ctl & I40E_QTX_CTL_PFVF_Q_MASK)
9562	    >> I40E_QTX_CTL_PFVF_Q_SHIFT) == I40E_QTX_CTL_VF_QUEUE) {
9563		vf_id = (u16)((qtx_ctl & I40E_QTX_CTL_VFVM_INDX_MASK)
9564			 >> I40E_QTX_CTL_VFVM_INDX_SHIFT);
9565		vf_id -= hw->func_caps.vf_base_id;
9566		vf = &pf->vf[vf_id];
9567		i40e_vc_notify_vf_reset(vf);
9568		/* Allow VF to process pending reset notification */
9569		msleep(20);
9570		i40e_reset_vf(vf, false);
9571	}
9572}
9573
9574/**
9575 * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters
9576 * @pf: board private structure
9577 **/
9578u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf)
9579{
9580	u32 val, fcnt_prog;
9581
9582	val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
9583	fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK);
9584	return fcnt_prog;
9585}
9586
9587/**
9588 * i40e_get_current_fd_count - Get total FD filters programmed for this PF
9589 * @pf: board private structure
9590 **/
9591u32 i40e_get_current_fd_count(struct i40e_pf *pf)
9592{
9593	u32 val, fcnt_prog;
9594
9595	val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
9596	fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) +
9597		    ((val & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >>
9598		      I40E_PFQF_FDSTAT_BEST_CNT_SHIFT);
9599	return fcnt_prog;
9600}
9601
9602/**
9603 * i40e_get_global_fd_count - Get total FD filters programmed on device
9604 * @pf: board private structure
9605 **/
9606u32 i40e_get_global_fd_count(struct i40e_pf *pf)
9607{
9608	u32 val, fcnt_prog;
9609
9610	val = rd32(&pf->hw, I40E_GLQF_FDCNT_0);
9611	fcnt_prog = (val & I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK) +
9612		    ((val & I40E_GLQF_FDCNT_0_BESTCNT_MASK) >>
9613		     I40E_GLQF_FDCNT_0_BESTCNT_SHIFT);
9614	return fcnt_prog;
9615}
9616
9617/**
9618 * i40e_reenable_fdir_sb - Restore FDir SB capability
9619 * @pf: board private structure
9620 **/
9621static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
9622{
9623	if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
9624		if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
9625		    (I40E_DEBUG_FD & pf->hw.debug_mask))
9626			dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
9627}
9628
9629/**
9630 * i40e_reenable_fdir_atr - Restore FDir ATR capability
9631 * @pf: board private structure
9632 **/
9633static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
9634{
9635	if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) {
9636		/* ATR uses the same filtering logic as SB rules. It only
9637		 * functions properly if the input set mask is at the default
9638		 * settings. It is safe to restore the default input set
9639		 * because there are no active TCPv4 filter rules.
9640		 */
9641		i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
9642					I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
9643					I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9644
9645		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
9646		    (I40E_DEBUG_FD & pf->hw.debug_mask))
9647			dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
9648	}
9649}
9650
9651/**
9652 * i40e_delete_invalid_filter - Delete an invalid FDIR filter
9653 * @pf: board private structure
9654 * @filter: FDir filter to remove
9655 */
9656static void i40e_delete_invalid_filter(struct i40e_pf *pf,
9657				       struct i40e_fdir_filter *filter)
9658{
9659	/* Update counters */
9660	pf->fdir_pf_active_filters--;
9661	pf->fd_inv = 0;
9662
9663	switch (filter->flow_type) {
9664	case TCP_V4_FLOW:
9665		pf->fd_tcp4_filter_cnt--;
9666		break;
9667	case UDP_V4_FLOW:
9668		pf->fd_udp4_filter_cnt--;
9669		break;
9670	case SCTP_V4_FLOW:
9671		pf->fd_sctp4_filter_cnt--;
9672		break;
9673	case TCP_V6_FLOW:
9674		pf->fd_tcp6_filter_cnt--;
9675		break;
9676	case UDP_V6_FLOW:
9677		pf->fd_udp6_filter_cnt--;
9678		break;
9679	case SCTP_V6_FLOW:
9680		pf->fd_udp6_filter_cnt--;
9681		break;
9682	case IP_USER_FLOW:
9683		switch (filter->ipl4_proto) {
9684		case IPPROTO_TCP:
9685			pf->fd_tcp4_filter_cnt--;
9686			break;
9687		case IPPROTO_UDP:
9688			pf->fd_udp4_filter_cnt--;
9689			break;
9690		case IPPROTO_SCTP:
9691			pf->fd_sctp4_filter_cnt--;
9692			break;
9693		case IPPROTO_IP:
9694			pf->fd_ip4_filter_cnt--;
9695			break;
9696		}
9697		break;
9698	case IPV6_USER_FLOW:
9699		switch (filter->ipl4_proto) {
9700		case IPPROTO_TCP:
9701			pf->fd_tcp6_filter_cnt--;
9702			break;
9703		case IPPROTO_UDP:
9704			pf->fd_udp6_filter_cnt--;
9705			break;
9706		case IPPROTO_SCTP:
9707			pf->fd_sctp6_filter_cnt--;
9708			break;
9709		case IPPROTO_IP:
9710			pf->fd_ip6_filter_cnt--;
9711			break;
9712		}
9713		break;
9714	}
9715
9716	/* Remove the filter from the list and free memory */
9717	hlist_del(&filter->fdir_node);
9718	kfree(filter);
9719}
9720
9721/**
9722 * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled
9723 * @pf: board private structure
9724 **/
9725void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
9726{
9727	struct i40e_fdir_filter *filter;
9728	u32 fcnt_prog, fcnt_avail;
9729	struct hlist_node *node;
9730
9731	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
9732		return;
9733
9734	/* Check if we have enough room to re-enable FDir SB capability. */
9735	fcnt_prog = i40e_get_global_fd_count(pf);
9736	fcnt_avail = pf->fdir_pf_filter_count;
9737	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
9738	    (pf->fd_add_err == 0) ||
9739	    (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
9740		i40e_reenable_fdir_sb(pf);
9741
9742	/* We should wait for even more space before re-enabling ATR.
9743	 * Additionally, we cannot enable ATR as long as we still have TCP SB
9744	 * rules active.
9745	 */
9746	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
9747	    pf->fd_tcp4_filter_cnt == 0 && pf->fd_tcp6_filter_cnt == 0)
9748		i40e_reenable_fdir_atr(pf);
9749
9750	/* if hw had a problem adding a filter, delete it */
9751	if (pf->fd_inv > 0) {
9752		hlist_for_each_entry_safe(filter, node,
9753					  &pf->fdir_filter_list, fdir_node)
9754			if (filter->fd_id == pf->fd_inv)
9755				i40e_delete_invalid_filter(pf, filter);
9756	}
9757}
9758
9759#define I40E_MIN_FD_FLUSH_INTERVAL 10
9760#define I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE 30
9761/**
9762 * i40e_fdir_flush_and_replay - Function to flush all FD filters and replay SB
9763 * @pf: board private structure
9764 **/
9765static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
9766{
9767	unsigned long min_flush_time;
9768	int flush_wait_retry = 50;
9769	bool disable_atr = false;
9770	int fd_room;
9771	int reg;
9772
9773	if (!time_after(jiffies, pf->fd_flush_timestamp +
9774				 (I40E_MIN_FD_FLUSH_INTERVAL * HZ)))
9775		return;
9776
9777	/* If the flush is happening too quick and we have mostly SB rules we
9778	 * should not re-enable ATR for some time.
9779	 */
9780	min_flush_time = pf->fd_flush_timestamp +
9781			 (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
9782	fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
9783
9784	if (!(time_after(jiffies, min_flush_time)) &&
9785	    (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
9786		if (I40E_DEBUG_FD & pf->hw.debug_mask)
9787			dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
9788		disable_atr = true;
9789	}
9790
9791	pf->fd_flush_timestamp = jiffies;
9792	set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
9793	/* flush all filters */
9794	wr32(&pf->hw, I40E_PFQF_CTL_1,
9795	     I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
9796	i40e_flush(&pf->hw);
9797	pf->fd_flush_cnt++;
9798	pf->fd_add_err = 0;
9799	do {
9800		/* Check FD flush status every 5-6msec */
9801		usleep_range(5000, 6000);
9802		reg = rd32(&pf->hw, I40E_PFQF_CTL_1);
9803		if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK))
9804			break;
9805	} while (flush_wait_retry--);
9806	if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
9807		dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
9808	} else {
9809		/* replay sideband filters */
9810		i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
9811		if (!disable_atr && !pf->fd_tcp4_filter_cnt)
9812			clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
9813		clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
9814		if (I40E_DEBUG_FD & pf->hw.debug_mask)
9815			dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
9816	}
9817}
9818
9819/**
9820 * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed
9821 * @pf: board private structure
9822 **/
9823u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
9824{
9825	return i40e_get_current_fd_count(pf) - pf->fdir_pf_active_filters;
9826}
9827
9828/**
9829 * i40e_fdir_reinit_subtask - Worker thread to reinit FDIR filter table
9830 * @pf: board private structure
9831 **/
9832static void i40e_fdir_reinit_subtask(struct i40e_pf *pf)
9833{
9834
9835	/* if interface is down do nothing */
9836	if (test_bit(__I40E_DOWN, pf->state))
9837		return;
9838
9839	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
9840		i40e_fdir_flush_and_replay(pf);
9841
9842	i40e_fdir_check_and_reenable(pf);
9843
9844}
9845
9846/**
9847 * i40e_vsi_link_event - notify VSI of a link event
9848 * @vsi: vsi to be notified
9849 * @link_up: link up or down
9850 **/
9851static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
9852{
9853	if (!vsi || test_bit(__I40E_VSI_DOWN, vsi->state))
9854		return;
9855
9856	switch (vsi->type) {
9857	case I40E_VSI_MAIN:
9858		if (!vsi->netdev || !vsi->netdev_registered)
9859			break;
9860
9861		if (link_up) {
9862			netif_carrier_on(vsi->netdev);
9863			netif_tx_wake_all_queues(vsi->netdev);
9864		} else {
9865			netif_carrier_off(vsi->netdev);
9866			netif_tx_stop_all_queues(vsi->netdev);
9867		}
9868		break;
9869
9870	case I40E_VSI_SRIOV:
9871	case I40E_VSI_VMDQ2:
9872	case I40E_VSI_CTRL:
9873	case I40E_VSI_IWARP:
9874	case I40E_VSI_MIRROR:
9875	default:
9876		/* there is no notification for other VSIs */
9877		break;
9878	}
9879}
9880
9881/**
9882 * i40e_veb_link_event - notify elements on the veb of a link event
9883 * @veb: veb to be notified
9884 * @link_up: link up or down
9885 **/
9886static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up)
9887{
9888	struct i40e_pf *pf;
9889	int i;
9890
9891	if (!veb || !veb->pf)
9892		return;
9893	pf = veb->pf;
9894
9895	/* depth first... */
9896	for (i = 0; i < I40E_MAX_VEB; i++)
9897		if (pf->veb[i] && (pf->veb[i]->uplink_seid == veb->seid))
9898			i40e_veb_link_event(pf->veb[i], link_up);
9899
9900	/* ... now the local VSIs */
9901	for (i = 0; i < pf->num_alloc_vsi; i++)
9902		if (pf->vsi[i] && (pf->vsi[i]->uplink_seid == veb->seid))
9903			i40e_vsi_link_event(pf->vsi[i], link_up);
9904}
9905
9906/**
9907 * i40e_link_event - Update netif_carrier status
9908 * @pf: board private structure
9909 **/
9910static void i40e_link_event(struct i40e_pf *pf)
9911{
9912	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
9913	u8 new_link_speed, old_link_speed;
9914	bool new_link, old_link;
9915	int status;
9916#ifdef CONFIG_I40E_DCB
9917	int err;
9918#endif /* CONFIG_I40E_DCB */
9919
9920	/* set this to force the get_link_status call to refresh state */
9921	pf->hw.phy.get_link_info = true;
9922	old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
9923	status = i40e_get_link_status(&pf->hw, &new_link);
9924
9925	/* On success, disable temp link polling */
9926	if (status == 0) {
9927		clear_bit(__I40E_TEMP_LINK_POLLING, pf->state);
9928	} else {
9929		/* Enable link polling temporarily until i40e_get_link_status
9930		 * returns 0
9931		 */
9932		set_bit(__I40E_TEMP_LINK_POLLING, pf->state);
9933		dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
9934			status);
9935		return;
9936	}
9937
9938	old_link_speed = pf->hw.phy.link_info_old.link_speed;
9939	new_link_speed = pf->hw.phy.link_info.link_speed;
9940
9941	if (new_link == old_link &&
9942	    new_link_speed == old_link_speed &&
9943	    (test_bit(__I40E_VSI_DOWN, vsi->state) ||
9944	     new_link == netif_carrier_ok(vsi->netdev)))
9945		return;
9946
9947	i40e_print_link_message(vsi, new_link);
9948
9949	/* Notify the base of the switch tree connected to
9950	 * the link.  Floating VEBs are not notified.
9951	 */
9952	if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
9953		i40e_veb_link_event(pf->veb[pf->lan_veb], new_link);
9954	else
9955		i40e_vsi_link_event(vsi, new_link);
9956
9957	if (pf->vf)
9958		i40e_vc_notify_link_state(pf);
9959
9960	if (pf->flags & I40E_FLAG_PTP)
9961		i40e_ptp_set_increment(pf);
9962#ifdef CONFIG_I40E_DCB
9963	if (new_link == old_link)
9964		return;
9965	/* Not SW DCB so firmware will take care of default settings */
9966	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
9967		return;
9968
9969	/* We cover here only link down, as after link up in case of SW DCB
9970	 * SW LLDP agent will take care of setting it up
9971	 */
9972	if (!new_link) {
9973		dev_dbg(&pf->pdev->dev, "Reconfig DCB to single TC as result of Link Down\n");
9974		memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg));
9975		err = i40e_dcb_sw_default_config(pf);
9976		if (err) {
9977			pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
9978				       I40E_FLAG_DCB_ENABLED);
9979		} else {
9980			pf->dcbx_cap = DCB_CAP_DCBX_HOST |
9981				       DCB_CAP_DCBX_VER_IEEE;
9982			pf->flags |= I40E_FLAG_DCB_CAPABLE;
9983			pf->flags &= ~I40E_FLAG_DCB_ENABLED;
9984		}
9985	}
9986#endif /* CONFIG_I40E_DCB */
9987}
9988
9989/**
9990 * i40e_watchdog_subtask - periodic checks not using event driven response
9991 * @pf: board private structure
9992 **/
9993static void i40e_watchdog_subtask(struct i40e_pf *pf)
9994{
9995	int i;
9996
9997	/* if interface is down do nothing */
9998	if (test_bit(__I40E_DOWN, pf->state) ||
9999	    test_bit(__I40E_CONFIG_BUSY, pf->state))
10000		return;
10001
10002	/* make sure we don't do these things too often */
10003	if (time_before(jiffies, (pf->service_timer_previous +
10004				  pf->service_timer_period)))
10005		return;
10006	pf->service_timer_previous = jiffies;
10007
10008	if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) ||
10009	    test_bit(__I40E_TEMP_LINK_POLLING, pf->state))
10010		i40e_link_event(pf);
10011
10012	/* Update the stats for active netdevs so the network stack
10013	 * can look at updated numbers whenever it cares to
10014	 */
10015	for (i = 0; i < pf->num_alloc_vsi; i++)
10016		if (pf->vsi[i] && pf->vsi[i]->netdev)
10017			i40e_update_stats(pf->vsi[i]);
10018
10019	if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) {
10020		/* Update the stats for the active switching components */
10021		for (i = 0; i < I40E_MAX_VEB; i++)
10022			if (pf->veb[i])
10023				i40e_update_veb_stats(pf->veb[i]);
10024	}
10025
10026	i40e_ptp_rx_hang(pf);
10027	i40e_ptp_tx_hang(pf);
10028}
10029
10030/**
10031 * i40e_reset_subtask - Set up for resetting the device and driver
10032 * @pf: board private structure
10033 **/
10034static void i40e_reset_subtask(struct i40e_pf *pf)
10035{
10036	u32 reset_flags = 0;
10037
10038	if (test_bit(__I40E_REINIT_REQUESTED, pf->state)) {
10039		reset_flags |= BIT(__I40E_REINIT_REQUESTED);
10040		clear_bit(__I40E_REINIT_REQUESTED, pf->state);
10041	}
10042	if (test_bit(__I40E_PF_RESET_REQUESTED, pf->state)) {
10043		reset_flags |= BIT(__I40E_PF_RESET_REQUESTED);
10044		clear_bit(__I40E_PF_RESET_REQUESTED, pf->state);
10045	}
10046	if (test_bit(__I40E_CORE_RESET_REQUESTED, pf->state)) {
10047		reset_flags |= BIT(__I40E_CORE_RESET_REQUESTED);
10048		clear_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
10049	}
10050	if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state)) {
10051		reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED);
10052		clear_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
10053	}
10054	if (test_bit(__I40E_DOWN_REQUESTED, pf->state)) {
10055		reset_flags |= BIT(__I40E_DOWN_REQUESTED);
10056		clear_bit(__I40E_DOWN_REQUESTED, pf->state);
10057	}
10058
10059	/* If there's a recovery already waiting, it takes
10060	 * precedence before starting a new reset sequence.
10061	 */
10062	if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
10063		i40e_prep_for_reset(pf);
10064		i40e_reset(pf);
10065		i40e_rebuild(pf, false, false);
10066	}
10067
10068	/* If we're already down or resetting, just bail */
10069	if (reset_flags &&
10070	    !test_bit(__I40E_DOWN, pf->state) &&
10071	    !test_bit(__I40E_CONFIG_BUSY, pf->state)) {
10072		i40e_do_reset(pf, reset_flags, false);
10073	}
10074}
10075
10076/**
10077 * i40e_handle_link_event - Handle link event
10078 * @pf: board private structure
10079 * @e: event info posted on ARQ
10080 **/
10081static void i40e_handle_link_event(struct i40e_pf *pf,
10082				   struct i40e_arq_event_info *e)
10083{
10084	struct i40e_aqc_get_link_status *status =
10085		(struct i40e_aqc_get_link_status *)&e->desc.params.raw;
10086
10087	/* Do a new status request to re-enable LSE reporting
10088	 * and load new status information into the hw struct
10089	 * This completely ignores any state information
10090	 * in the ARQ event info, instead choosing to always
10091	 * issue the AQ update link status command.
10092	 */
10093	i40e_link_event(pf);
10094
10095	/* Check if module meets thermal requirements */
10096	if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
10097		dev_err(&pf->pdev->dev,
10098			"Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
10099		dev_err(&pf->pdev->dev,
10100			"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
10101	} else {
10102		/* check for unqualified module, if link is down, suppress
10103		 * the message if link was forced to be down.
10104		 */
10105		if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
10106		    (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
10107		    (!(status->link_info & I40E_AQ_LINK_UP)) &&
10108		    (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
10109			dev_err(&pf->pdev->dev,
10110				"Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
10111			dev_err(&pf->pdev->dev,
10112				"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
10113		}
10114	}
10115}
10116
10117/**
10118 * i40e_clean_adminq_subtask - Clean the AdminQ rings
10119 * @pf: board private structure
10120 **/
10121static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
10122{
10123	struct i40e_arq_event_info event;
10124	struct i40e_hw *hw = &pf->hw;
10125	u16 pending, i = 0;
10126	u16 opcode;
10127	u32 oldval;
10128	int ret;
10129	u32 val;
10130
10131	/* Do not run clean AQ when PF reset fails */
10132	if (test_bit(__I40E_RESET_FAILED, pf->state))
10133		return;
10134
10135	/* check for error indications */
10136	val = rd32(&pf->hw, pf->hw.aq.arq.len);
10137	oldval = val;
10138	if (val & I40E_PF_ARQLEN_ARQVFE_MASK) {
10139		if (hw->debug_mask & I40E_DEBUG_AQ)
10140			dev_info(&pf->pdev->dev, "ARQ VF Error detected\n");
10141		val &= ~I40E_PF_ARQLEN_ARQVFE_MASK;
10142	}
10143	if (val & I40E_PF_ARQLEN_ARQOVFL_MASK) {
10144		if (hw->debug_mask & I40E_DEBUG_AQ)
10145			dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n");
10146		val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK;
10147		pf->arq_overflows++;
10148	}
10149	if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) {
10150		if (hw->debug_mask & I40E_DEBUG_AQ)
10151			dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n");
10152		val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK;
10153	}
10154	if (oldval != val)
10155		wr32(&pf->hw, pf->hw.aq.arq.len, val);
10156
10157	val = rd32(&pf->hw, pf->hw.aq.asq.len);
10158	oldval = val;
10159	if (val & I40E_PF_ATQLEN_ATQVFE_MASK) {
10160		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
10161			dev_info(&pf->pdev->dev, "ASQ VF Error detected\n");
10162		val &= ~I40E_PF_ATQLEN_ATQVFE_MASK;
10163	}
10164	if (val & I40E_PF_ATQLEN_ATQOVFL_MASK) {
10165		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
10166			dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n");
10167		val &= ~I40E_PF_ATQLEN_ATQOVFL_MASK;
10168	}
10169	if (val & I40E_PF_ATQLEN_ATQCRIT_MASK) {
10170		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
10171			dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n");
10172		val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK;
10173	}
10174	if (oldval != val)
10175		wr32(&pf->hw, pf->hw.aq.asq.len, val);
10176
10177	event.buf_len = I40E_MAX_AQ_BUF_SIZE;
10178	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
10179	if (!event.msg_buf)
10180		return;
10181
10182	do {
10183		ret = i40e_clean_arq_element(hw, &event, &pending);
10184		if (ret == -EALREADY)
10185			break;
10186		else if (ret) {
10187			dev_info(&pf->pdev->dev, "ARQ event error %d\n", ret);
10188			break;
10189		}
10190
10191		opcode = le16_to_cpu(event.desc.opcode);
10192		switch (opcode) {
10193
10194		case i40e_aqc_opc_get_link_status:
10195			rtnl_lock();
10196			i40e_handle_link_event(pf, &event);
10197			rtnl_unlock();
10198			break;
10199		case i40e_aqc_opc_send_msg_to_pf:
10200			ret = i40e_vc_process_vf_msg(pf,
10201					le16_to_cpu(event.desc.retval),
10202					le32_to_cpu(event.desc.cookie_high),
10203					le32_to_cpu(event.desc.cookie_low),
10204					event.msg_buf,
10205					event.msg_len);
10206			break;
10207		case i40e_aqc_opc_lldp_update_mib:
10208			dev_dbg(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n");
10209#ifdef CONFIG_I40E_DCB
10210			rtnl_lock();
10211			i40e_handle_lldp_event(pf, &event);
10212			rtnl_unlock();
10213#endif /* CONFIG_I40E_DCB */
10214			break;
10215		case i40e_aqc_opc_event_lan_overflow:
10216			dev_dbg(&pf->pdev->dev, "ARQ LAN queue overflow event received\n");
10217			i40e_handle_lan_overflow_event(pf, &event);
10218			break;
10219		case i40e_aqc_opc_send_msg_to_peer:
10220			dev_info(&pf->pdev->dev, "ARQ: Msg from other pf\n");
10221			break;
10222		case i40e_aqc_opc_nvm_erase:
10223		case i40e_aqc_opc_nvm_update:
10224		case i40e_aqc_opc_oem_post_update:
10225			i40e_debug(&pf->hw, I40E_DEBUG_NVM,
10226				   "ARQ NVM operation 0x%04x completed\n",
10227				   opcode);
10228			break;
10229		default:
10230			dev_info(&pf->pdev->dev,
10231				 "ARQ: Unknown event 0x%04x ignored\n",
10232				 opcode);
10233			break;
10234		}
10235	} while (i++ < pf->adminq_work_limit);
10236
10237	if (i < pf->adminq_work_limit)
10238		clear_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state);
10239
10240	/* re-enable Admin queue interrupt cause */
10241	val = rd32(hw, I40E_PFINT_ICR0_ENA);
10242	val |=  I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
10243	wr32(hw, I40E_PFINT_ICR0_ENA, val);
10244	i40e_flush(hw);
10245
10246	kfree(event.msg_buf);
10247}
10248
10249/**
10250 * i40e_verify_eeprom - make sure eeprom is good to use
10251 * @pf: board private structure
10252 **/
10253static void i40e_verify_eeprom(struct i40e_pf *pf)
10254{
10255	int err;
10256
10257	err = i40e_diag_eeprom_test(&pf->hw);
10258	if (err) {
10259		/* retry in case of garbage read */
10260		err = i40e_diag_eeprom_test(&pf->hw);
10261		if (err) {
10262			dev_info(&pf->pdev->dev, "eeprom check failed (%d), Tx/Rx traffic disabled\n",
10263				 err);
10264			set_bit(__I40E_BAD_EEPROM, pf->state);
10265		}
10266	}
10267
10268	if (!err && test_bit(__I40E_BAD_EEPROM, pf->state)) {
10269		dev_info(&pf->pdev->dev, "eeprom check passed, Tx/Rx traffic enabled\n");
10270		clear_bit(__I40E_BAD_EEPROM, pf->state);
10271	}
10272}
10273
10274/**
10275 * i40e_enable_pf_switch_lb
10276 * @pf: pointer to the PF structure
10277 *
10278 * enable switch loop back or die - no point in a return value
10279 **/
10280static void i40e_enable_pf_switch_lb(struct i40e_pf *pf)
10281{
10282	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
10283	struct i40e_vsi_context ctxt;
10284	int ret;
10285
10286	ctxt.seid = pf->main_vsi_seid;
10287	ctxt.pf_num = pf->hw.pf_id;
10288	ctxt.vf_num = 0;
10289	ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
10290	if (ret) {
10291		dev_info(&pf->pdev->dev,
10292			 "couldn't get PF vsi config, err %pe aq_err %s\n",
10293			 ERR_PTR(ret),
10294			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10295		return;
10296	}
10297	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
10298	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
10299	ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
10300
10301	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
10302	if (ret) {
10303		dev_info(&pf->pdev->dev,
10304			 "update vsi switch failed, err %pe aq_err %s\n",
10305			 ERR_PTR(ret),
10306			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10307	}
10308}
10309
10310/**
10311 * i40e_disable_pf_switch_lb
10312 * @pf: pointer to the PF structure
10313 *
10314 * disable switch loop back or die - no point in a return value
10315 **/
10316static void i40e_disable_pf_switch_lb(struct i40e_pf *pf)
10317{
10318	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
10319	struct i40e_vsi_context ctxt;
10320	int ret;
10321
10322	ctxt.seid = pf->main_vsi_seid;
10323	ctxt.pf_num = pf->hw.pf_id;
10324	ctxt.vf_num = 0;
10325	ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
10326	if (ret) {
10327		dev_info(&pf->pdev->dev,
10328			 "couldn't get PF vsi config, err %pe aq_err %s\n",
10329			 ERR_PTR(ret),
10330			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10331		return;
10332	}
10333	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
10334	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
10335	ctxt.info.switch_id &= ~cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
10336
10337	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
10338	if (ret) {
10339		dev_info(&pf->pdev->dev,
10340			 "update vsi switch failed, err %pe aq_err %s\n",
10341			 ERR_PTR(ret),
10342			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10343	}
10344}
10345
10346/**
10347 * i40e_config_bridge_mode - Configure the HW bridge mode
10348 * @veb: pointer to the bridge instance
10349 *
10350 * Configure the loop back mode for the LAN VSI that is downlink to the
10351 * specified HW bridge instance. It is expected this function is called
10352 * when a new HW bridge is instantiated.
10353 **/
10354static void i40e_config_bridge_mode(struct i40e_veb *veb)
10355{
10356	struct i40e_pf *pf = veb->pf;
10357
10358	if (pf->hw.debug_mask & I40E_DEBUG_LAN)
10359		dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n",
10360			 veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
10361	if (veb->bridge_mode & BRIDGE_MODE_VEPA)
10362		i40e_disable_pf_switch_lb(pf);
10363	else
10364		i40e_enable_pf_switch_lb(pf);
10365}
10366
10367/**
10368 * i40e_reconstitute_veb - rebuild the VEB and anything connected to it
10369 * @veb: pointer to the VEB instance
10370 *
10371 * This is a recursive function that first builds the attached VSIs then
10372 * recurses in to build the next layer of VEB.  We track the connections
10373 * through our own index numbers because the seid's from the HW could
10374 * change across the reset.
10375 **/
10376static int i40e_reconstitute_veb(struct i40e_veb *veb)
10377{
10378	struct i40e_vsi *ctl_vsi = NULL;
10379	struct i40e_pf *pf = veb->pf;
10380	int v, veb_idx;
10381	int ret;
10382
10383	/* build VSI that owns this VEB, temporarily attached to base VEB */
10384	for (v = 0; v < pf->num_alloc_vsi && !ctl_vsi; v++) {
10385		if (pf->vsi[v] &&
10386		    pf->vsi[v]->veb_idx == veb->idx &&
10387		    pf->vsi[v]->flags & I40E_VSI_FLAG_VEB_OWNER) {
10388			ctl_vsi = pf->vsi[v];
10389			break;
10390		}
10391	}
10392	if (!ctl_vsi) {
10393		dev_info(&pf->pdev->dev,
10394			 "missing owner VSI for veb_idx %d\n", veb->idx);
10395		ret = -ENOENT;
10396		goto end_reconstitute;
10397	}
10398	if (ctl_vsi != pf->vsi[pf->lan_vsi])
10399		ctl_vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
10400	ret = i40e_add_vsi(ctl_vsi);
10401	if (ret) {
10402		dev_info(&pf->pdev->dev,
10403			 "rebuild of veb_idx %d owner VSI failed: %d\n",
10404			 veb->idx, ret);
10405		goto end_reconstitute;
10406	}
10407	i40e_vsi_reset_stats(ctl_vsi);
10408
10409	/* create the VEB in the switch and move the VSI onto the VEB */
10410	ret = i40e_add_veb(veb, ctl_vsi);
10411	if (ret)
10412		goto end_reconstitute;
10413
10414	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
10415		veb->bridge_mode = BRIDGE_MODE_VEB;
10416	else
10417		veb->bridge_mode = BRIDGE_MODE_VEPA;
10418	i40e_config_bridge_mode(veb);
10419
10420	/* create the remaining VSIs attached to this VEB */
10421	for (v = 0; v < pf->num_alloc_vsi; v++) {
10422		if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi)
10423			continue;
10424
10425		if (pf->vsi[v]->veb_idx == veb->idx) {
10426			struct i40e_vsi *vsi = pf->vsi[v];
10427
10428			vsi->uplink_seid = veb->seid;
10429			ret = i40e_add_vsi(vsi);
10430			if (ret) {
10431				dev_info(&pf->pdev->dev,
10432					 "rebuild of vsi_idx %d failed: %d\n",
10433					 v, ret);
10434				goto end_reconstitute;
10435			}
10436			i40e_vsi_reset_stats(vsi);
10437		}
10438	}
10439
10440	/* create any VEBs attached to this VEB - RECURSION */
10441	for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
10442		if (pf->veb[veb_idx] && pf->veb[veb_idx]->veb_idx == veb->idx) {
10443			pf->veb[veb_idx]->uplink_seid = veb->seid;
10444			ret = i40e_reconstitute_veb(pf->veb[veb_idx]);
10445			if (ret)
10446				break;
10447		}
10448	}
10449
10450end_reconstitute:
10451	return ret;
10452}
10453
10454/**
10455 * i40e_get_capabilities - get info about the HW
10456 * @pf: the PF struct
10457 * @list_type: AQ capability to be queried
10458 **/
10459static int i40e_get_capabilities(struct i40e_pf *pf,
10460				 enum i40e_admin_queue_opc list_type)
10461{
10462	struct i40e_aqc_list_capabilities_element_resp *cap_buf;
10463	u16 data_size;
10464	int buf_len;
10465	int err;
10466
10467	buf_len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp);
10468	do {
10469		cap_buf = kzalloc(buf_len, GFP_KERNEL);
10470		if (!cap_buf)
10471			return -ENOMEM;
10472
10473		/* this loads the data into the hw struct for us */
10474		err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
10475						    &data_size, list_type,
10476						    NULL);
10477		/* data loaded, buffer no longer needed */
10478		kfree(cap_buf);
10479
10480		if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) {
10481			/* retry with a larger buffer */
10482			buf_len = data_size;
10483		} else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) {
10484			dev_info(&pf->pdev->dev,
10485				 "capability discovery failed, err %pe aq_err %s\n",
10486				 ERR_PTR(err),
10487				 i40e_aq_str(&pf->hw,
10488					     pf->hw.aq.asq_last_status));
10489			return -ENODEV;
10490		}
10491	} while (err);
10492
10493	if (pf->hw.debug_mask & I40E_DEBUG_USER) {
10494		if (list_type == i40e_aqc_opc_list_func_capabilities) {
10495			dev_info(&pf->pdev->dev,
10496				 "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
10497				 pf->hw.pf_id, pf->hw.func_caps.num_vfs,
10498				 pf->hw.func_caps.num_msix_vectors,
10499				 pf->hw.func_caps.num_msix_vectors_vf,
10500				 pf->hw.func_caps.fd_filters_guaranteed,
10501				 pf->hw.func_caps.fd_filters_best_effort,
10502				 pf->hw.func_caps.num_tx_qp,
10503				 pf->hw.func_caps.num_vsis);
10504		} else if (list_type == i40e_aqc_opc_list_dev_capabilities) {
10505			dev_info(&pf->pdev->dev,
10506				 "switch_mode=0x%04x, function_valid=0x%08x\n",
10507				 pf->hw.dev_caps.switch_mode,
10508				 pf->hw.dev_caps.valid_functions);
10509			dev_info(&pf->pdev->dev,
10510				 "SR-IOV=%d, num_vfs for all function=%u\n",
10511				 pf->hw.dev_caps.sr_iov_1_1,
10512				 pf->hw.dev_caps.num_vfs);
10513			dev_info(&pf->pdev->dev,
10514				 "num_vsis=%u, num_rx:%u, num_tx=%u\n",
10515				 pf->hw.dev_caps.num_vsis,
10516				 pf->hw.dev_caps.num_rx_qp,
10517				 pf->hw.dev_caps.num_tx_qp);
10518		}
10519	}
10520	if (list_type == i40e_aqc_opc_list_func_capabilities) {
10521#define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
10522		       + pf->hw.func_caps.num_vfs)
10523		if (pf->hw.revision_id == 0 &&
10524		    pf->hw.func_caps.num_vsis < DEF_NUM_VSI) {
10525			dev_info(&pf->pdev->dev,
10526				 "got num_vsis %d, setting num_vsis to %d\n",
10527				 pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
10528			pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
10529		}
10530	}
10531	return 0;
10532}
10533
10534static int i40e_vsi_clear(struct i40e_vsi *vsi);
10535
10536/**
10537 * i40e_fdir_sb_setup - initialize the Flow Director resources for Sideband
10538 * @pf: board private structure
10539 **/
10540static void i40e_fdir_sb_setup(struct i40e_pf *pf)
10541{
10542	struct i40e_vsi *vsi;
10543
10544	/* quick workaround for an NVM issue that leaves a critical register
10545	 * uninitialized
10546	 */
10547	if (!rd32(&pf->hw, I40E_GLQF_HKEY(0))) {
10548		static const u32 hkey[] = {
10549			0xe640d33f, 0xcdfe98ab, 0x73fa7161, 0x0d7a7d36,
10550			0xeacb7d61, 0xaa4f05b6, 0x9c5c89ed, 0xfc425ddb,
10551			0xa4654832, 0xfc7461d4, 0x8f827619, 0xf5c63c21,
10552			0x95b3a76d};
10553		int i;
10554
10555		for (i = 0; i <= I40E_GLQF_HKEY_MAX_INDEX; i++)
10556			wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]);
10557	}
10558
10559	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
10560		return;
10561
10562	/* find existing VSI and see if it needs configuring */
10563	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
10564
10565	/* create a new VSI if none exists */
10566	if (!vsi) {
10567		vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
10568				     pf->vsi[pf->lan_vsi]->seid, 0);
10569		if (!vsi) {
10570			dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
10571			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
10572			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
10573			return;
10574		}
10575	}
10576
10577	i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
10578}
10579
10580/**
10581 * i40e_fdir_teardown - release the Flow Director resources
10582 * @pf: board private structure
10583 **/
10584static void i40e_fdir_teardown(struct i40e_pf *pf)
10585{
10586	struct i40e_vsi *vsi;
10587
10588	i40e_fdir_filter_exit(pf);
10589	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
10590	if (vsi)
10591		i40e_vsi_release(vsi);
10592}
10593
10594/**
10595 * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs
10596 * @vsi: PF main vsi
10597 * @seid: seid of main or channel VSIs
10598 *
10599 * Rebuilds cloud filters associated with main VSI and channel VSIs if they
10600 * existed before reset
10601 **/
10602static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
10603{
10604	struct i40e_cloud_filter *cfilter;
10605	struct i40e_pf *pf = vsi->back;
10606	struct hlist_node *node;
10607	int ret;
10608
10609	/* Add cloud filters back if they exist */
10610	hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
10611				  cloud_node) {
10612		if (cfilter->seid != seid)
10613			continue;
10614
10615		if (cfilter->dst_port)
10616			ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
10617								true);
10618		else
10619			ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
10620
10621		if (ret) {
10622			dev_dbg(&pf->pdev->dev,
10623				"Failed to rebuild cloud filter, err %pe aq_err %s\n",
10624				ERR_PTR(ret),
10625				i40e_aq_str(&pf->hw,
10626					    pf->hw.aq.asq_last_status));
10627			return ret;
10628		}
10629	}
10630	return 0;
10631}
10632
10633/**
10634 * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
10635 * @vsi: PF main vsi
10636 *
10637 * Rebuilds channel VSIs if they existed before reset
10638 **/
10639static int i40e_rebuild_channels(struct i40e_vsi *vsi)
10640{
10641	struct i40e_channel *ch, *ch_tmp;
10642	int ret;
10643
10644	if (list_empty(&vsi->ch_list))
10645		return 0;
10646
10647	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
10648		if (!ch->initialized)
10649			break;
10650		/* Proceed with creation of channel (VMDq2) VSI */
10651		ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
10652		if (ret) {
10653			dev_info(&vsi->back->pdev->dev,
10654				 "failed to rebuild channels using uplink_seid %u\n",
10655				 vsi->uplink_seid);
10656			return ret;
10657		}
10658		/* Reconfigure TX queues using QTX_CTL register */
10659		ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch);
10660		if (ret) {
10661			dev_info(&vsi->back->pdev->dev,
10662				 "failed to configure TX rings for channel %u\n",
10663				 ch->seid);
10664			return ret;
10665		}
10666		/* update 'next_base_queue' */
10667		vsi->next_base_queue = vsi->next_base_queue +
10668							ch->num_queue_pairs;
10669		if (ch->max_tx_rate) {
10670			u64 credits = ch->max_tx_rate;
10671
10672			if (i40e_set_bw_limit(vsi, ch->seid,
10673					      ch->max_tx_rate))
10674				return -EINVAL;
10675
10676			do_div(credits, I40E_BW_CREDIT_DIVISOR);
10677			dev_dbg(&vsi->back->pdev->dev,
10678				"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
10679				ch->max_tx_rate,
10680				credits,
10681				ch->seid);
10682		}
10683		ret = i40e_rebuild_cloud_filters(vsi, ch->seid);
10684		if (ret) {
10685			dev_dbg(&vsi->back->pdev->dev,
10686				"Failed to rebuild cloud filters for channel VSI %u\n",
10687				ch->seid);
10688			return ret;
10689		}
10690	}
10691	return 0;
10692}
10693
10694/**
10695 * i40e_clean_xps_state - clean xps state for every tx_ring
10696 * @vsi: ptr to the VSI
10697 **/
10698static void i40e_clean_xps_state(struct i40e_vsi *vsi)
10699{
10700	int i;
10701
10702	if (vsi->tx_rings)
10703		for (i = 0; i < vsi->num_queue_pairs; i++)
10704			if (vsi->tx_rings[i])
10705				clear_bit(__I40E_TX_XPS_INIT_DONE,
10706					  vsi->tx_rings[i]->state);
10707}
10708
10709/**
10710 * i40e_prep_for_reset - prep for the core to reset
10711 * @pf: board private structure
10712 *
10713 * Close up the VFs and other things in prep for PF Reset.
10714  **/
10715static void i40e_prep_for_reset(struct i40e_pf *pf)
10716{
10717	struct i40e_hw *hw = &pf->hw;
10718	int ret = 0;
10719	u32 v;
10720
10721	clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
10722	if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
10723		return;
10724	if (i40e_check_asq_alive(&pf->hw))
10725		i40e_vc_notify_reset(pf);
10726
10727	dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
10728
10729	/* quiesce the VSIs and their queues that are not already DOWN */
10730	i40e_pf_quiesce_all_vsi(pf);
10731
10732	for (v = 0; v < pf->num_alloc_vsi; v++) {
10733		if (pf->vsi[v]) {
10734			i40e_clean_xps_state(pf->vsi[v]);
10735			pf->vsi[v]->seid = 0;
10736		}
10737	}
10738
10739	i40e_shutdown_adminq(&pf->hw);
10740
10741	/* call shutdown HMC */
10742	if (hw->hmc.hmc_obj) {
10743		ret = i40e_shutdown_lan_hmc(hw);
10744		if (ret)
10745			dev_warn(&pf->pdev->dev,
10746				 "shutdown_lan_hmc failed: %d\n", ret);
10747	}
10748
10749	/* Save the current PTP time so that we can restore the time after the
10750	 * reset completes.
10751	 */
10752	i40e_ptp_save_hw_time(pf);
10753}
10754
10755/**
10756 * i40e_send_version - update firmware with driver version
10757 * @pf: PF struct
10758 */
10759static void i40e_send_version(struct i40e_pf *pf)
10760{
10761	struct i40e_driver_version dv;
10762
10763	dv.major_version = 0xff;
10764	dv.minor_version = 0xff;
10765	dv.build_version = 0xff;
10766	dv.subbuild_version = 0;
10767	strscpy(dv.driver_string, UTS_RELEASE, sizeof(dv.driver_string));
10768	i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
10769}
10770
10771/**
10772 * i40e_get_oem_version - get OEM specific version information
10773 * @hw: pointer to the hardware structure
10774 **/
10775static void i40e_get_oem_version(struct i40e_hw *hw)
10776{
10777	u16 block_offset = 0xffff;
10778	u16 block_length = 0;
10779	u16 capabilities = 0;
10780	u16 gen_snap = 0;
10781	u16 release = 0;
10782
10783#define I40E_SR_NVM_OEM_VERSION_PTR		0x1B
10784#define I40E_NVM_OEM_LENGTH_OFFSET		0x00
10785#define I40E_NVM_OEM_CAPABILITIES_OFFSET	0x01
10786#define I40E_NVM_OEM_GEN_OFFSET			0x02
10787#define I40E_NVM_OEM_RELEASE_OFFSET		0x03
10788#define I40E_NVM_OEM_CAPABILITIES_MASK		0x000F
10789#define I40E_NVM_OEM_LENGTH			3
10790
10791	/* Check if pointer to OEM version block is valid. */
10792	i40e_read_nvm_word(hw, I40E_SR_NVM_OEM_VERSION_PTR, &block_offset);
10793	if (block_offset == 0xffff)
10794		return;
10795
10796	/* Check if OEM version block has correct length. */
10797	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_LENGTH_OFFSET,
10798			   &block_length);
10799	if (block_length < I40E_NVM_OEM_LENGTH)
10800		return;
10801
10802	/* Check if OEM version format is as expected. */
10803	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_CAPABILITIES_OFFSET,
10804			   &capabilities);
10805	if ((capabilities & I40E_NVM_OEM_CAPABILITIES_MASK) != 0)
10806		return;
10807
10808	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_GEN_OFFSET,
10809			   &gen_snap);
10810	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_RELEASE_OFFSET,
10811			   &release);
10812	hw->nvm.oem_ver = (gen_snap << I40E_OEM_SNAP_SHIFT) | release;
10813	hw->nvm.eetrack = I40E_OEM_EETRACK_ID;
10814}
10815
10816/**
10817 * i40e_reset - wait for core reset to finish reset, reset pf if corer not seen
10818 * @pf: board private structure
10819 **/
10820static int i40e_reset(struct i40e_pf *pf)
10821{
10822	struct i40e_hw *hw = &pf->hw;
10823	int ret;
10824
10825	ret = i40e_pf_reset(hw);
10826	if (ret) {
10827		dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret);
10828		set_bit(__I40E_RESET_FAILED, pf->state);
10829		clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
10830	} else {
10831		pf->pfr_count++;
10832	}
10833	return ret;
10834}
10835
10836/**
10837 * i40e_rebuild - rebuild using a saved config
10838 * @pf: board private structure
10839 * @reinit: if the Main VSI needs to re-initialized.
10840 * @lock_acquired: indicates whether or not the lock has been acquired
10841 * before this function was called.
10842 **/
10843static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
10844{
10845	const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
10846	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
10847	struct i40e_hw *hw = &pf->hw;
10848	int ret;
10849	u32 val;
10850	int v;
10851
10852	if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
10853	    is_recovery_mode_reported)
10854		i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
10855
10856	if (test_bit(__I40E_DOWN, pf->state) &&
10857	    !test_bit(__I40E_RECOVERY_MODE, pf->state))
10858		goto clear_recovery;
10859	dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
10860
10861	/* rebuild the basics for the AdminQ, HMC, and initial HW switch */
10862	ret = i40e_init_adminq(&pf->hw);
10863	if (ret) {
10864		dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %pe aq_err %s\n",
10865			 ERR_PTR(ret),
10866			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10867		goto clear_recovery;
10868	}
10869	i40e_get_oem_version(&pf->hw);
10870
10871	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) {
10872		/* The following delay is necessary for firmware update. */
10873		mdelay(1000);
10874	}
10875
10876	/* re-verify the eeprom if we just had an EMP reset */
10877	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
10878		i40e_verify_eeprom(pf);
10879
10880	/* if we are going out of or into recovery mode we have to act
10881	 * accordingly with regard to resources initialization
10882	 * and deinitialization
10883	 */
10884	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
10885		if (i40e_get_capabilities(pf,
10886					  i40e_aqc_opc_list_func_capabilities))
10887			goto end_unlock;
10888
10889		if (is_recovery_mode_reported) {
10890			/* we're staying in recovery mode so we'll reinitialize
10891			 * misc vector here
10892			 */
10893			if (i40e_setup_misc_vector_for_recovery_mode(pf))
10894				goto end_unlock;
10895		} else {
10896			if (!lock_acquired)
10897				rtnl_lock();
10898			/* we're going out of recovery mode so we'll free
10899			 * the IRQ allocated specifically for recovery mode
10900			 * and restore the interrupt scheme
10901			 */
10902			free_irq(pf->pdev->irq, pf);
10903			i40e_clear_interrupt_scheme(pf);
10904			if (i40e_restore_interrupt_scheme(pf))
10905				goto end_unlock;
10906		}
10907
10908		/* tell the firmware that we're starting */
10909		i40e_send_version(pf);
10910
10911		/* bail out in case recovery mode was detected, as there is
10912		 * no need for further configuration.
10913		 */
10914		goto end_unlock;
10915	}
10916
10917	i40e_clear_pxe_mode(hw);
10918	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
10919	if (ret)
10920		goto end_core_reset;
10921
10922	ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
10923				hw->func_caps.num_rx_qp, 0, 0);
10924	if (ret) {
10925		dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret);
10926		goto end_core_reset;
10927	}
10928	ret = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
10929	if (ret) {
10930		dev_info(&pf->pdev->dev, "configure_lan_hmc failed: %d\n", ret);
10931		goto end_core_reset;
10932	}
10933
10934#ifdef CONFIG_I40E_DCB
10935	/* Enable FW to write a default DCB config on link-up
10936	 * unless I40E_FLAG_TC_MQPRIO was enabled or DCB
10937	 * is not supported with new link speed
10938	 */
10939	if (i40e_is_tc_mqprio_enabled(pf)) {
10940		i40e_aq_set_dcb_parameters(hw, false, NULL);
10941	} else {
10942		if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
10943		    (hw->phy.link_info.link_speed &
10944		     (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) {
10945			i40e_aq_set_dcb_parameters(hw, false, NULL);
10946			dev_warn(&pf->pdev->dev,
10947				 "DCB is not supported for X710-T*L 2.5/5G speeds\n");
10948			pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
10949		} else {
10950			i40e_aq_set_dcb_parameters(hw, true, NULL);
10951			ret = i40e_init_pf_dcb(pf);
10952			if (ret) {
10953				dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n",
10954					 ret);
10955				pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
10956				/* Continue without DCB enabled */
10957			}
10958		}
10959	}
10960
10961#endif /* CONFIG_I40E_DCB */
10962	if (!lock_acquired)
10963		rtnl_lock();
10964	ret = i40e_setup_pf_switch(pf, reinit, true);
10965	if (ret)
10966		goto end_unlock;
10967
10968	/* The driver only wants link up/down and module qualification
10969	 * reports from firmware.  Note the negative logic.
10970	 */
10971	ret = i40e_aq_set_phy_int_mask(&pf->hw,
10972				       ~(I40E_AQ_EVENT_LINK_UPDOWN |
10973					 I40E_AQ_EVENT_MEDIA_NA |
10974					 I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
10975	if (ret)
10976		dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n",
10977			 ERR_PTR(ret),
10978			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10979
10980	/* Rebuild the VSIs and VEBs that existed before reset.
10981	 * They are still in our local switch element arrays, so only
10982	 * need to rebuild the switch model in the HW.
10983	 *
10984	 * If there were VEBs but the reconstitution failed, we'll try
10985	 * to recover minimal use by getting the basic PF VSI working.
10986	 */
10987	if (vsi->uplink_seid != pf->mac_seid) {
10988		dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
10989		/* find the one VEB connected to the MAC, and find orphans */
10990		for (v = 0; v < I40E_MAX_VEB; v++) {
10991			if (!pf->veb[v])
10992				continue;
10993
10994			if (pf->veb[v]->uplink_seid == pf->mac_seid ||
10995			    pf->veb[v]->uplink_seid == 0) {
10996				ret = i40e_reconstitute_veb(pf->veb[v]);
10997
10998				if (!ret)
10999					continue;
11000
11001				/* If Main VEB failed, we're in deep doodoo,
11002				 * so give up rebuilding the switch and set up
11003				 * for minimal rebuild of PF VSI.
11004				 * If orphan failed, we'll report the error
11005				 * but try to keep going.
11006				 */
11007				if (pf->veb[v]->uplink_seid == pf->mac_seid) {
11008					dev_info(&pf->pdev->dev,
11009						 "rebuild of switch failed: %d, will try to set up simple PF connection\n",
11010						 ret);
11011					vsi->uplink_seid = pf->mac_seid;
11012					break;
11013				} else if (pf->veb[v]->uplink_seid == 0) {
11014					dev_info(&pf->pdev->dev,
11015						 "rebuild of orphan VEB failed: %d\n",
11016						 ret);
11017				}
11018			}
11019		}
11020	}
11021
11022	if (vsi->uplink_seid == pf->mac_seid) {
11023		dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
11024		/* no VEB, so rebuild only the Main VSI */
11025		ret = i40e_add_vsi(vsi);
11026		if (ret) {
11027			dev_info(&pf->pdev->dev,
11028				 "rebuild of Main VSI failed: %d\n", ret);
11029			goto end_unlock;
11030		}
11031	}
11032
11033	if (vsi->mqprio_qopt.max_rate[0]) {
11034		u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
11035						  vsi->mqprio_qopt.max_rate[0]);
11036		u64 credits = 0;
11037
11038		ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
11039		if (ret)
11040			goto end_unlock;
11041
11042		credits = max_tx_rate;
11043		do_div(credits, I40E_BW_CREDIT_DIVISOR);
11044		dev_dbg(&vsi->back->pdev->dev,
11045			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
11046			max_tx_rate,
11047			credits,
11048			vsi->seid);
11049	}
11050
11051	ret = i40e_rebuild_cloud_filters(vsi, vsi->seid);
11052	if (ret)
11053		goto end_unlock;
11054
11055	/* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs
11056	 * for this main VSI if they exist
11057	 */
11058	ret = i40e_rebuild_channels(vsi);
11059	if (ret)
11060		goto end_unlock;
11061
11062	/* Reconfigure hardware for allowing smaller MSS in the case
11063	 * of TSO, so that we avoid the MDD being fired and causing
11064	 * a reset in the case of small MSS+TSO.
11065	 */
11066#define I40E_REG_MSS          0x000E64DC
11067#define I40E_REG_MSS_MIN_MASK 0x3FF0000
11068#define I40E_64BYTE_MSS       0x400000
11069	val = rd32(hw, I40E_REG_MSS);
11070	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
11071		val &= ~I40E_REG_MSS_MIN_MASK;
11072		val |= I40E_64BYTE_MSS;
11073		wr32(hw, I40E_REG_MSS, val);
11074	}
11075
11076	if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
11077		msleep(75);
11078		ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
11079		if (ret)
11080			dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
11081				 ERR_PTR(ret),
11082				 i40e_aq_str(&pf->hw,
11083					     pf->hw.aq.asq_last_status));
11084	}
11085	/* reinit the misc interrupt */
11086	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
11087		ret = i40e_setup_misc_vector(pf);
11088		if (ret)
11089			goto end_unlock;
11090	}
11091
11092	/* Add a filter to drop all Flow control frames from any VSI from being
11093	 * transmitted. By doing so we stop a malicious VF from sending out
11094	 * PAUSE or PFC frames and potentially controlling traffic for other
11095	 * PF/VF VSIs.
11096	 * The FW can still send Flow control frames if enabled.
11097	 */
11098	i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
11099						       pf->main_vsi_seid);
11100
11101	/* restart the VSIs that were rebuilt and running before the reset */
11102	i40e_pf_unquiesce_all_vsi(pf);
11103
11104	/* Release the RTNL lock before we start resetting VFs */
11105	if (!lock_acquired)
11106		rtnl_unlock();
11107
11108	/* Restore promiscuous settings */
11109	ret = i40e_set_promiscuous(pf, pf->cur_promisc);
11110	if (ret)
11111		dev_warn(&pf->pdev->dev,
11112			 "Failed to restore promiscuous setting: %s, err %pe aq_err %s\n",
11113			 pf->cur_promisc ? "on" : "off",
11114			 ERR_PTR(ret),
11115			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
11116
11117	i40e_reset_all_vfs(pf, true);
11118
11119	/* tell the firmware that we're starting */
11120	i40e_send_version(pf);
11121
11122	/* We've already released the lock, so don't do it again */
11123	goto end_core_reset;
11124
11125end_unlock:
11126	if (!lock_acquired)
11127		rtnl_unlock();
11128end_core_reset:
11129	clear_bit(__I40E_RESET_FAILED, pf->state);
11130clear_recovery:
11131	clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
11132	clear_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state);
11133}
11134
11135/**
11136 * i40e_reset_and_rebuild - reset and rebuild using a saved config
11137 * @pf: board private structure
11138 * @reinit: if the Main VSI needs to re-initialized.
11139 * @lock_acquired: indicates whether or not the lock has been acquired
11140 * before this function was called.
11141 **/
11142static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
11143				   bool lock_acquired)
11144{
11145	int ret;
11146
11147	if (test_bit(__I40E_IN_REMOVE, pf->state))
11148		return;
11149	/* Now we wait for GRST to settle out.
11150	 * We don't have to delete the VEBs or VSIs from the hw switch
11151	 * because the reset will make them disappear.
11152	 */
11153	ret = i40e_reset(pf);
11154	if (!ret)
11155		i40e_rebuild(pf, reinit, lock_acquired);
11156}
11157
11158/**
11159 * i40e_handle_reset_warning - prep for the PF to reset, reset and rebuild
11160 * @pf: board private structure
11161 *
11162 * Close up the VFs and other things in prep for a Core Reset,
11163 * then get ready to rebuild the world.
11164 * @lock_acquired: indicates whether or not the lock has been acquired
11165 * before this function was called.
11166 **/
11167static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
11168{
11169	i40e_prep_for_reset(pf);
11170	i40e_reset_and_rebuild(pf, false, lock_acquired);
11171}
11172
11173/**
11174 * i40e_handle_mdd_event
11175 * @pf: pointer to the PF structure
11176 *
11177 * Called from the MDD irq handler to identify possibly malicious vfs
11178 **/
11179static void i40e_handle_mdd_event(struct i40e_pf *pf)
11180{
11181	struct i40e_hw *hw = &pf->hw;
11182	bool mdd_detected = false;
11183	struct i40e_vf *vf;
11184	u32 reg;
11185	int i;
11186
11187	if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
11188		return;
11189
11190	/* find what triggered the MDD event */
11191	reg = rd32(hw, I40E_GL_MDET_TX);
11192	if (reg & I40E_GL_MDET_TX_VALID_MASK) {
11193		u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >>
11194				I40E_GL_MDET_TX_PF_NUM_SHIFT;
11195		u16 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >>
11196				I40E_GL_MDET_TX_VF_NUM_SHIFT;
11197		u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >>
11198				I40E_GL_MDET_TX_EVENT_SHIFT;
11199		u16 queue = ((reg & I40E_GL_MDET_TX_QUEUE_MASK) >>
11200				I40E_GL_MDET_TX_QUEUE_SHIFT) -
11201				pf->hw.func_caps.base_queue;
11202		if (netif_msg_tx_err(pf))
11203			dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on TX queue %d PF number 0x%02x VF number 0x%02x\n",
11204				 event, queue, pf_num, vf_num);
11205		wr32(hw, I40E_GL_MDET_TX, 0xffffffff);
11206		mdd_detected = true;
11207	}
11208	reg = rd32(hw, I40E_GL_MDET_RX);
11209	if (reg & I40E_GL_MDET_RX_VALID_MASK) {
11210		u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >>
11211				I40E_GL_MDET_RX_FUNCTION_SHIFT;
11212		u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >>
11213				I40E_GL_MDET_RX_EVENT_SHIFT;
11214		u16 queue = ((reg & I40E_GL_MDET_RX_QUEUE_MASK) >>
11215				I40E_GL_MDET_RX_QUEUE_SHIFT) -
11216				pf->hw.func_caps.base_queue;
11217		if (netif_msg_rx_err(pf))
11218			dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on RX queue %d of function 0x%02x\n",
11219				 event, queue, func);
11220		wr32(hw, I40E_GL_MDET_RX, 0xffffffff);
11221		mdd_detected = true;
11222	}
11223
11224	if (mdd_detected) {
11225		reg = rd32(hw, I40E_PF_MDET_TX);
11226		if (reg & I40E_PF_MDET_TX_VALID_MASK) {
11227			wr32(hw, I40E_PF_MDET_TX, 0xFFFF);
11228			dev_dbg(&pf->pdev->dev, "TX driver issue detected on PF\n");
11229		}
11230		reg = rd32(hw, I40E_PF_MDET_RX);
11231		if (reg & I40E_PF_MDET_RX_VALID_MASK) {
11232			wr32(hw, I40E_PF_MDET_RX, 0xFFFF);
11233			dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n");
11234		}
11235	}
11236
11237	/* see if one of the VFs needs its hand slapped */
11238	for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
11239		vf = &(pf->vf[i]);
11240		reg = rd32(hw, I40E_VP_MDET_TX(i));
11241		if (reg & I40E_VP_MDET_TX_VALID_MASK) {
11242			wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
11243			vf->num_mdd_events++;
11244			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
11245				 i);
11246			dev_info(&pf->pdev->dev,
11247				 "Use PF Control I/F to re-enable the VF\n");
11248			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11249		}
11250
11251		reg = rd32(hw, I40E_VP_MDET_RX(i));
11252		if (reg & I40E_VP_MDET_RX_VALID_MASK) {
11253			wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
11254			vf->num_mdd_events++;
11255			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
11256				 i);
11257			dev_info(&pf->pdev->dev,
11258				 "Use PF Control I/F to re-enable the VF\n");
11259			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11260		}
11261	}
11262
11263	/* re-enable mdd interrupt cause */
11264	clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
11265	reg = rd32(hw, I40E_PFINT_ICR0_ENA);
11266	reg |=  I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
11267	wr32(hw, I40E_PFINT_ICR0_ENA, reg);
11268	i40e_flush(hw);
11269}
11270
11271/**
11272 * i40e_service_task - Run the driver's async subtasks
11273 * @work: pointer to work_struct containing our data
11274 **/
11275static void i40e_service_task(struct work_struct *work)
11276{
11277	struct i40e_pf *pf = container_of(work,
11278					  struct i40e_pf,
11279					  service_task);
11280	unsigned long start_time = jiffies;
11281
11282	/* don't bother with service tasks if a reset is in progress */
11283	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
11284	    test_bit(__I40E_SUSPENDED, pf->state))
11285		return;
11286
11287	if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
11288		return;
11289
11290	if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) {
11291		i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]);
11292		i40e_sync_filters_subtask(pf);
11293		i40e_reset_subtask(pf);
11294		i40e_handle_mdd_event(pf);
11295		i40e_vc_process_vflr_event(pf);
11296		i40e_watchdog_subtask(pf);
11297		i40e_fdir_reinit_subtask(pf);
11298		if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
11299			/* Client subtask will reopen next time through. */
11300			i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi],
11301							   true);
11302		} else {
11303			i40e_client_subtask(pf);
11304			if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
11305					       pf->state))
11306				i40e_notify_client_of_l2_param_changes(
11307								pf->vsi[pf->lan_vsi]);
11308		}
11309		i40e_sync_filters_subtask(pf);
11310	} else {
11311		i40e_reset_subtask(pf);
11312	}
11313
11314	i40e_clean_adminq_subtask(pf);
11315
11316	/* flush memory to make sure state is correct before next watchdog */
11317	smp_mb__before_atomic();
11318	clear_bit(__I40E_SERVICE_SCHED, pf->state);
11319
11320	/* If the tasks have taken longer than one timer cycle or there
11321	 * is more work to be done, reschedule the service task now
11322	 * rather than wait for the timer to tick again.
11323	 */
11324	if (time_after(jiffies, (start_time + pf->service_timer_period)) ||
11325	    test_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state)		 ||
11326	    test_bit(__I40E_MDD_EVENT_PENDING, pf->state)		 ||
11327	    test_bit(__I40E_VFLR_EVENT_PENDING, pf->state))
11328		i40e_service_event_schedule(pf);
11329}
11330
11331/**
11332 * i40e_service_timer - timer callback
11333 * @t: timer list pointer
11334 **/
11335static void i40e_service_timer(struct timer_list *t)
11336{
11337	struct i40e_pf *pf = from_timer(pf, t, service_timer);
11338
11339	mod_timer(&pf->service_timer,
11340		  round_jiffies(jiffies + pf->service_timer_period));
11341	i40e_service_event_schedule(pf);
11342}
11343
11344/**
11345 * i40e_set_num_rings_in_vsi - Determine number of rings in the VSI
11346 * @vsi: the VSI being configured
11347 **/
11348static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
11349{
11350	struct i40e_pf *pf = vsi->back;
11351
11352	switch (vsi->type) {
11353	case I40E_VSI_MAIN:
11354		vsi->alloc_queue_pairs = pf->num_lan_qps;
11355		if (!vsi->num_tx_desc)
11356			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
11357						 I40E_REQ_DESCRIPTOR_MULTIPLE);
11358		if (!vsi->num_rx_desc)
11359			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
11360						 I40E_REQ_DESCRIPTOR_MULTIPLE);
11361		if (pf->flags & I40E_FLAG_MSIX_ENABLED)
11362			vsi->num_q_vectors = pf->num_lan_msix;
11363		else
11364			vsi->num_q_vectors = 1;
11365
11366		break;
11367
11368	case I40E_VSI_FDIR:
11369		vsi->alloc_queue_pairs = 1;
11370		vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT,
11371					 I40E_REQ_DESCRIPTOR_MULTIPLE);
11372		vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT,
11373					 I40E_REQ_DESCRIPTOR_MULTIPLE);
11374		vsi->num_q_vectors = pf->num_fdsb_msix;
11375		break;
11376
11377	case I40E_VSI_VMDQ2:
11378		vsi->alloc_queue_pairs = pf->num_vmdq_qps;
11379		if (!vsi->num_tx_desc)
11380			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
11381						 I40E_REQ_DESCRIPTOR_MULTIPLE);
11382		if (!vsi->num_rx_desc)
11383			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
11384						 I40E_REQ_DESCRIPTOR_MULTIPLE);
11385		vsi->num_q_vectors = pf->num_vmdq_msix;
11386		break;
11387
11388	case I40E_VSI_SRIOV:
11389		vsi->alloc_queue_pairs = pf->num_vf_qps;
11390		if (!vsi->num_tx_desc)
11391			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
11392						 I40E_REQ_DESCRIPTOR_MULTIPLE);
11393		if (!vsi->num_rx_desc)
11394			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
11395						 I40E_REQ_DESCRIPTOR_MULTIPLE);
11396		break;
11397
11398	default:
11399		WARN_ON(1);
11400		return -ENODATA;
11401	}
11402
11403	if (is_kdump_kernel()) {
11404		vsi->num_tx_desc = I40E_MIN_NUM_DESCRIPTORS;
11405		vsi->num_rx_desc = I40E_MIN_NUM_DESCRIPTORS;
11406	}
11407
11408	return 0;
11409}
11410
11411/**
11412 * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
11413 * @vsi: VSI pointer
11414 * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
11415 *
11416 * On error: returns error code (negative)
11417 * On success: returns 0
11418 **/
11419static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
11420{
11421	struct i40e_ring **next_rings;
11422	int size;
11423	int ret = 0;
11424
11425	/* allocate memory for both Tx, XDP Tx and Rx ring pointers */
11426	size = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs *
11427	       (i40e_enabled_xdp_vsi(vsi) ? 3 : 2);
11428	vsi->tx_rings = kzalloc(size, GFP_KERNEL);
11429	if (!vsi->tx_rings)
11430		return -ENOMEM;
11431	next_rings = vsi->tx_rings + vsi->alloc_queue_pairs;
11432	if (i40e_enabled_xdp_vsi(vsi)) {
11433		vsi->xdp_rings = next_rings;
11434		next_rings += vsi->alloc_queue_pairs;
11435	}
11436	vsi->rx_rings = next_rings;
11437
11438	if (alloc_qvectors) {
11439		/* allocate memory for q_vector pointers */
11440		size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
11441		vsi->q_vectors = kzalloc(size, GFP_KERNEL);
11442		if (!vsi->q_vectors) {
11443			ret = -ENOMEM;
11444			goto err_vectors;
11445		}
11446	}
11447	return ret;
11448
11449err_vectors:
11450	kfree(vsi->tx_rings);
11451	return ret;
11452}
11453
11454/**
11455 * i40e_vsi_mem_alloc - Allocates the next available struct vsi in the PF
11456 * @pf: board private structure
11457 * @type: type of VSI
11458 *
11459 * On error: returns error code (negative)
11460 * On success: returns vsi index in PF (positive)
11461 **/
11462static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
11463{
11464	int ret = -ENODEV;
11465	struct i40e_vsi *vsi;
11466	int vsi_idx;
11467	int i;
11468
11469	/* Need to protect the allocation of the VSIs at the PF level */
11470	mutex_lock(&pf->switch_mutex);
11471
11472	/* VSI list may be fragmented if VSI creation/destruction has
11473	 * been happening.  We can afford to do a quick scan to look
11474	 * for any free VSIs in the list.
11475	 *
11476	 * find next empty vsi slot, looping back around if necessary
11477	 */
11478	i = pf->next_vsi;
11479	while (i < pf->num_alloc_vsi && pf->vsi[i])
11480		i++;
11481	if (i >= pf->num_alloc_vsi) {
11482		i = 0;
11483		while (i < pf->next_vsi && pf->vsi[i])
11484			i++;
11485	}
11486
11487	if (i < pf->num_alloc_vsi && !pf->vsi[i]) {
11488		vsi_idx = i;             /* Found one! */
11489	} else {
11490		ret = -ENODEV;
11491		goto unlock_pf;  /* out of VSI slots! */
11492	}
11493	pf->next_vsi = ++i;
11494
11495	vsi = kzalloc(sizeof(*vsi), GFP_KERNEL);
11496	if (!vsi) {
11497		ret = -ENOMEM;
11498		goto unlock_pf;
11499	}
11500	vsi->type = type;
11501	vsi->back = pf;
11502	set_bit(__I40E_VSI_DOWN, vsi->state);
11503	vsi->flags = 0;
11504	vsi->idx = vsi_idx;
11505	vsi->int_rate_limit = 0;
11506	vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ?
11507				pf->rss_table_size : 64;
11508	vsi->netdev_registered = false;
11509	vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
11510	hash_init(vsi->mac_filter_hash);
11511	vsi->irqs_ready = false;
11512
11513	if (type == I40E_VSI_MAIN) {
11514		vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL);
11515		if (!vsi->af_xdp_zc_qps)
11516			goto err_rings;
11517	}
11518
11519	ret = i40e_set_num_rings_in_vsi(vsi);
11520	if (ret)
11521		goto err_rings;
11522
11523	ret = i40e_vsi_alloc_arrays(vsi, true);
11524	if (ret)
11525		goto err_rings;
11526
11527	/* Setup default MSIX irq handler for VSI */
11528	i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
11529
11530	/* Initialize VSI lock */
11531	spin_lock_init(&vsi->mac_filter_hash_lock);
11532	pf->vsi[vsi_idx] = vsi;
11533	ret = vsi_idx;
11534	goto unlock_pf;
11535
11536err_rings:
11537	bitmap_free(vsi->af_xdp_zc_qps);
11538	pf->next_vsi = i - 1;
11539	kfree(vsi);
11540unlock_pf:
11541	mutex_unlock(&pf->switch_mutex);
11542	return ret;
11543}
11544
11545/**
11546 * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI
11547 * @vsi: VSI pointer
11548 * @free_qvectors: a bool to specify if q_vectors need to be freed.
11549 *
11550 * On error: returns error code (negative)
11551 * On success: returns 0
11552 **/
11553static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors)
11554{
11555	/* free the ring and vector containers */
11556	if (free_qvectors) {
11557		kfree(vsi->q_vectors);
11558		vsi->q_vectors = NULL;
11559	}
11560	kfree(vsi->tx_rings);
11561	vsi->tx_rings = NULL;
11562	vsi->rx_rings = NULL;
11563	vsi->xdp_rings = NULL;
11564}
11565
11566/**
11567 * i40e_clear_rss_config_user - clear the user configured RSS hash keys
11568 * and lookup table
11569 * @vsi: Pointer to VSI structure
11570 */
11571static void i40e_clear_rss_config_user(struct i40e_vsi *vsi)
11572{
11573	if (!vsi)
11574		return;
11575
11576	kfree(vsi->rss_hkey_user);
11577	vsi->rss_hkey_user = NULL;
11578
11579	kfree(vsi->rss_lut_user);
11580	vsi->rss_lut_user = NULL;
11581}
11582
11583/**
11584 * i40e_vsi_clear - Deallocate the VSI provided
11585 * @vsi: the VSI being un-configured
11586 **/
11587static int i40e_vsi_clear(struct i40e_vsi *vsi)
11588{
11589	struct i40e_pf *pf;
11590
11591	if (!vsi)
11592		return 0;
11593
11594	if (!vsi->back)
11595		goto free_vsi;
11596	pf = vsi->back;
11597
11598	mutex_lock(&pf->switch_mutex);
11599	if (!pf->vsi[vsi->idx]) {
11600		dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
11601			vsi->idx, vsi->idx, vsi->type);
11602		goto unlock_vsi;
11603	}
11604
11605	if (pf->vsi[vsi->idx] != vsi) {
11606		dev_err(&pf->pdev->dev,
11607			"pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
11608			pf->vsi[vsi->idx]->idx,
11609			pf->vsi[vsi->idx]->type,
11610			vsi->idx, vsi->type);
11611		goto unlock_vsi;
11612	}
11613
11614	/* updates the PF for this cleared vsi */
11615	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
11616	i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
11617
11618	bitmap_free(vsi->af_xdp_zc_qps);
11619	i40e_vsi_free_arrays(vsi, true);
11620	i40e_clear_rss_config_user(vsi);
11621
11622	pf->vsi[vsi->idx] = NULL;
11623	if (vsi->idx < pf->next_vsi)
11624		pf->next_vsi = vsi->idx;
11625
11626unlock_vsi:
11627	mutex_unlock(&pf->switch_mutex);
11628free_vsi:
11629	kfree(vsi);
11630
11631	return 0;
11632}
11633
11634/**
11635 * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI
11636 * @vsi: the VSI being cleaned
11637 **/
11638static void i40e_vsi_clear_rings(struct i40e_vsi *vsi)
11639{
11640	int i;
11641
11642	if (vsi->tx_rings && vsi->tx_rings[0]) {
11643		for (i = 0; i < vsi->alloc_queue_pairs; i++) {
11644			kfree_rcu(vsi->tx_rings[i], rcu);
11645			WRITE_ONCE(vsi->tx_rings[i], NULL);
11646			WRITE_ONCE(vsi->rx_rings[i], NULL);
11647			if (vsi->xdp_rings)
11648				WRITE_ONCE(vsi->xdp_rings[i], NULL);
11649		}
11650	}
11651}
11652
11653/**
11654 * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI
11655 * @vsi: the VSI being configured
11656 **/
11657static int i40e_alloc_rings(struct i40e_vsi *vsi)
11658{
11659	int i, qpv = i40e_enabled_xdp_vsi(vsi) ? 3 : 2;
11660	struct i40e_pf *pf = vsi->back;
11661	struct i40e_ring *ring;
11662
11663	/* Set basic values in the rings to be used later during open() */
11664	for (i = 0; i < vsi->alloc_queue_pairs; i++) {
11665		/* allocate space for both Tx and Rx in one shot */
11666		ring = kcalloc(qpv, sizeof(struct i40e_ring), GFP_KERNEL);
11667		if (!ring)
11668			goto err_out;
11669
11670		ring->queue_index = i;
11671		ring->reg_idx = vsi->base_queue + i;
11672		ring->ring_active = false;
11673		ring->vsi = vsi;
11674		ring->netdev = vsi->netdev;
11675		ring->dev = &pf->pdev->dev;
11676		ring->count = vsi->num_tx_desc;
11677		ring->size = 0;
11678		ring->dcb_tc = 0;
11679		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
11680			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
11681		ring->itr_setting = pf->tx_itr_default;
11682		WRITE_ONCE(vsi->tx_rings[i], ring++);
11683
11684		if (!i40e_enabled_xdp_vsi(vsi))
11685			goto setup_rx;
11686
11687		ring->queue_index = vsi->alloc_queue_pairs + i;
11688		ring->reg_idx = vsi->base_queue + ring->queue_index;
11689		ring->ring_active = false;
11690		ring->vsi = vsi;
11691		ring->netdev = NULL;
11692		ring->dev = &pf->pdev->dev;
11693		ring->count = vsi->num_tx_desc;
11694		ring->size = 0;
11695		ring->dcb_tc = 0;
11696		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
11697			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
11698		set_ring_xdp(ring);
11699		ring->itr_setting = pf->tx_itr_default;
11700		WRITE_ONCE(vsi->xdp_rings[i], ring++);
11701
11702setup_rx:
11703		ring->queue_index = i;
11704		ring->reg_idx = vsi->base_queue + i;
11705		ring->ring_active = false;
11706		ring->vsi = vsi;
11707		ring->netdev = vsi->netdev;
11708		ring->dev = &pf->pdev->dev;
11709		ring->count = vsi->num_rx_desc;
11710		ring->size = 0;
11711		ring->dcb_tc = 0;
11712		ring->itr_setting = pf->rx_itr_default;
11713		WRITE_ONCE(vsi->rx_rings[i], ring);
11714	}
11715
11716	return 0;
11717
11718err_out:
11719	i40e_vsi_clear_rings(vsi);
11720	return -ENOMEM;
11721}
11722
11723/**
11724 * i40e_reserve_msix_vectors - Reserve MSI-X vectors in the kernel
11725 * @pf: board private structure
11726 * @vectors: the number of MSI-X vectors to request
11727 *
11728 * Returns the number of vectors reserved, or error
11729 **/
11730static int i40e_reserve_msix_vectors(struct i40e_pf *pf, int vectors)
11731{
11732	vectors = pci_enable_msix_range(pf->pdev, pf->msix_entries,
11733					I40E_MIN_MSIX, vectors);
11734	if (vectors < 0) {
11735		dev_info(&pf->pdev->dev,
11736			 "MSI-X vector reservation failed: %d\n", vectors);
11737		vectors = 0;
11738	}
11739
11740	return vectors;
11741}
11742
11743/**
11744 * i40e_init_msix - Setup the MSIX capability
11745 * @pf: board private structure
11746 *
11747 * Work with the OS to set up the MSIX vectors needed.
11748 *
11749 * Returns the number of vectors reserved or negative on failure
11750 **/
11751static int i40e_init_msix(struct i40e_pf *pf)
11752{
11753	struct i40e_hw *hw = &pf->hw;
11754	int cpus, extra_vectors;
11755	int vectors_left;
11756	int v_budget, i;
11757	int v_actual;
11758	int iwarp_requested = 0;
11759
11760	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
11761		return -ENODEV;
11762
11763	/* The number of vectors we'll request will be comprised of:
11764	 *   - Add 1 for "other" cause for Admin Queue events, etc.
11765	 *   - The number of LAN queue pairs
11766	 *	- Queues being used for RSS.
11767	 *		We don't need as many as max_rss_size vectors.
11768	 *		use rss_size instead in the calculation since that
11769	 *		is governed by number of cpus in the system.
11770	 *	- assumes symmetric Tx/Rx pairing
11771	 *   - The number of VMDq pairs
11772	 *   - The CPU count within the NUMA node if iWARP is enabled
11773	 * Once we count this up, try the request.
11774	 *
11775	 * If we can't get what we want, we'll simplify to nearly nothing
11776	 * and try again.  If that still fails, we punt.
11777	 */
11778	vectors_left = hw->func_caps.num_msix_vectors;
11779	v_budget = 0;
11780
11781	/* reserve one vector for miscellaneous handler */
11782	if (vectors_left) {
11783		v_budget++;
11784		vectors_left--;
11785	}
11786
11787	/* reserve some vectors for the main PF traffic queues. Initially we
11788	 * only reserve at most 50% of the available vectors, in the case that
11789	 * the number of online CPUs is large. This ensures that we can enable
11790	 * extra features as well. Once we've enabled the other features, we
11791	 * will use any remaining vectors to reach as close as we can to the
11792	 * number of online CPUs.
11793	 */
11794	cpus = num_online_cpus();
11795	pf->num_lan_msix = min_t(int, cpus, vectors_left / 2);
11796	vectors_left -= pf->num_lan_msix;
11797
11798	/* reserve one vector for sideband flow director */
11799	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
11800		if (vectors_left) {
11801			pf->num_fdsb_msix = 1;
11802			v_budget++;
11803			vectors_left--;
11804		} else {
11805			pf->num_fdsb_msix = 0;
11806		}
11807	}
11808
11809	/* can we reserve enough for iWARP? */
11810	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
11811		iwarp_requested = pf->num_iwarp_msix;
11812
11813		if (!vectors_left)
11814			pf->num_iwarp_msix = 0;
11815		else if (vectors_left < pf->num_iwarp_msix)
11816			pf->num_iwarp_msix = 1;
11817		v_budget += pf->num_iwarp_msix;
11818		vectors_left -= pf->num_iwarp_msix;
11819	}
11820
11821	/* any vectors left over go for VMDq support */
11822	if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
11823		if (!vectors_left) {
11824			pf->num_vmdq_msix = 0;
11825			pf->num_vmdq_qps = 0;
11826		} else {
11827			int vmdq_vecs_wanted =
11828				pf->num_vmdq_vsis * pf->num_vmdq_qps;
11829			int vmdq_vecs =
11830				min_t(int, vectors_left, vmdq_vecs_wanted);
11831
11832			/* if we're short on vectors for what's desired, we limit
11833			 * the queues per vmdq.  If this is still more than are
11834			 * available, the user will need to change the number of
11835			 * queues/vectors used by the PF later with the ethtool
11836			 * channels command
11837			 */
11838			if (vectors_left < vmdq_vecs_wanted) {
11839				pf->num_vmdq_qps = 1;
11840				vmdq_vecs_wanted = pf->num_vmdq_vsis;
11841				vmdq_vecs = min_t(int,
11842						  vectors_left,
11843						  vmdq_vecs_wanted);
11844			}
11845			pf->num_vmdq_msix = pf->num_vmdq_qps;
11846
11847			v_budget += vmdq_vecs;
11848			vectors_left -= vmdq_vecs;
11849		}
11850	}
11851
11852	/* On systems with a large number of SMP cores, we previously limited
11853	 * the number of vectors for num_lan_msix to be at most 50% of the
11854	 * available vectors, to allow for other features. Now, we add back
11855	 * the remaining vectors. However, we ensure that the total
11856	 * num_lan_msix will not exceed num_online_cpus(). To do this, we
11857	 * calculate the number of vectors we can add without going over the
11858	 * cap of CPUs. For systems with a small number of CPUs this will be
11859	 * zero.
11860	 */
11861	extra_vectors = min_t(int, cpus - pf->num_lan_msix, vectors_left);
11862	pf->num_lan_msix += extra_vectors;
11863	vectors_left -= extra_vectors;
11864
11865	WARN(vectors_left < 0,
11866	     "Calculation of remaining vectors underflowed. This is an accounting bug when determining total MSI-X vectors.\n");
11867
11868	v_budget += pf->num_lan_msix;
11869	pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
11870				   GFP_KERNEL);
11871	if (!pf->msix_entries)
11872		return -ENOMEM;
11873
11874	for (i = 0; i < v_budget; i++)
11875		pf->msix_entries[i].entry = i;
11876	v_actual = i40e_reserve_msix_vectors(pf, v_budget);
11877
11878	if (v_actual < I40E_MIN_MSIX) {
11879		pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
11880		kfree(pf->msix_entries);
11881		pf->msix_entries = NULL;
11882		pci_disable_msix(pf->pdev);
11883		return -ENODEV;
11884
11885	} else if (v_actual == I40E_MIN_MSIX) {
11886		/* Adjust for minimal MSIX use */
11887		pf->num_vmdq_vsis = 0;
11888		pf->num_vmdq_qps = 0;
11889		pf->num_lan_qps = 1;
11890		pf->num_lan_msix = 1;
11891
11892	} else if (v_actual != v_budget) {
11893		/* If we have limited resources, we will start with no vectors
11894		 * for the special features and then allocate vectors to some
11895		 * of these features based on the policy and at the end disable
11896		 * the features that did not get any vectors.
11897		 */
11898		int vec;
11899
11900		dev_info(&pf->pdev->dev,
11901			 "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
11902			 v_actual, v_budget);
11903		/* reserve the misc vector */
11904		vec = v_actual - 1;
11905
11906		/* Scale vector usage down */
11907		pf->num_vmdq_msix = 1;    /* force VMDqs to only one vector */
11908		pf->num_vmdq_vsis = 1;
11909		pf->num_vmdq_qps = 1;
11910
11911		/* partition out the remaining vectors */
11912		switch (vec) {
11913		case 2:
11914			pf->num_lan_msix = 1;
11915			break;
11916		case 3:
11917			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
11918				pf->num_lan_msix = 1;
11919				pf->num_iwarp_msix = 1;
11920			} else {
11921				pf->num_lan_msix = 2;
11922			}
11923			break;
11924		default:
11925			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
11926				pf->num_iwarp_msix = min_t(int, (vec / 3),
11927						 iwarp_requested);
11928				pf->num_vmdq_vsis = min_t(int, (vec / 3),
11929						  I40E_DEFAULT_NUM_VMDQ_VSI);
11930			} else {
11931				pf->num_vmdq_vsis = min_t(int, (vec / 2),
11932						  I40E_DEFAULT_NUM_VMDQ_VSI);
11933			}
11934			if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
11935				pf->num_fdsb_msix = 1;
11936				vec--;
11937			}
11938			pf->num_lan_msix = min_t(int,
11939			       (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
11940							      pf->num_lan_msix);
11941			pf->num_lan_qps = pf->num_lan_msix;
11942			break;
11943		}
11944	}
11945
11946	if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
11947	    (pf->num_fdsb_msix == 0)) {
11948		dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
11949		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
11950		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
11951	}
11952	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
11953	    (pf->num_vmdq_msix == 0)) {
11954		dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
11955		pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
11956	}
11957
11958	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
11959	    (pf->num_iwarp_msix == 0)) {
11960		dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
11961		pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
11962	}
11963	i40e_debug(&pf->hw, I40E_DEBUG_INIT,
11964		   "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n",
11965		   pf->num_lan_msix,
11966		   pf->num_vmdq_msix * pf->num_vmdq_vsis,
11967		   pf->num_fdsb_msix,
11968		   pf->num_iwarp_msix);
11969
11970	return v_actual;
11971}
11972
11973/**
11974 * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
11975 * @vsi: the VSI being configured
11976 * @v_idx: index of the vector in the vsi struct
11977 *
11978 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
11979 **/
11980static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
11981{
11982	struct i40e_q_vector *q_vector;
11983
11984	/* allocate q_vector */
11985	q_vector = kzalloc(sizeof(struct i40e_q_vector), GFP_KERNEL);
11986	if (!q_vector)
11987		return -ENOMEM;
11988
11989	q_vector->vsi = vsi;
11990	q_vector->v_idx = v_idx;
11991	cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
11992
11993	if (vsi->netdev)
11994		netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll);
11995
11996	/* tie q_vector and vsi together */
11997	vsi->q_vectors[v_idx] = q_vector;
11998
11999	return 0;
12000}
12001
12002/**
12003 * i40e_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
12004 * @vsi: the VSI being configured
12005 *
12006 * We allocate one q_vector per queue interrupt.  If allocation fails we
12007 * return -ENOMEM.
12008 **/
12009static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
12010{
12011	struct i40e_pf *pf = vsi->back;
12012	int err, v_idx, num_q_vectors;
12013
12014	/* if not MSIX, give the one vector only to the LAN VSI */
12015	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
12016		num_q_vectors = vsi->num_q_vectors;
12017	else if (vsi == pf->vsi[pf->lan_vsi])
12018		num_q_vectors = 1;
12019	else
12020		return -EINVAL;
12021
12022	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
12023		err = i40e_vsi_alloc_q_vector(vsi, v_idx);
12024		if (err)
12025			goto err_out;
12026	}
12027
12028	return 0;
12029
12030err_out:
12031	while (v_idx--)
12032		i40e_free_q_vector(vsi, v_idx);
12033
12034	return err;
12035}
12036
12037/**
12038 * i40e_init_interrupt_scheme - Determine proper interrupt scheme
12039 * @pf: board private structure to initialize
12040 **/
12041static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
12042{
12043	int vectors = 0;
12044	ssize_t size;
12045
12046	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
12047		vectors = i40e_init_msix(pf);
12048		if (vectors < 0) {
12049			pf->flags &= ~(I40E_FLAG_MSIX_ENABLED	|
12050				       I40E_FLAG_IWARP_ENABLED	|
12051				       I40E_FLAG_RSS_ENABLED	|
12052				       I40E_FLAG_DCB_CAPABLE	|
12053				       I40E_FLAG_DCB_ENABLED	|
12054				       I40E_FLAG_SRIOV_ENABLED	|
12055				       I40E_FLAG_FD_SB_ENABLED	|
12056				       I40E_FLAG_FD_ATR_ENABLED	|
12057				       I40E_FLAG_VMDQ_ENABLED);
12058			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
12059
12060			/* rework the queue expectations without MSIX */
12061			i40e_determine_queue_usage(pf);
12062		}
12063	}
12064
12065	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) &&
12066	    (pf->flags & I40E_FLAG_MSI_ENABLED)) {
12067		dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n");
12068		vectors = pci_enable_msi(pf->pdev);
12069		if (vectors < 0) {
12070			dev_info(&pf->pdev->dev, "MSI init failed - %d\n",
12071				 vectors);
12072			pf->flags &= ~I40E_FLAG_MSI_ENABLED;
12073		}
12074		vectors = 1;  /* one MSI or Legacy vector */
12075	}
12076
12077	if (!(pf->flags & (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED)))
12078		dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n");
12079
12080	/* set up vector assignment tracking */
12081	size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors);
12082	pf->irq_pile = kzalloc(size, GFP_KERNEL);
12083	if (!pf->irq_pile)
12084		return -ENOMEM;
12085
12086	pf->irq_pile->num_entries = vectors;
12087
12088	/* track first vector for misc interrupts, ignore return */
12089	(void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
12090
12091	return 0;
12092}
12093
12094/**
12095 * i40e_restore_interrupt_scheme - Restore the interrupt scheme
12096 * @pf: private board data structure
12097 *
12098 * Restore the interrupt scheme that was cleared when we suspended the
12099 * device. This should be called during resume to re-allocate the q_vectors
12100 * and reacquire IRQs.
12101 */
12102static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
12103{
12104	int err, i;
12105
12106	/* We cleared the MSI and MSI-X flags when disabling the old interrupt
12107	 * scheme. We need to re-enabled them here in order to attempt to
12108	 * re-acquire the MSI or MSI-X vectors
12109	 */
12110	pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
12111
12112	err = i40e_init_interrupt_scheme(pf);
12113	if (err)
12114		return err;
12115
12116	/* Now that we've re-acquired IRQs, we need to remap the vectors and
12117	 * rings together again.
12118	 */
12119	for (i = 0; i < pf->num_alloc_vsi; i++) {
12120		if (pf->vsi[i]) {
12121			err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
12122			if (err)
12123				goto err_unwind;
12124			i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
12125		}
12126	}
12127
12128	err = i40e_setup_misc_vector(pf);
12129	if (err)
12130		goto err_unwind;
12131
12132	if (pf->flags & I40E_FLAG_IWARP_ENABLED)
12133		i40e_client_update_msix_info(pf);
12134
12135	return 0;
12136
12137err_unwind:
12138	while (i--) {
12139		if (pf->vsi[i])
12140			i40e_vsi_free_q_vectors(pf->vsi[i]);
12141	}
12142
12143	return err;
12144}
12145
12146/**
12147 * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle
12148 * non queue events in recovery mode
12149 * @pf: board private structure
12150 *
12151 * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage
12152 * the non-queue interrupts, e.g. AdminQ and errors in recovery mode.
12153 * This is handled differently than in recovery mode since no Tx/Rx resources
12154 * are being allocated.
12155 **/
12156static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf)
12157{
12158	int err;
12159
12160	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
12161		err = i40e_setup_misc_vector(pf);
12162
12163		if (err) {
12164			dev_info(&pf->pdev->dev,
12165				 "MSI-X misc vector request failed, error %d\n",
12166				 err);
12167			return err;
12168		}
12169	} else {
12170		u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED;
12171
12172		err = request_irq(pf->pdev->irq, i40e_intr, flags,
12173				  pf->int_name, pf);
12174
12175		if (err) {
12176			dev_info(&pf->pdev->dev,
12177				 "MSI/legacy misc vector request failed, error %d\n",
12178				 err);
12179			return err;
12180		}
12181		i40e_enable_misc_int_causes(pf);
12182		i40e_irq_dynamic_enable_icr0(pf);
12183	}
12184
12185	return 0;
12186}
12187
12188/**
12189 * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
12190 * @pf: board private structure
12191 *
12192 * This sets up the handler for MSIX 0, which is used to manage the
12193 * non-queue interrupts, e.g. AdminQ and errors.  This is not used
12194 * when in MSI or Legacy interrupt mode.
12195 **/
12196static int i40e_setup_misc_vector(struct i40e_pf *pf)
12197{
12198	struct i40e_hw *hw = &pf->hw;
12199	int err = 0;
12200
12201	/* Only request the IRQ once, the first time through. */
12202	if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
12203		err = request_irq(pf->msix_entries[0].vector,
12204				  i40e_intr, 0, pf->int_name, pf);
12205		if (err) {
12206			clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
12207			dev_info(&pf->pdev->dev,
12208				 "request_irq for %s failed: %d\n",
12209				 pf->int_name, err);
12210			return -EFAULT;
12211		}
12212	}
12213
12214	i40e_enable_misc_int_causes(pf);
12215
12216	/* associate no queues to the misc vector */
12217	wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
12218	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1);
12219
12220	i40e_flush(hw);
12221
12222	i40e_irq_dynamic_enable_icr0(pf);
12223
12224	return err;
12225}
12226
12227/**
12228 * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
12229 * @vsi: Pointer to vsi structure
12230 * @seed: Buffter to store the hash keys
12231 * @lut: Buffer to store the lookup table entries
12232 * @lut_size: Size of buffer to store the lookup table entries
12233 *
12234 * Return 0 on success, negative on failure
12235 */
12236static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
12237			   u8 *lut, u16 lut_size)
12238{
12239	struct i40e_pf *pf = vsi->back;
12240	struct i40e_hw *hw = &pf->hw;
12241	int ret = 0;
12242
12243	if (seed) {
12244		ret = i40e_aq_get_rss_key(hw, vsi->id,
12245			(struct i40e_aqc_get_set_rss_key_data *)seed);
12246		if (ret) {
12247			dev_info(&pf->pdev->dev,
12248				 "Cannot get RSS key, err %pe aq_err %s\n",
12249				 ERR_PTR(ret),
12250				 i40e_aq_str(&pf->hw,
12251					     pf->hw.aq.asq_last_status));
12252			return ret;
12253		}
12254	}
12255
12256	if (lut) {
12257		bool pf_lut = vsi->type == I40E_VSI_MAIN;
12258
12259		ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
12260		if (ret) {
12261			dev_info(&pf->pdev->dev,
12262				 "Cannot get RSS lut, err %pe aq_err %s\n",
12263				 ERR_PTR(ret),
12264				 i40e_aq_str(&pf->hw,
12265					     pf->hw.aq.asq_last_status));
12266			return ret;
12267		}
12268	}
12269
12270	return ret;
12271}
12272
12273/**
12274 * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
12275 * @vsi: Pointer to vsi structure
12276 * @seed: RSS hash seed
12277 * @lut: Lookup table
12278 * @lut_size: Lookup table size
12279 *
12280 * Returns 0 on success, negative on failure
12281 **/
12282static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
12283			       const u8 *lut, u16 lut_size)
12284{
12285	struct i40e_pf *pf = vsi->back;
12286	struct i40e_hw *hw = &pf->hw;
12287	u16 vf_id = vsi->vf_id;
12288	u8 i;
12289
12290	/* Fill out hash function seed */
12291	if (seed) {
12292		u32 *seed_dw = (u32 *)seed;
12293
12294		if (vsi->type == I40E_VSI_MAIN) {
12295			for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
12296				wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
12297		} else if (vsi->type == I40E_VSI_SRIOV) {
12298			for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++)
12299				wr32(hw, I40E_VFQF_HKEY1(i, vf_id), seed_dw[i]);
12300		} else {
12301			dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n");
12302		}
12303	}
12304
12305	if (lut) {
12306		u32 *lut_dw = (u32 *)lut;
12307
12308		if (vsi->type == I40E_VSI_MAIN) {
12309			if (lut_size != I40E_HLUT_ARRAY_SIZE)
12310				return -EINVAL;
12311			for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
12312				wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]);
12313		} else if (vsi->type == I40E_VSI_SRIOV) {
12314			if (lut_size != I40E_VF_HLUT_ARRAY_SIZE)
12315				return -EINVAL;
12316			for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
12317				wr32(hw, I40E_VFQF_HLUT1(i, vf_id), lut_dw[i]);
12318		} else {
12319			dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
12320		}
12321	}
12322	i40e_flush(hw);
12323
12324	return 0;
12325}
12326
12327/**
12328 * i40e_get_rss_reg - Get the RSS keys and lut by reading registers
12329 * @vsi: Pointer to VSI structure
12330 * @seed: Buffer to store the keys
12331 * @lut: Buffer to store the lookup table entries
12332 * @lut_size: Size of buffer to store the lookup table entries
12333 *
12334 * Returns 0 on success, negative on failure
12335 */
12336static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed,
12337			    u8 *lut, u16 lut_size)
12338{
12339	struct i40e_pf *pf = vsi->back;
12340	struct i40e_hw *hw = &pf->hw;
12341	u16 i;
12342
12343	if (seed) {
12344		u32 *seed_dw = (u32 *)seed;
12345
12346		for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
12347			seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i));
12348	}
12349	if (lut) {
12350		u32 *lut_dw = (u32 *)lut;
12351
12352		if (lut_size != I40E_HLUT_ARRAY_SIZE)
12353			return -EINVAL;
12354		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
12355			lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i));
12356	}
12357
12358	return 0;
12359}
12360
12361/**
12362 * i40e_config_rss - Configure RSS keys and lut
12363 * @vsi: Pointer to VSI structure
12364 * @seed: RSS hash seed
12365 * @lut: Lookup table
12366 * @lut_size: Lookup table size
12367 *
12368 * Returns 0 on success, negative on failure
12369 */
12370int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
12371{
12372	struct i40e_pf *pf = vsi->back;
12373
12374	if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
12375		return i40e_config_rss_aq(vsi, seed, lut, lut_size);
12376	else
12377		return i40e_config_rss_reg(vsi, seed, lut, lut_size);
12378}
12379
12380/**
12381 * i40e_get_rss - Get RSS keys and lut
12382 * @vsi: Pointer to VSI structure
12383 * @seed: Buffer to store the keys
12384 * @lut: Buffer to store the lookup table entries
12385 * @lut_size: Size of buffer to store the lookup table entries
12386 *
12387 * Returns 0 on success, negative on failure
12388 */
12389int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
12390{
12391	struct i40e_pf *pf = vsi->back;
12392
12393	if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
12394		return i40e_get_rss_aq(vsi, seed, lut, lut_size);
12395	else
12396		return i40e_get_rss_reg(vsi, seed, lut, lut_size);
12397}
12398
12399/**
12400 * i40e_fill_rss_lut - Fill the RSS lookup table with default values
12401 * @pf: Pointer to board private structure
12402 * @lut: Lookup table
12403 * @rss_table_size: Lookup table size
12404 * @rss_size: Range of queue number for hashing
12405 */
12406void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
12407		       u16 rss_table_size, u16 rss_size)
12408{
12409	u16 i;
12410
12411	for (i = 0; i < rss_table_size; i++)
12412		lut[i] = i % rss_size;
12413}
12414
12415/**
12416 * i40e_pf_config_rss - Prepare for RSS if used
12417 * @pf: board private structure
12418 **/
12419static int i40e_pf_config_rss(struct i40e_pf *pf)
12420{
12421	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
12422	u8 seed[I40E_HKEY_ARRAY_SIZE];
12423	u8 *lut;
12424	struct i40e_hw *hw = &pf->hw;
12425	u32 reg_val;
12426	u64 hena;
12427	int ret;
12428
12429	/* By default we enable TCP/UDP with IPv4/IPv6 ptypes */
12430	hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
12431		((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
12432	hena |= i40e_pf_get_default_rss_hena(pf);
12433
12434	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
12435	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
12436
12437	/* Determine the RSS table size based on the hardware capabilities */
12438	reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0);
12439	reg_val = (pf->rss_table_size == 512) ?
12440			(reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) :
12441			(reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512);
12442	i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val);
12443
12444	/* Determine the RSS size of the VSI */
12445	if (!vsi->rss_size) {
12446		u16 qcount;
12447		/* If the firmware does something weird during VSI init, we
12448		 * could end up with zero TCs. Check for that to avoid
12449		 * divide-by-zero. It probably won't pass traffic, but it also
12450		 * won't panic.
12451		 */
12452		qcount = vsi->num_queue_pairs /
12453			 (vsi->tc_config.numtc ? vsi->tc_config.numtc : 1);
12454		vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
12455	}
12456	if (!vsi->rss_size)
12457		return -EINVAL;
12458
12459	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
12460	if (!lut)
12461		return -ENOMEM;
12462
12463	/* Use user configured lut if there is one, otherwise use default */
12464	if (vsi->rss_lut_user)
12465		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
12466	else
12467		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
12468
12469	/* Use user configured hash key if there is one, otherwise
12470	 * use default.
12471	 */
12472	if (vsi->rss_hkey_user)
12473		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
12474	else
12475		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
12476	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
12477	kfree(lut);
12478
12479	return ret;
12480}
12481
12482/**
12483 * i40e_reconfig_rss_queues - change number of queues for rss and rebuild
12484 * @pf: board private structure
12485 * @queue_count: the requested queue count for rss.
12486 *
12487 * returns 0 if rss is not enabled, if enabled returns the final rss queue
12488 * count which may be different from the requested queue count.
12489 * Note: expects to be called while under rtnl_lock()
12490 **/
12491int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
12492{
12493	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
12494	int new_rss_size;
12495
12496	if (!(pf->flags & I40E_FLAG_RSS_ENABLED))
12497		return 0;
12498
12499	queue_count = min_t(int, queue_count, num_online_cpus());
12500	new_rss_size = min_t(int, queue_count, pf->rss_size_max);
12501
12502	if (queue_count != vsi->num_queue_pairs) {
12503		u16 qcount;
12504
12505		vsi->req_queue_pairs = queue_count;
12506		i40e_prep_for_reset(pf);
12507		if (test_bit(__I40E_IN_REMOVE, pf->state))
12508			return pf->alloc_rss_size;
12509
12510		pf->alloc_rss_size = new_rss_size;
12511
12512		i40e_reset_and_rebuild(pf, true, true);
12513
12514		/* Discard the user configured hash keys and lut, if less
12515		 * queues are enabled.
12516		 */
12517		if (queue_count < vsi->rss_size) {
12518			i40e_clear_rss_config_user(vsi);
12519			dev_dbg(&pf->pdev->dev,
12520				"discard user configured hash keys and lut\n");
12521		}
12522
12523		/* Reset vsi->rss_size, as number of enabled queues changed */
12524		qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
12525		vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
12526
12527		i40e_pf_config_rss(pf);
12528	}
12529	dev_info(&pf->pdev->dev, "User requested queue count/HW max RSS count:  %d/%d\n",
12530		 vsi->req_queue_pairs, pf->rss_size_max);
12531	return pf->alloc_rss_size;
12532}
12533
12534/**
12535 * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition
12536 * @pf: board private structure
12537 **/
12538int i40e_get_partition_bw_setting(struct i40e_pf *pf)
12539{
12540	bool min_valid, max_valid;
12541	u32 max_bw, min_bw;
12542	int status;
12543
12544	status = i40e_read_bw_from_alt_ram(&pf->hw, &max_bw, &min_bw,
12545					   &min_valid, &max_valid);
12546
12547	if (!status) {
12548		if (min_valid)
12549			pf->min_bw = min_bw;
12550		if (max_valid)
12551			pf->max_bw = max_bw;
12552	}
12553
12554	return status;
12555}
12556
12557/**
12558 * i40e_set_partition_bw_setting - Set BW settings for this PF partition
12559 * @pf: board private structure
12560 **/
12561int i40e_set_partition_bw_setting(struct i40e_pf *pf)
12562{
12563	struct i40e_aqc_configure_partition_bw_data bw_data;
12564	int status;
12565
12566	memset(&bw_data, 0, sizeof(bw_data));
12567
12568	/* Set the valid bit for this PF */
12569	bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id));
12570	bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK;
12571	bw_data.min_bw[pf->hw.pf_id] = pf->min_bw & I40E_ALT_BW_VALUE_MASK;
12572
12573	/* Set the new bandwidths */
12574	status = i40e_aq_configure_partition_bw(&pf->hw, &bw_data, NULL);
12575
12576	return status;
12577}
12578
12579/**
12580 * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition
12581 * @pf: board private structure
12582 **/
12583int i40e_commit_partition_bw_setting(struct i40e_pf *pf)
12584{
12585	/* Commit temporary BW setting to permanent NVM image */
12586	enum i40e_admin_queue_err last_aq_status;
12587	u16 nvm_word;
12588	int ret;
12589
12590	if (pf->hw.partition_id != 1) {
12591		dev_info(&pf->pdev->dev,
12592			 "Commit BW only works on partition 1! This is partition %d",
12593			 pf->hw.partition_id);
12594		ret = -EOPNOTSUPP;
12595		goto bw_commit_out;
12596	}
12597
12598	/* Acquire NVM for read access */
12599	ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ);
12600	last_aq_status = pf->hw.aq.asq_last_status;
12601	if (ret) {
12602		dev_info(&pf->pdev->dev,
12603			 "Cannot acquire NVM for read access, err %pe aq_err %s\n",
12604			 ERR_PTR(ret),
12605			 i40e_aq_str(&pf->hw, last_aq_status));
12606		goto bw_commit_out;
12607	}
12608
12609	/* Read word 0x10 of NVM - SW compatibility word 1 */
12610	ret = i40e_aq_read_nvm(&pf->hw,
12611			       I40E_SR_NVM_CONTROL_WORD,
12612			       0x10, sizeof(nvm_word), &nvm_word,
12613			       false, NULL);
12614	/* Save off last admin queue command status before releasing
12615	 * the NVM
12616	 */
12617	last_aq_status = pf->hw.aq.asq_last_status;
12618	i40e_release_nvm(&pf->hw);
12619	if (ret) {
12620		dev_info(&pf->pdev->dev, "NVM read error, err %pe aq_err %s\n",
12621			 ERR_PTR(ret),
12622			 i40e_aq_str(&pf->hw, last_aq_status));
12623		goto bw_commit_out;
12624	}
12625
12626	/* Wait a bit for NVM release to complete */
12627	msleep(50);
12628
12629	/* Acquire NVM for write access */
12630	ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE);
12631	last_aq_status = pf->hw.aq.asq_last_status;
12632	if (ret) {
12633		dev_info(&pf->pdev->dev,
12634			 "Cannot acquire NVM for write access, err %pe aq_err %s\n",
12635			 ERR_PTR(ret),
12636			 i40e_aq_str(&pf->hw, last_aq_status));
12637		goto bw_commit_out;
12638	}
12639	/* Write it back out unchanged to initiate update NVM,
12640	 * which will force a write of the shadow (alt) RAM to
12641	 * the NVM - thus storing the bandwidth values permanently.
12642	 */
12643	ret = i40e_aq_update_nvm(&pf->hw,
12644				 I40E_SR_NVM_CONTROL_WORD,
12645				 0x10, sizeof(nvm_word),
12646				 &nvm_word, true, 0, NULL);
12647	/* Save off last admin queue command status before releasing
12648	 * the NVM
12649	 */
12650	last_aq_status = pf->hw.aq.asq_last_status;
12651	i40e_release_nvm(&pf->hw);
12652	if (ret)
12653		dev_info(&pf->pdev->dev,
12654			 "BW settings NOT SAVED, err %pe aq_err %s\n",
12655			 ERR_PTR(ret),
12656			 i40e_aq_str(&pf->hw, last_aq_status));
12657bw_commit_out:
12658
12659	return ret;
12660}
12661
12662/**
12663 * i40e_is_total_port_shutdown_enabled - read NVM and return value
12664 * if total port shutdown feature is enabled for this PF
12665 * @pf: board private structure
12666 **/
12667static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf)
12668{
12669#define I40E_TOTAL_PORT_SHUTDOWN_ENABLED	BIT(4)
12670#define I40E_FEATURES_ENABLE_PTR		0x2A
12671#define I40E_CURRENT_SETTING_PTR		0x2B
12672#define I40E_LINK_BEHAVIOR_WORD_OFFSET		0x2D
12673#define I40E_LINK_BEHAVIOR_WORD_LENGTH		0x1
12674#define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED	BIT(0)
12675#define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH	4
12676	u16 sr_emp_sr_settings_ptr = 0;
12677	u16 features_enable = 0;
12678	u16 link_behavior = 0;
12679	int read_status = 0;
12680	bool ret = false;
12681
12682	read_status = i40e_read_nvm_word(&pf->hw,
12683					 I40E_SR_EMP_SR_SETTINGS_PTR,
12684					 &sr_emp_sr_settings_ptr);
12685	if (read_status)
12686		goto err_nvm;
12687	read_status = i40e_read_nvm_word(&pf->hw,
12688					 sr_emp_sr_settings_ptr +
12689					 I40E_FEATURES_ENABLE_PTR,
12690					 &features_enable);
12691	if (read_status)
12692		goto err_nvm;
12693	if (I40E_TOTAL_PORT_SHUTDOWN_ENABLED & features_enable) {
12694		read_status = i40e_read_nvm_module_data(&pf->hw,
12695							I40E_SR_EMP_SR_SETTINGS_PTR,
12696							I40E_CURRENT_SETTING_PTR,
12697							I40E_LINK_BEHAVIOR_WORD_OFFSET,
12698							I40E_LINK_BEHAVIOR_WORD_LENGTH,
12699							&link_behavior);
12700		if (read_status)
12701			goto err_nvm;
12702		link_behavior >>= (pf->hw.port * I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH);
12703		ret = I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED & link_behavior;
12704	}
12705	return ret;
12706
12707err_nvm:
12708	dev_warn(&pf->pdev->dev,
12709		 "total-port-shutdown feature is off due to read nvm error: %pe\n",
12710		 ERR_PTR(read_status));
12711	return ret;
12712}
12713
12714/**
12715 * i40e_sw_init - Initialize general software structures (struct i40e_pf)
12716 * @pf: board private structure to initialize
12717 *
12718 * i40e_sw_init initializes the Adapter private data structure.
12719 * Fields are initialized based on PCI device information and
12720 * OS network device settings (MTU size).
12721 **/
12722static int i40e_sw_init(struct i40e_pf *pf)
12723{
12724	int err = 0;
12725	int size;
12726	u16 pow;
12727
12728	/* Set default capability flags */
12729	pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
12730		    I40E_FLAG_MSI_ENABLED     |
12731		    I40E_FLAG_MSIX_ENABLED;
12732
12733	/* Set default ITR */
12734	pf->rx_itr_default = I40E_ITR_RX_DEF;
12735	pf->tx_itr_default = I40E_ITR_TX_DEF;
12736
12737	/* Depending on PF configurations, it is possible that the RSS
12738	 * maximum might end up larger than the available queues
12739	 */
12740	pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width);
12741	pf->alloc_rss_size = 1;
12742	pf->rss_table_size = pf->hw.func_caps.rss_table_size;
12743	pf->rss_size_max = min_t(int, pf->rss_size_max,
12744				 pf->hw.func_caps.num_tx_qp);
12745
12746	/* find the next higher power-of-2 of num cpus */
12747	pow = roundup_pow_of_two(num_online_cpus());
12748	pf->rss_size_max = min_t(int, pf->rss_size_max, pow);
12749
12750	if (pf->hw.func_caps.rss) {
12751		pf->flags |= I40E_FLAG_RSS_ENABLED;
12752		pf->alloc_rss_size = min_t(int, pf->rss_size_max,
12753					   num_online_cpus());
12754	}
12755
12756	/* MFP mode enabled */
12757	if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) {
12758		pf->flags |= I40E_FLAG_MFP_ENABLED;
12759		dev_info(&pf->pdev->dev, "MFP mode Enabled\n");
12760		if (i40e_get_partition_bw_setting(pf)) {
12761			dev_warn(&pf->pdev->dev,
12762				 "Could not get partition bw settings\n");
12763		} else {
12764			dev_info(&pf->pdev->dev,
12765				 "Partition BW Min = %8.8x, Max = %8.8x\n",
12766				 pf->min_bw, pf->max_bw);
12767
12768			/* nudge the Tx scheduler */
12769			i40e_set_partition_bw_setting(pf);
12770		}
12771	}
12772
12773	if ((pf->hw.func_caps.fd_filters_guaranteed > 0) ||
12774	    (pf->hw.func_caps.fd_filters_best_effort > 0)) {
12775		pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
12776		pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
12777		if (pf->flags & I40E_FLAG_MFP_ENABLED &&
12778		    pf->hw.num_partitions > 1)
12779			dev_info(&pf->pdev->dev,
12780				 "Flow Director Sideband mode Disabled in MFP mode\n");
12781		else
12782			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
12783		pf->fdir_pf_filter_count =
12784				 pf->hw.func_caps.fd_filters_guaranteed;
12785		pf->hw.fdir_shared_filter_count =
12786				 pf->hw.func_caps.fd_filters_best_effort;
12787	}
12788
12789	if (pf->hw.mac.type == I40E_MAC_X722) {
12790		pf->hw_features |= (I40E_HW_RSS_AQ_CAPABLE |
12791				    I40E_HW_128_QP_RSS_CAPABLE |
12792				    I40E_HW_ATR_EVICT_CAPABLE |
12793				    I40E_HW_WB_ON_ITR_CAPABLE |
12794				    I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE |
12795				    I40E_HW_NO_PCI_LINK_CHECK |
12796				    I40E_HW_USE_SET_LLDP_MIB |
12797				    I40E_HW_GENEVE_OFFLOAD_CAPABLE |
12798				    I40E_HW_PTP_L4_CAPABLE |
12799				    I40E_HW_WOL_MC_MAGIC_PKT_WAKE |
12800				    I40E_HW_OUTER_UDP_CSUM_CAPABLE);
12801
12802#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03
12803		if (rd32(&pf->hw, I40E_GLQF_FDEVICTENA(1)) !=
12804		    I40E_FDEVICT_PCTYPE_DEFAULT) {
12805			dev_warn(&pf->pdev->dev,
12806				 "FD EVICT PCTYPES are not right, disable FD HW EVICT\n");
12807			pf->hw_features &= ~I40E_HW_ATR_EVICT_CAPABLE;
12808		}
12809	} else if ((pf->hw.aq.api_maj_ver > 1) ||
12810		   ((pf->hw.aq.api_maj_ver == 1) &&
12811		    (pf->hw.aq.api_min_ver > 4))) {
12812		/* Supported in FW API version higher than 1.4 */
12813		pf->hw_features |= I40E_HW_GENEVE_OFFLOAD_CAPABLE;
12814	}
12815
12816	/* Enable HW ATR eviction if possible */
12817	if (pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)
12818		pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
12819
12820	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
12821	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
12822	    (pf->hw.aq.fw_maj_ver < 4))) {
12823		pf->hw_features |= I40E_HW_RESTART_AUTONEG;
12824		/* No DCB support  for FW < v4.33 */
12825		pf->hw_features |= I40E_HW_NO_DCB_SUPPORT;
12826	}
12827
12828	/* Disable FW LLDP if FW < v4.3 */
12829	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
12830	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
12831	    (pf->hw.aq.fw_maj_ver < 4)))
12832		pf->hw_features |= I40E_HW_STOP_FW_LLDP;
12833
12834	/* Use the FW Set LLDP MIB API if FW > v4.40 */
12835	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
12836	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) ||
12837	    (pf->hw.aq.fw_maj_ver >= 5)))
12838		pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
12839
12840	/* Enable PTP L4 if FW > v6.0 */
12841	if (pf->hw.mac.type == I40E_MAC_XL710 &&
12842	    pf->hw.aq.fw_maj_ver >= 6)
12843		pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
12844
12845	if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) {
12846		pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
12847		pf->flags |= I40E_FLAG_VMDQ_ENABLED;
12848		pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
12849	}
12850
12851	if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) {
12852		pf->flags |= I40E_FLAG_IWARP_ENABLED;
12853		/* IWARP needs one extra vector for CQP just like MISC.*/
12854		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
12855	}
12856	/* Stopping FW LLDP engine is supported on XL710 and X722
12857	 * starting from FW versions determined in i40e_init_adminq.
12858	 * Stopping the FW LLDP engine is not supported on XL710
12859	 * if NPAR is functioning so unset this hw flag in this case.
12860	 */
12861	if (pf->hw.mac.type == I40E_MAC_XL710 &&
12862	    pf->hw.func_caps.npar_enable &&
12863	    (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
12864		pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE;
12865
12866#ifdef CONFIG_PCI_IOV
12867	if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
12868		pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
12869		pf->flags |= I40E_FLAG_SRIOV_ENABLED;
12870		pf->num_req_vfs = min_t(int,
12871					pf->hw.func_caps.num_vfs,
12872					I40E_MAX_VF_COUNT);
12873	}
12874#endif /* CONFIG_PCI_IOV */
12875	pf->eeprom_version = 0xDEAD;
12876	pf->lan_veb = I40E_NO_VEB;
12877	pf->lan_vsi = I40E_NO_VSI;
12878
12879	/* By default FW has this off for performance reasons */
12880	pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
12881
12882	/* set up queue assignment tracking */
12883	size = sizeof(struct i40e_lump_tracking)
12884		+ (sizeof(u16) * pf->hw.func_caps.num_tx_qp);
12885	pf->qp_pile = kzalloc(size, GFP_KERNEL);
12886	if (!pf->qp_pile) {
12887		err = -ENOMEM;
12888		goto sw_init_done;
12889	}
12890	pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
12891
12892	pf->tx_timeout_recovery_level = 1;
12893
12894	if (pf->hw.mac.type != I40E_MAC_X722 &&
12895	    i40e_is_total_port_shutdown_enabled(pf)) {
12896		/* Link down on close must be on when total port shutdown
12897		 * is enabled for a given port
12898		 */
12899		pf->flags |= (I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED |
12900			      I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED);
12901		dev_info(&pf->pdev->dev,
12902			 "total-port-shutdown was enabled, link-down-on-close is forced on\n");
12903	}
12904	mutex_init(&pf->switch_mutex);
12905
12906sw_init_done:
12907	return err;
12908}
12909
12910/**
12911 * i40e_set_ntuple - set the ntuple feature flag and take action
12912 * @pf: board private structure to initialize
12913 * @features: the feature set that the stack is suggesting
12914 *
12915 * returns a bool to indicate if reset needs to happen
12916 **/
12917bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
12918{
12919	bool need_reset = false;
12920
12921	/* Check if Flow Director n-tuple support was enabled or disabled.  If
12922	 * the state changed, we need to reset.
12923	 */
12924	if (features & NETIF_F_NTUPLE) {
12925		/* Enable filters and mark for reset */
12926		if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
12927			need_reset = true;
12928		/* enable FD_SB only if there is MSI-X vector and no cloud
12929		 * filters exist
12930		 */
12931		if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
12932			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
12933			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
12934		}
12935	} else {
12936		/* turn off filters, mark for reset and clear SW filter list */
12937		if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
12938			need_reset = true;
12939			i40e_fdir_filter_exit(pf);
12940		}
12941		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
12942		clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state);
12943		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
12944
12945		/* reset fd counters */
12946		pf->fd_add_err = 0;
12947		pf->fd_atr_cnt = 0;
12948		/* if ATR was auto disabled it can be re-enabled. */
12949		if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
12950			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
12951			    (I40E_DEBUG_FD & pf->hw.debug_mask))
12952				dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
12953	}
12954	return need_reset;
12955}
12956
12957/**
12958 * i40e_clear_rss_lut - clear the rx hash lookup table
12959 * @vsi: the VSI being configured
12960 **/
12961static void i40e_clear_rss_lut(struct i40e_vsi *vsi)
12962{
12963	struct i40e_pf *pf = vsi->back;
12964	struct i40e_hw *hw = &pf->hw;
12965	u16 vf_id = vsi->vf_id;
12966	u8 i;
12967
12968	if (vsi->type == I40E_VSI_MAIN) {
12969		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
12970			wr32(hw, I40E_PFQF_HLUT(i), 0);
12971	} else if (vsi->type == I40E_VSI_SRIOV) {
12972		for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
12973			i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0);
12974	} else {
12975		dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
12976	}
12977}
12978
12979/**
12980 * i40e_set_loopback - turn on/off loopback mode on underlying PF
12981 * @vsi: ptr to VSI
12982 * @ena: flag to indicate the on/off setting
12983 */
12984static int i40e_set_loopback(struct i40e_vsi *vsi, bool ena)
12985{
12986	bool if_running = netif_running(vsi->netdev) &&
12987			  !test_and_set_bit(__I40E_VSI_DOWN, vsi->state);
12988	int ret;
12989
12990	if (if_running)
12991		i40e_down(vsi);
12992
12993	ret = i40e_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
12994	if (ret)
12995		netdev_err(vsi->netdev, "Failed to toggle loopback state\n");
12996	if (if_running)
12997		i40e_up(vsi);
12998
12999	return ret;
13000}
13001
13002/**
13003 * i40e_set_features - set the netdev feature flags
13004 * @netdev: ptr to the netdev being adjusted
13005 * @features: the feature set that the stack is suggesting
13006 * Note: expects to be called while under rtnl_lock()
13007 **/
13008static int i40e_set_features(struct net_device *netdev,
13009			     netdev_features_t features)
13010{
13011	struct i40e_netdev_priv *np = netdev_priv(netdev);
13012	struct i40e_vsi *vsi = np->vsi;
13013	struct i40e_pf *pf = vsi->back;
13014	bool need_reset;
13015
13016	if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
13017		i40e_pf_config_rss(pf);
13018	else if (!(features & NETIF_F_RXHASH) &&
13019		 netdev->features & NETIF_F_RXHASH)
13020		i40e_clear_rss_lut(vsi);
13021
13022	if (features & NETIF_F_HW_VLAN_CTAG_RX)
13023		i40e_vlan_stripping_enable(vsi);
13024	else
13025		i40e_vlan_stripping_disable(vsi);
13026
13027	if (!(features & NETIF_F_HW_TC) &&
13028	    (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
13029		dev_err(&pf->pdev->dev,
13030			"Offloaded tc filters active, can't turn hw_tc_offload off");
13031		return -EINVAL;
13032	}
13033
13034	if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt)
13035		i40e_del_all_macvlans(vsi);
13036
13037	need_reset = i40e_set_ntuple(pf, features);
13038
13039	if (need_reset)
13040		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
13041
13042	if ((features ^ netdev->features) & NETIF_F_LOOPBACK)
13043		return i40e_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK));
13044
13045	return 0;
13046}
13047
13048static int i40e_udp_tunnel_set_port(struct net_device *netdev,
13049				    unsigned int table, unsigned int idx,
13050				    struct udp_tunnel_info *ti)
13051{
13052	struct i40e_netdev_priv *np = netdev_priv(netdev);
13053	struct i40e_hw *hw = &np->vsi->back->hw;
13054	u8 type, filter_index;
13055	int ret;
13056
13057	type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN :
13058						   I40E_AQC_TUNNEL_TYPE_NGE;
13059
13060	ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index,
13061				     NULL);
13062	if (ret) {
13063		netdev_info(netdev, "add UDP port failed, err %pe aq_err %s\n",
13064			    ERR_PTR(ret),
13065			    i40e_aq_str(hw, hw->aq.asq_last_status));
13066		return -EIO;
13067	}
13068
13069	udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index);
13070	return 0;
13071}
13072
13073static int i40e_udp_tunnel_unset_port(struct net_device *netdev,
13074				      unsigned int table, unsigned int idx,
13075				      struct udp_tunnel_info *ti)
13076{
13077	struct i40e_netdev_priv *np = netdev_priv(netdev);
13078	struct i40e_hw *hw = &np->vsi->back->hw;
13079	int ret;
13080
13081	ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL);
13082	if (ret) {
13083		netdev_info(netdev, "delete UDP port failed, err %pe aq_err %s\n",
13084			    ERR_PTR(ret),
13085			    i40e_aq_str(hw, hw->aq.asq_last_status));
13086		return -EIO;
13087	}
13088
13089	return 0;
13090}
13091
13092static int i40e_get_phys_port_id(struct net_device *netdev,
13093				 struct netdev_phys_item_id *ppid)
13094{
13095	struct i40e_netdev_priv *np = netdev_priv(netdev);
13096	struct i40e_pf *pf = np->vsi->back;
13097	struct i40e_hw *hw = &pf->hw;
13098
13099	if (!(pf->hw_features & I40E_HW_PORT_ID_VALID))
13100		return -EOPNOTSUPP;
13101
13102	ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id));
13103	memcpy(ppid->id, hw->mac.port_addr, ppid->id_len);
13104
13105	return 0;
13106}
13107
13108/**
13109 * i40e_ndo_fdb_add - add an entry to the hardware database
13110 * @ndm: the input from the stack
13111 * @tb: pointer to array of nladdr (unused)
13112 * @dev: the net device pointer
13113 * @addr: the MAC address entry being added
13114 * @vid: VLAN ID
13115 * @flags: instructions from stack about fdb operation
13116 * @extack: netlink extended ack, unused currently
13117 */
13118static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
13119			    struct net_device *dev,
13120			    const unsigned char *addr, u16 vid,
13121			    u16 flags,
13122			    struct netlink_ext_ack *extack)
13123{
13124	struct i40e_netdev_priv *np = netdev_priv(dev);
13125	struct i40e_pf *pf = np->vsi->back;
13126	int err = 0;
13127
13128	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
13129		return -EOPNOTSUPP;
13130
13131	if (vid) {
13132		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
13133		return -EINVAL;
13134	}
13135
13136	/* Hardware does not support aging addresses so if a
13137	 * ndm_state is given only allow permanent addresses
13138	 */
13139	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
13140		netdev_info(dev, "FDB only supports static addresses\n");
13141		return -EINVAL;
13142	}
13143
13144	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
13145		err = dev_uc_add_excl(dev, addr);
13146	else if (is_multicast_ether_addr(addr))
13147		err = dev_mc_add_excl(dev, addr);
13148	else
13149		err = -EINVAL;
13150
13151	/* Only return duplicate errors if NLM_F_EXCL is set */
13152	if (err == -EEXIST && !(flags & NLM_F_EXCL))
13153		err = 0;
13154
13155	return err;
13156}
13157
13158/**
13159 * i40e_ndo_bridge_setlink - Set the hardware bridge mode
13160 * @dev: the netdev being configured
13161 * @nlh: RTNL message
13162 * @flags: bridge flags
13163 * @extack: netlink extended ack
13164 *
13165 * Inserts a new hardware bridge if not already created and
13166 * enables the bridging mode requested (VEB or VEPA). If the
13167 * hardware bridge has already been inserted and the request
13168 * is to change the mode then that requires a PF reset to
13169 * allow rebuild of the components with required hardware
13170 * bridge mode enabled.
13171 *
13172 * Note: expects to be called while under rtnl_lock()
13173 **/
13174static int i40e_ndo_bridge_setlink(struct net_device *dev,
13175				   struct nlmsghdr *nlh,
13176				   u16 flags,
13177				   struct netlink_ext_ack *extack)
13178{
13179	struct i40e_netdev_priv *np = netdev_priv(dev);
13180	struct i40e_vsi *vsi = np->vsi;
13181	struct i40e_pf *pf = vsi->back;
13182	struct i40e_veb *veb = NULL;
13183	struct nlattr *attr, *br_spec;
13184	int i, rem;
13185
13186	/* Only for PF VSI for now */
13187	if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
13188		return -EOPNOTSUPP;
13189
13190	/* Find the HW bridge for PF VSI */
13191	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
13192		if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
13193			veb = pf->veb[i];
13194	}
13195
13196	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
13197	if (!br_spec)
13198		return -EINVAL;
13199
13200	nla_for_each_nested(attr, br_spec, rem) {
13201		__u16 mode;
13202
13203		if (nla_type(attr) != IFLA_BRIDGE_MODE)
13204			continue;
13205
13206		mode = nla_get_u16(attr);
13207		if ((mode != BRIDGE_MODE_VEPA) &&
13208		    (mode != BRIDGE_MODE_VEB))
13209			return -EINVAL;
13210
13211		/* Insert a new HW bridge */
13212		if (!veb) {
13213			veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid,
13214					     vsi->tc_config.enabled_tc);
13215			if (veb) {
13216				veb->bridge_mode = mode;
13217				i40e_config_bridge_mode(veb);
13218			} else {
13219				/* No Bridge HW offload available */
13220				return -ENOENT;
13221			}
13222			break;
13223		} else if (mode != veb->bridge_mode) {
13224			/* Existing HW bridge but different mode needs reset */
13225			veb->bridge_mode = mode;
13226			/* TODO: If no VFs or VMDq VSIs, disallow VEB mode */
13227			if (mode == BRIDGE_MODE_VEB)
13228				pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
13229			else
13230				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
13231			i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
13232			break;
13233		}
13234	}
13235
13236	return 0;
13237}
13238
13239/**
13240 * i40e_ndo_bridge_getlink - Get the hardware bridge mode
13241 * @skb: skb buff
13242 * @pid: process id
13243 * @seq: RTNL message seq #
13244 * @dev: the netdev being configured
13245 * @filter_mask: unused
13246 * @nlflags: netlink flags passed in
13247 *
13248 * Return the mode in which the hardware bridge is operating in
13249 * i.e VEB or VEPA.
13250 **/
13251static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
13252				   struct net_device *dev,
13253				   u32 __always_unused filter_mask,
13254				   int nlflags)
13255{
13256	struct i40e_netdev_priv *np = netdev_priv(dev);
13257	struct i40e_vsi *vsi = np->vsi;
13258	struct i40e_pf *pf = vsi->back;
13259	struct i40e_veb *veb = NULL;
13260	int i;
13261
13262	/* Only for PF VSI for now */
13263	if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
13264		return -EOPNOTSUPP;
13265
13266	/* Find the HW bridge for the PF VSI */
13267	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
13268		if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
13269			veb = pf->veb[i];
13270	}
13271
13272	if (!veb)
13273		return 0;
13274
13275	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode,
13276				       0, 0, nlflags, filter_mask, NULL);
13277}
13278
13279/**
13280 * i40e_features_check - Validate encapsulated packet conforms to limits
13281 * @skb: skb buff
13282 * @dev: This physical port's netdev
13283 * @features: Offload features that the stack believes apply
13284 **/
13285static netdev_features_t i40e_features_check(struct sk_buff *skb,
13286					     struct net_device *dev,
13287					     netdev_features_t features)
13288{
13289	size_t len;
13290
13291	/* No point in doing any of this if neither checksum nor GSO are
13292	 * being requested for this frame.  We can rule out both by just
13293	 * checking for CHECKSUM_PARTIAL
13294	 */
13295	if (skb->ip_summed != CHECKSUM_PARTIAL)
13296		return features;
13297
13298	/* We cannot support GSO if the MSS is going to be less than
13299	 * 64 bytes.  If it is then we need to drop support for GSO.
13300	 */
13301	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
13302		features &= ~NETIF_F_GSO_MASK;
13303
13304	/* MACLEN can support at most 63 words */
13305	len = skb_network_header(skb) - skb->data;
13306	if (len & ~(63 * 2))
13307		goto out_err;
13308
13309	/* IPLEN and EIPLEN can support at most 127 dwords */
13310	len = skb_transport_header(skb) - skb_network_header(skb);
13311	if (len & ~(127 * 4))
13312		goto out_err;
13313
13314	if (skb->encapsulation) {
13315		/* L4TUNLEN can support 127 words */
13316		len = skb_inner_network_header(skb) - skb_transport_header(skb);
13317		if (len & ~(127 * 2))
13318			goto out_err;
13319
13320		/* IPLEN can support at most 127 dwords */
13321		len = skb_inner_transport_header(skb) -
13322		      skb_inner_network_header(skb);
13323		if (len & ~(127 * 4))
13324			goto out_err;
13325	}
13326
13327	/* No need to validate L4LEN as TCP is the only protocol with a
13328	 * flexible value and we support all possible values supported
13329	 * by TCP, which is at most 15 dwords
13330	 */
13331
13332	return features;
13333out_err:
13334	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
13335}
13336
13337/**
13338 * i40e_xdp_setup - add/remove an XDP program
13339 * @vsi: VSI to changed
13340 * @prog: XDP program
13341 * @extack: netlink extended ack
13342 **/
13343static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
13344			  struct netlink_ext_ack *extack)
13345{
13346	int frame_size = i40e_max_vsi_frame_size(vsi, prog);
13347	struct i40e_pf *pf = vsi->back;
13348	struct bpf_prog *old_prog;
13349	bool need_reset;
13350	int i;
13351
13352	/* Don't allow frames that span over multiple buffers */
13353	if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
13354		NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
13355		return -EINVAL;
13356	}
13357
13358	/* When turning XDP on->off/off->on we reset and rebuild the rings. */
13359	need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
13360
13361	if (need_reset)
13362		i40e_prep_for_reset(pf);
13363
13364	/* VSI shall be deleted in a moment, just return EINVAL */
13365	if (test_bit(__I40E_IN_REMOVE, pf->state))
13366		return -EINVAL;
13367
13368	old_prog = xchg(&vsi->xdp_prog, prog);
13369
13370	if (need_reset) {
13371		if (!prog) {
13372			xdp_features_clear_redirect_target(vsi->netdev);
13373			/* Wait until ndo_xsk_wakeup completes. */
13374			synchronize_rcu();
13375		}
13376		i40e_reset_and_rebuild(pf, true, true);
13377	}
13378
13379	if (!i40e_enabled_xdp_vsi(vsi) && prog) {
13380		if (i40e_realloc_rx_bi_zc(vsi, true))
13381			return -ENOMEM;
13382	} else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
13383		if (i40e_realloc_rx_bi_zc(vsi, false))
13384			return -ENOMEM;
13385	}
13386
13387	for (i = 0; i < vsi->num_queue_pairs; i++)
13388		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
13389
13390	if (old_prog)
13391		bpf_prog_put(old_prog);
13392
13393	/* Kick start the NAPI context if there is an AF_XDP socket open
13394	 * on that queue id. This so that receiving will start.
13395	 */
13396	if (need_reset && prog) {
13397		for (i = 0; i < vsi->num_queue_pairs; i++)
13398			if (vsi->xdp_rings[i]->xsk_pool)
13399				(void)i40e_xsk_wakeup(vsi->netdev, i,
13400						      XDP_WAKEUP_RX);
13401		xdp_features_set_redirect_target(vsi->netdev, true);
13402	}
13403
13404	return 0;
13405}
13406
13407/**
13408 * i40e_enter_busy_conf - Enters busy config state
13409 * @vsi: vsi
13410 *
13411 * Returns 0 on success, <0 for failure.
13412 **/
13413static int i40e_enter_busy_conf(struct i40e_vsi *vsi)
13414{
13415	struct i40e_pf *pf = vsi->back;
13416	int timeout = 50;
13417
13418	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
13419		timeout--;
13420		if (!timeout)
13421			return -EBUSY;
13422		usleep_range(1000, 2000);
13423	}
13424
13425	return 0;
13426}
13427
13428/**
13429 * i40e_exit_busy_conf - Exits busy config state
13430 * @vsi: vsi
13431 **/
13432static void i40e_exit_busy_conf(struct i40e_vsi *vsi)
13433{
13434	struct i40e_pf *pf = vsi->back;
13435
13436	clear_bit(__I40E_CONFIG_BUSY, pf->state);
13437}
13438
13439/**
13440 * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair
13441 * @vsi: vsi
13442 * @queue_pair: queue pair
13443 **/
13444static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
13445{
13446	memset(&vsi->rx_rings[queue_pair]->rx_stats, 0,
13447	       sizeof(vsi->rx_rings[queue_pair]->rx_stats));
13448	memset(&vsi->tx_rings[queue_pair]->stats, 0,
13449	       sizeof(vsi->tx_rings[queue_pair]->stats));
13450	if (i40e_enabled_xdp_vsi(vsi)) {
13451		memset(&vsi->xdp_rings[queue_pair]->stats, 0,
13452		       sizeof(vsi->xdp_rings[queue_pair]->stats));
13453	}
13454}
13455
13456/**
13457 * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
13458 * @vsi: vsi
13459 * @queue_pair: queue pair
13460 **/
13461static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
13462{
13463	i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
13464	if (i40e_enabled_xdp_vsi(vsi)) {
13465		/* Make sure that in-progress ndo_xdp_xmit calls are
13466		 * completed.
13467		 */
13468		synchronize_rcu();
13469		i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
13470	}
13471	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
13472}
13473
13474/**
13475 * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair
13476 * @vsi: vsi
13477 * @queue_pair: queue pair
13478 * @enable: true for enable, false for disable
13479 **/
13480static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair,
13481					bool enable)
13482{
13483	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
13484	struct i40e_q_vector *q_vector = rxr->q_vector;
13485
13486	if (!vsi->netdev)
13487		return;
13488
13489	/* All rings in a qp belong to the same qvector. */
13490	if (q_vector->rx.ring || q_vector->tx.ring) {
13491		if (enable)
13492			napi_enable(&q_vector->napi);
13493		else
13494			napi_disable(&q_vector->napi);
13495	}
13496}
13497
13498/**
13499 * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair
13500 * @vsi: vsi
13501 * @queue_pair: queue pair
13502 * @enable: true for enable, false for disable
13503 *
13504 * Returns 0 on success, <0 on failure.
13505 **/
13506static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair,
13507					bool enable)
13508{
13509	struct i40e_pf *pf = vsi->back;
13510	int pf_q, ret = 0;
13511
13512	pf_q = vsi->base_queue + queue_pair;
13513	ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
13514				     false /*is xdp*/, enable);
13515	if (ret) {
13516		dev_info(&pf->pdev->dev,
13517			 "VSI seid %d Tx ring %d %sable timeout\n",
13518			 vsi->seid, pf_q, (enable ? "en" : "dis"));
13519		return ret;
13520	}
13521
13522	i40e_control_rx_q(pf, pf_q, enable);
13523	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
13524	if (ret) {
13525		dev_info(&pf->pdev->dev,
13526			 "VSI seid %d Rx ring %d %sable timeout\n",
13527			 vsi->seid, pf_q, (enable ? "en" : "dis"));
13528		return ret;
13529	}
13530
13531	/* Due to HW errata, on Rx disable only, the register can
13532	 * indicate done before it really is. Needs 50ms to be sure
13533	 */
13534	if (!enable)
13535		mdelay(50);
13536
13537	if (!i40e_enabled_xdp_vsi(vsi))
13538		return ret;
13539
13540	ret = i40e_control_wait_tx_q(vsi->seid, pf,
13541				     pf_q + vsi->alloc_queue_pairs,
13542				     true /*is xdp*/, enable);
13543	if (ret) {
13544		dev_info(&pf->pdev->dev,
13545			 "VSI seid %d XDP Tx ring %d %sable timeout\n",
13546			 vsi->seid, pf_q, (enable ? "en" : "dis"));
13547	}
13548
13549	return ret;
13550}
13551
13552/**
13553 * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
13554 * @vsi: vsi
13555 * @queue_pair: queue_pair
13556 **/
13557static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
13558{
13559	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
13560	struct i40e_pf *pf = vsi->back;
13561	struct i40e_hw *hw = &pf->hw;
13562
13563	/* All rings in a qp belong to the same qvector. */
13564	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
13565		i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
13566	else
13567		i40e_irq_dynamic_enable_icr0(pf);
13568
13569	i40e_flush(hw);
13570}
13571
13572/**
13573 * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair
13574 * @vsi: vsi
13575 * @queue_pair: queue_pair
13576 **/
13577static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
13578{
13579	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
13580	struct i40e_pf *pf = vsi->back;
13581	struct i40e_hw *hw = &pf->hw;
13582
13583	/* For simplicity, instead of removing the qp interrupt causes
13584	 * from the interrupt linked list, we simply disable the interrupt, and
13585	 * leave the list intact.
13586	 *
13587	 * All rings in a qp belong to the same qvector.
13588	 */
13589	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
13590		u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
13591
13592		wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
13593		i40e_flush(hw);
13594		synchronize_irq(pf->msix_entries[intpf].vector);
13595	} else {
13596		/* Legacy and MSI mode - this stops all interrupt handling */
13597		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
13598		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
13599		i40e_flush(hw);
13600		synchronize_irq(pf->pdev->irq);
13601	}
13602}
13603
13604/**
13605 * i40e_queue_pair_disable - Disables a queue pair
13606 * @vsi: vsi
13607 * @queue_pair: queue pair
13608 *
13609 * Returns 0 on success, <0 on failure.
13610 **/
13611int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
13612{
13613	int err;
13614
13615	err = i40e_enter_busy_conf(vsi);
13616	if (err)
13617		return err;
13618
13619	i40e_queue_pair_disable_irq(vsi, queue_pair);
13620	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
13621	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
13622	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
13623	i40e_queue_pair_clean_rings(vsi, queue_pair);
13624	i40e_queue_pair_reset_stats(vsi, queue_pair);
13625
13626	return err;
13627}
13628
13629/**
13630 * i40e_queue_pair_enable - Enables a queue pair
13631 * @vsi: vsi
13632 * @queue_pair: queue pair
13633 *
13634 * Returns 0 on success, <0 on failure.
13635 **/
13636int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
13637{
13638	int err;
13639
13640	err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]);
13641	if (err)
13642		return err;
13643
13644	if (i40e_enabled_xdp_vsi(vsi)) {
13645		err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]);
13646		if (err)
13647			return err;
13648	}
13649
13650	err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]);
13651	if (err)
13652		return err;
13653
13654	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */);
13655	i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */);
13656	i40e_queue_pair_enable_irq(vsi, queue_pair);
13657
13658	i40e_exit_busy_conf(vsi);
13659
13660	return err;
13661}
13662
13663/**
13664 * i40e_xdp - implements ndo_bpf for i40e
13665 * @dev: netdevice
13666 * @xdp: XDP command
13667 **/
13668static int i40e_xdp(struct net_device *dev,
13669		    struct netdev_bpf *xdp)
13670{
13671	struct i40e_netdev_priv *np = netdev_priv(dev);
13672	struct i40e_vsi *vsi = np->vsi;
13673
13674	if (vsi->type != I40E_VSI_MAIN)
13675		return -EINVAL;
13676
13677	switch (xdp->command) {
13678	case XDP_SETUP_PROG:
13679		return i40e_xdp_setup(vsi, xdp->prog, xdp->extack);
13680	case XDP_SETUP_XSK_POOL:
13681		return i40e_xsk_pool_setup(vsi, xdp->xsk.pool,
13682					   xdp->xsk.queue_id);
13683	default:
13684		return -EINVAL;
13685	}
13686}
13687
13688static const struct net_device_ops i40e_netdev_ops = {
13689	.ndo_open		= i40e_open,
13690	.ndo_stop		= i40e_close,
13691	.ndo_start_xmit		= i40e_lan_xmit_frame,
13692	.ndo_get_stats64	= i40e_get_netdev_stats_struct,
13693	.ndo_set_rx_mode	= i40e_set_rx_mode,
13694	.ndo_validate_addr	= eth_validate_addr,
13695	.ndo_set_mac_address	= i40e_set_mac,
13696	.ndo_change_mtu		= i40e_change_mtu,
13697	.ndo_eth_ioctl		= i40e_ioctl,
13698	.ndo_tx_timeout		= i40e_tx_timeout,
13699	.ndo_vlan_rx_add_vid	= i40e_vlan_rx_add_vid,
13700	.ndo_vlan_rx_kill_vid	= i40e_vlan_rx_kill_vid,
13701#ifdef CONFIG_NET_POLL_CONTROLLER
13702	.ndo_poll_controller	= i40e_netpoll,
13703#endif
13704	.ndo_setup_tc		= __i40e_setup_tc,
13705	.ndo_select_queue	= i40e_lan_select_queue,
13706	.ndo_set_features	= i40e_set_features,
13707	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
13708	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
13709	.ndo_get_vf_stats	= i40e_get_vf_stats,
13710	.ndo_set_vf_rate	= i40e_ndo_set_vf_bw,
13711	.ndo_get_vf_config	= i40e_ndo_get_vf_config,
13712	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
13713	.ndo_set_vf_spoofchk	= i40e_ndo_set_vf_spoofchk,
13714	.ndo_set_vf_trust	= i40e_ndo_set_vf_trust,
13715	.ndo_get_phys_port_id	= i40e_get_phys_port_id,
13716	.ndo_fdb_add		= i40e_ndo_fdb_add,
13717	.ndo_features_check	= i40e_features_check,
13718	.ndo_bridge_getlink	= i40e_ndo_bridge_getlink,
13719	.ndo_bridge_setlink	= i40e_ndo_bridge_setlink,
13720	.ndo_bpf		= i40e_xdp,
13721	.ndo_xdp_xmit		= i40e_xdp_xmit,
13722	.ndo_xsk_wakeup	        = i40e_xsk_wakeup,
13723	.ndo_dfwd_add_station	= i40e_fwd_add,
13724	.ndo_dfwd_del_station	= i40e_fwd_del,
13725};
13726
13727/**
13728 * i40e_config_netdev - Setup the netdev flags
13729 * @vsi: the VSI being configured
13730 *
13731 * Returns 0 on success, negative value on failure
13732 **/
13733static int i40e_config_netdev(struct i40e_vsi *vsi)
13734{
13735	struct i40e_pf *pf = vsi->back;
13736	struct i40e_hw *hw = &pf->hw;
13737	struct i40e_netdev_priv *np;
13738	struct net_device *netdev;
13739	u8 broadcast[ETH_ALEN];
13740	u8 mac_addr[ETH_ALEN];
13741	int etherdev_size;
13742	netdev_features_t hw_enc_features;
13743	netdev_features_t hw_features;
13744
13745	etherdev_size = sizeof(struct i40e_netdev_priv);
13746	netdev = alloc_etherdev_mq(etherdev_size, vsi->alloc_queue_pairs);
13747	if (!netdev)
13748		return -ENOMEM;
13749
13750	vsi->netdev = netdev;
13751	np = netdev_priv(netdev);
13752	np->vsi = vsi;
13753
13754	hw_enc_features = NETIF_F_SG			|
13755			  NETIF_F_HW_CSUM		|
13756			  NETIF_F_HIGHDMA		|
13757			  NETIF_F_SOFT_FEATURES		|
13758			  NETIF_F_TSO			|
13759			  NETIF_F_TSO_ECN		|
13760			  NETIF_F_TSO6			|
13761			  NETIF_F_GSO_GRE		|
13762			  NETIF_F_GSO_GRE_CSUM		|
13763			  NETIF_F_GSO_PARTIAL		|
13764			  NETIF_F_GSO_IPXIP4		|
13765			  NETIF_F_GSO_IPXIP6		|
13766			  NETIF_F_GSO_UDP_TUNNEL	|
13767			  NETIF_F_GSO_UDP_TUNNEL_CSUM	|
13768			  NETIF_F_GSO_UDP_L4		|
13769			  NETIF_F_SCTP_CRC		|
13770			  NETIF_F_RXHASH		|
13771			  NETIF_F_RXCSUM		|
13772			  0;
13773
13774	if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE))
13775		netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
13776
13777	netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic;
13778
13779	netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
13780
13781	netdev->hw_enc_features |= hw_enc_features;
13782
13783	/* record features VLANs can make use of */
13784	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
13785
13786#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE |		\
13787				   NETIF_F_GSO_GRE_CSUM |	\
13788				   NETIF_F_GSO_IPXIP4 |		\
13789				   NETIF_F_GSO_IPXIP6 |		\
13790				   NETIF_F_GSO_UDP_TUNNEL |	\
13791				   NETIF_F_GSO_UDP_TUNNEL_CSUM)
13792
13793	netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES;
13794	netdev->features |= NETIF_F_GSO_PARTIAL |
13795			    I40E_GSO_PARTIAL_FEATURES;
13796
13797	netdev->mpls_features |= NETIF_F_SG;
13798	netdev->mpls_features |= NETIF_F_HW_CSUM;
13799	netdev->mpls_features |= NETIF_F_TSO;
13800	netdev->mpls_features |= NETIF_F_TSO6;
13801	netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES;
13802
13803	/* enable macvlan offloads */
13804	netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
13805
13806	hw_features = hw_enc_features		|
13807		      NETIF_F_HW_VLAN_CTAG_TX	|
13808		      NETIF_F_HW_VLAN_CTAG_RX;
13809
13810	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
13811		hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
13812
13813	netdev->hw_features |= hw_features | NETIF_F_LOOPBACK;
13814
13815	netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
13816	netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
13817
13818	netdev->features &= ~NETIF_F_HW_TC;
13819
13820	if (vsi->type == I40E_VSI_MAIN) {
13821		SET_NETDEV_DEV(netdev, &pf->pdev->dev);
13822		ether_addr_copy(mac_addr, hw->mac.perm_addr);
13823		/* The following steps are necessary for two reasons. First,
13824		 * some older NVM configurations load a default MAC-VLAN
13825		 * filter that will accept any tagged packet, and we want to
13826		 * replace this with a normal filter. Additionally, it is
13827		 * possible our MAC address was provided by the platform using
13828		 * Open Firmware or similar.
13829		 *
13830		 * Thus, we need to remove the default filter and install one
13831		 * specific to the MAC address.
13832		 */
13833		i40e_rm_default_mac_filter(vsi, mac_addr);
13834		spin_lock_bh(&vsi->mac_filter_hash_lock);
13835		i40e_add_mac_filter(vsi, mac_addr);
13836		spin_unlock_bh(&vsi->mac_filter_hash_lock);
13837
13838		netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
13839				       NETDEV_XDP_ACT_REDIRECT |
13840				       NETDEV_XDP_ACT_XSK_ZEROCOPY |
13841				       NETDEV_XDP_ACT_RX_SG;
13842		netdev->xdp_zc_max_segs = I40E_MAX_BUFFER_TXD;
13843	} else {
13844		/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
13845		 * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
13846		 * the end, which is 4 bytes long, so force truncation of the
13847		 * original name by IFNAMSIZ - 4
13848		 */
13849		snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d",
13850			 IFNAMSIZ - 4,
13851			 pf->vsi[pf->lan_vsi]->netdev->name);
13852		eth_random_addr(mac_addr);
13853
13854		spin_lock_bh(&vsi->mac_filter_hash_lock);
13855		i40e_add_mac_filter(vsi, mac_addr);
13856		spin_unlock_bh(&vsi->mac_filter_hash_lock);
13857	}
13858
13859	/* Add the broadcast filter so that we initially will receive
13860	 * broadcast packets. Note that when a new VLAN is first added the
13861	 * driver will convert all filters marked I40E_VLAN_ANY into VLAN
13862	 * specific filters as part of transitioning into "vlan" operation.
13863	 * When more VLANs are added, the driver will copy each existing MAC
13864	 * filter and add it for the new VLAN.
13865	 *
13866	 * Broadcast filters are handled specially by
13867	 * i40e_sync_filters_subtask, as the driver must to set the broadcast
13868	 * promiscuous bit instead of adding this directly as a MAC/VLAN
13869	 * filter. The subtask will update the correct broadcast promiscuous
13870	 * bits as VLANs become active or inactive.
13871	 */
13872	eth_broadcast_addr(broadcast);
13873	spin_lock_bh(&vsi->mac_filter_hash_lock);
13874	i40e_add_mac_filter(vsi, broadcast);
13875	spin_unlock_bh(&vsi->mac_filter_hash_lock);
13876
13877	eth_hw_addr_set(netdev, mac_addr);
13878	ether_addr_copy(netdev->perm_addr, mac_addr);
13879
13880	/* i40iw_net_event() reads 16 bytes from neigh->primary_key */
13881	netdev->neigh_priv_len = sizeof(u32) * 4;
13882
13883	netdev->priv_flags |= IFF_UNICAST_FLT;
13884	netdev->priv_flags |= IFF_SUPP_NOFCS;
13885	/* Setup netdev TC information */
13886	i40e_vsi_config_netdev_tc(vsi, vsi->tc_config.enabled_tc);
13887
13888	netdev->netdev_ops = &i40e_netdev_ops;
13889	netdev->watchdog_timeo = 5 * HZ;
13890	i40e_set_ethtool_ops(netdev);
13891
13892	/* MTU range: 68 - 9706 */
13893	netdev->min_mtu = ETH_MIN_MTU;
13894	netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD;
13895
13896	return 0;
13897}
13898
13899/**
13900 * i40e_vsi_delete - Delete a VSI from the switch
13901 * @vsi: the VSI being removed
13902 *
13903 * Returns 0 on success, negative value on failure
13904 **/
13905static void i40e_vsi_delete(struct i40e_vsi *vsi)
13906{
13907	/* remove default VSI is not allowed */
13908	if (vsi == vsi->back->vsi[vsi->back->lan_vsi])
13909		return;
13910
13911	i40e_aq_delete_element(&vsi->back->hw, vsi->seid, NULL);
13912}
13913
13914/**
13915 * i40e_is_vsi_uplink_mode_veb - Check if the VSI's uplink bridge mode is VEB
13916 * @vsi: the VSI being queried
13917 *
13918 * Returns 1 if HW bridge mode is VEB and return 0 in case of VEPA mode
13919 **/
13920int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
13921{
13922	struct i40e_veb *veb;
13923	struct i40e_pf *pf = vsi->back;
13924
13925	/* Uplink is not a bridge so default to VEB */
13926	if (vsi->veb_idx >= I40E_MAX_VEB)
13927		return 1;
13928
13929	veb = pf->veb[vsi->veb_idx];
13930	if (!veb) {
13931		dev_info(&pf->pdev->dev,
13932			 "There is no veb associated with the bridge\n");
13933		return -ENOENT;
13934	}
13935
13936	/* Uplink is a bridge in VEPA mode */
13937	if (veb->bridge_mode & BRIDGE_MODE_VEPA) {
13938		return 0;
13939	} else {
13940		/* Uplink is a bridge in VEB mode */
13941		return 1;
13942	}
13943
13944	/* VEPA is now default bridge, so return 0 */
13945	return 0;
13946}
13947
13948/**
13949 * i40e_add_vsi - Add a VSI to the switch
13950 * @vsi: the VSI being configured
13951 *
13952 * This initializes a VSI context depending on the VSI type to be added and
13953 * passes it down to the add_vsi aq command.
13954 **/
13955static int i40e_add_vsi(struct i40e_vsi *vsi)
13956{
13957	int ret = -ENODEV;
13958	struct i40e_pf *pf = vsi->back;
13959	struct i40e_hw *hw = &pf->hw;
13960	struct i40e_vsi_context ctxt;
13961	struct i40e_mac_filter *f;
13962	struct hlist_node *h;
13963	int bkt;
13964
13965	u8 enabled_tc = 0x1; /* TC0 enabled */
13966	int f_count = 0;
13967
13968	memset(&ctxt, 0, sizeof(ctxt));
13969	switch (vsi->type) {
13970	case I40E_VSI_MAIN:
13971		/* The PF's main VSI is already setup as part of the
13972		 * device initialization, so we'll not bother with
13973		 * the add_vsi call, but we will retrieve the current
13974		 * VSI context.
13975		 */
13976		ctxt.seid = pf->main_vsi_seid;
13977		ctxt.pf_num = pf->hw.pf_id;
13978		ctxt.vf_num = 0;
13979		ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
13980		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
13981		if (ret) {
13982			dev_info(&pf->pdev->dev,
13983				 "couldn't get PF vsi config, err %pe aq_err %s\n",
13984				 ERR_PTR(ret),
13985				 i40e_aq_str(&pf->hw,
13986					     pf->hw.aq.asq_last_status));
13987			return -ENOENT;
13988		}
13989		vsi->info = ctxt.info;
13990		vsi->info.valid_sections = 0;
13991
13992		vsi->seid = ctxt.seid;
13993		vsi->id = ctxt.vsi_number;
13994
13995		enabled_tc = i40e_pf_get_tc_map(pf);
13996
13997		/* Source pruning is enabled by default, so the flag is
13998		 * negative logic - if it's set, we need to fiddle with
13999		 * the VSI to disable source pruning.
14000		 */
14001		if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
14002			memset(&ctxt, 0, sizeof(ctxt));
14003			ctxt.seid = pf->main_vsi_seid;
14004			ctxt.pf_num = pf->hw.pf_id;
14005			ctxt.vf_num = 0;
14006			ctxt.info.valid_sections |=
14007				     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
14008			ctxt.info.switch_id =
14009				   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
14010			ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
14011			if (ret) {
14012				dev_info(&pf->pdev->dev,
14013					 "update vsi failed, err %d aq_err %s\n",
14014					 ret,
14015					 i40e_aq_str(&pf->hw,
14016						     pf->hw.aq.asq_last_status));
14017				ret = -ENOENT;
14018				goto err;
14019			}
14020		}
14021
14022		/* MFP mode setup queue map and update VSI */
14023		if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
14024		    !(pf->hw.func_caps.iscsi)) { /* NIC type PF */
14025			memset(&ctxt, 0, sizeof(ctxt));
14026			ctxt.seid = pf->main_vsi_seid;
14027			ctxt.pf_num = pf->hw.pf_id;
14028			ctxt.vf_num = 0;
14029			i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
14030			ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
14031			if (ret) {
14032				dev_info(&pf->pdev->dev,
14033					 "update vsi failed, err %pe aq_err %s\n",
14034					 ERR_PTR(ret),
14035					 i40e_aq_str(&pf->hw,
14036						    pf->hw.aq.asq_last_status));
14037				ret = -ENOENT;
14038				goto err;
14039			}
14040			/* update the local VSI info queue map */
14041			i40e_vsi_update_queue_map(vsi, &ctxt);
14042			vsi->info.valid_sections = 0;
14043		} else {
14044			/* Default/Main VSI is only enabled for TC0
14045			 * reconfigure it to enable all TCs that are
14046			 * available on the port in SFP mode.
14047			 * For MFP case the iSCSI PF would use this
14048			 * flow to enable LAN+iSCSI TC.
14049			 */
14050			ret = i40e_vsi_config_tc(vsi, enabled_tc);
14051			if (ret) {
14052				/* Single TC condition is not fatal,
14053				 * message and continue
14054				 */
14055				dev_info(&pf->pdev->dev,
14056					 "failed to configure TCs for main VSI tc_map 0x%08x, err %pe aq_err %s\n",
14057					 enabled_tc,
14058					 ERR_PTR(ret),
14059					 i40e_aq_str(&pf->hw,
14060						    pf->hw.aq.asq_last_status));
14061			}
14062		}
14063		break;
14064
14065	case I40E_VSI_FDIR:
14066		ctxt.pf_num = hw->pf_id;
14067		ctxt.vf_num = 0;
14068		ctxt.uplink_seid = vsi->uplink_seid;
14069		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
14070		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
14071		if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) &&
14072		    (i40e_is_vsi_uplink_mode_veb(vsi))) {
14073			ctxt.info.valid_sections |=
14074			     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
14075			ctxt.info.switch_id =
14076			   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
14077		}
14078		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
14079		break;
14080
14081	case I40E_VSI_VMDQ2:
14082		ctxt.pf_num = hw->pf_id;
14083		ctxt.vf_num = 0;
14084		ctxt.uplink_seid = vsi->uplink_seid;
14085		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
14086		ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
14087
14088		/* This VSI is connected to VEB so the switch_id
14089		 * should be set to zero by default.
14090		 */
14091		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
14092			ctxt.info.valid_sections |=
14093				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
14094			ctxt.info.switch_id =
14095				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
14096		}
14097
14098		/* Setup the VSI tx/rx queue map for TC0 only for now */
14099		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
14100		break;
14101
14102	case I40E_VSI_SRIOV:
14103		ctxt.pf_num = hw->pf_id;
14104		ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
14105		ctxt.uplink_seid = vsi->uplink_seid;
14106		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
14107		ctxt.flags = I40E_AQ_VSI_TYPE_VF;
14108
14109		/* This VSI is connected to VEB so the switch_id
14110		 * should be set to zero by default.
14111		 */
14112		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
14113			ctxt.info.valid_sections |=
14114				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
14115			ctxt.info.switch_id =
14116				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
14117		}
14118
14119		if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
14120			ctxt.info.valid_sections |=
14121				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
14122			ctxt.info.queueing_opt_flags |=
14123				(I40E_AQ_VSI_QUE_OPT_TCP_ENA |
14124				 I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI);
14125		}
14126
14127		ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
14128		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
14129		if (pf->vf[vsi->vf_id].spoofchk) {
14130			ctxt.info.valid_sections |=
14131				cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
14132			ctxt.info.sec_flags |=
14133				(I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK |
14134				 I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK);
14135		}
14136		/* Setup the VSI tx/rx queue map for TC0 only for now */
14137		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
14138		break;
14139
14140	case I40E_VSI_IWARP:
14141		/* send down message to iWARP */
14142		break;
14143
14144	default:
14145		return -ENODEV;
14146	}
14147
14148	if (vsi->type != I40E_VSI_MAIN) {
14149		ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
14150		if (ret) {
14151			dev_info(&vsi->back->pdev->dev,
14152				 "add vsi failed, err %pe aq_err %s\n",
14153				 ERR_PTR(ret),
14154				 i40e_aq_str(&pf->hw,
14155					     pf->hw.aq.asq_last_status));
14156			ret = -ENOENT;
14157			goto err;
14158		}
14159		vsi->info = ctxt.info;
14160		vsi->info.valid_sections = 0;
14161		vsi->seid = ctxt.seid;
14162		vsi->id = ctxt.vsi_number;
14163	}
14164
14165	spin_lock_bh(&vsi->mac_filter_hash_lock);
14166	vsi->active_filters = 0;
14167	/* If macvlan filters already exist, force them to get loaded */
14168	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
14169		f->state = I40E_FILTER_NEW;
14170		f_count++;
14171	}
14172	spin_unlock_bh(&vsi->mac_filter_hash_lock);
14173	clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
14174
14175	if (f_count) {
14176		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
14177		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
14178	}
14179
14180	/* Update VSI BW information */
14181	ret = i40e_vsi_get_bw_info(vsi);
14182	if (ret) {
14183		dev_info(&pf->pdev->dev,
14184			 "couldn't get vsi bw info, err %pe aq_err %s\n",
14185			 ERR_PTR(ret),
14186			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14187		/* VSI is already added so not tearing that up */
14188		ret = 0;
14189	}
14190
14191err:
14192	return ret;
14193}
14194
14195/**
14196 * i40e_vsi_release - Delete a VSI and free its resources
14197 * @vsi: the VSI being removed
14198 *
14199 * Returns 0 on success or < 0 on error
14200 **/
14201int i40e_vsi_release(struct i40e_vsi *vsi)
14202{
14203	struct i40e_mac_filter *f;
14204	struct hlist_node *h;
14205	struct i40e_veb *veb = NULL;
14206	struct i40e_pf *pf;
14207	u16 uplink_seid;
14208	int i, n, bkt;
14209
14210	pf = vsi->back;
14211
14212	/* release of a VEB-owner or last VSI is not allowed */
14213	if (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) {
14214		dev_info(&pf->pdev->dev, "VSI %d has existing VEB %d\n",
14215			 vsi->seid, vsi->uplink_seid);
14216		return -ENODEV;
14217	}
14218	if (vsi == pf->vsi[pf->lan_vsi] &&
14219	    !test_bit(__I40E_DOWN, pf->state)) {
14220		dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
14221		return -ENODEV;
14222	}
14223	set_bit(__I40E_VSI_RELEASING, vsi->state);
14224	uplink_seid = vsi->uplink_seid;
14225	if (vsi->type != I40E_VSI_SRIOV) {
14226		if (vsi->netdev_registered) {
14227			vsi->netdev_registered = false;
14228			if (vsi->netdev) {
14229				/* results in a call to i40e_close() */
14230				unregister_netdev(vsi->netdev);
14231			}
14232		} else {
14233			i40e_vsi_close(vsi);
14234		}
14235		i40e_vsi_disable_irq(vsi);
14236	}
14237
14238	spin_lock_bh(&vsi->mac_filter_hash_lock);
14239
14240	/* clear the sync flag on all filters */
14241	if (vsi->netdev) {
14242		__dev_uc_unsync(vsi->netdev, NULL);
14243		__dev_mc_unsync(vsi->netdev, NULL);
14244	}
14245
14246	/* make sure any remaining filters are marked for deletion */
14247	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
14248		__i40e_del_filter(vsi, f);
14249
14250	spin_unlock_bh(&vsi->mac_filter_hash_lock);
14251
14252	i40e_sync_vsi_filters(vsi);
14253
14254	i40e_vsi_delete(vsi);
14255	i40e_vsi_free_q_vectors(vsi);
14256	if (vsi->netdev) {
14257		free_netdev(vsi->netdev);
14258		vsi->netdev = NULL;
14259	}
14260	i40e_vsi_clear_rings(vsi);
14261	i40e_vsi_clear(vsi);
14262
14263	/* If this was the last thing on the VEB, except for the
14264	 * controlling VSI, remove the VEB, which puts the controlling
14265	 * VSI onto the next level down in the switch.
14266	 *
14267	 * Well, okay, there's one more exception here: don't remove
14268	 * the orphan VEBs yet.  We'll wait for an explicit remove request
14269	 * from up the network stack.
14270	 */
14271	for (n = 0, i = 0; i < pf->num_alloc_vsi; i++) {
14272		if (pf->vsi[i] &&
14273		    pf->vsi[i]->uplink_seid == uplink_seid &&
14274		    (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
14275			n++;      /* count the VSIs */
14276		}
14277	}
14278	for (i = 0; i < I40E_MAX_VEB; i++) {
14279		if (!pf->veb[i])
14280			continue;
14281		if (pf->veb[i]->uplink_seid == uplink_seid)
14282			n++;     /* count the VEBs */
14283		if (pf->veb[i]->seid == uplink_seid)
14284			veb = pf->veb[i];
14285	}
14286	if (n == 0 && veb && veb->uplink_seid != 0)
14287		i40e_veb_release(veb);
14288
14289	return 0;
14290}
14291
14292/**
14293 * i40e_vsi_setup_vectors - Set up the q_vectors for the given VSI
14294 * @vsi: ptr to the VSI
14295 *
14296 * This should only be called after i40e_vsi_mem_alloc() which allocates the
14297 * corresponding SW VSI structure and initializes num_queue_pairs for the
14298 * newly allocated VSI.
14299 *
14300 * Returns 0 on success or negative on failure
14301 **/
14302static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
14303{
14304	int ret = -ENOENT;
14305	struct i40e_pf *pf = vsi->back;
14306
14307	if (vsi->q_vectors[0]) {
14308		dev_info(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
14309			 vsi->seid);
14310		return -EEXIST;
14311	}
14312
14313	if (vsi->base_vector) {
14314		dev_info(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
14315			 vsi->seid, vsi->base_vector);
14316		return -EEXIST;
14317	}
14318
14319	ret = i40e_vsi_alloc_q_vectors(vsi);
14320	if (ret) {
14321		dev_info(&pf->pdev->dev,
14322			 "failed to allocate %d q_vector for VSI %d, ret=%d\n",
14323			 vsi->num_q_vectors, vsi->seid, ret);
14324		vsi->num_q_vectors = 0;
14325		goto vector_setup_out;
14326	}
14327
14328	/* In Legacy mode, we do not have to get any other vector since we
14329	 * piggyback on the misc/ICR0 for queue interrupts.
14330	*/
14331	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
14332		return ret;
14333	if (vsi->num_q_vectors)
14334		vsi->base_vector = i40e_get_lump(pf, pf->irq_pile,
14335						 vsi->num_q_vectors, vsi->idx);
14336	if (vsi->base_vector < 0) {
14337		dev_info(&pf->pdev->dev,
14338			 "failed to get tracking for %d vectors for VSI %d, err=%d\n",
14339			 vsi->num_q_vectors, vsi->seid, vsi->base_vector);
14340		i40e_vsi_free_q_vectors(vsi);
14341		ret = -ENOENT;
14342		goto vector_setup_out;
14343	}
14344
14345vector_setup_out:
14346	return ret;
14347}
14348
14349/**
14350 * i40e_vsi_reinit_setup - return and reallocate resources for a VSI
14351 * @vsi: pointer to the vsi.
14352 *
14353 * This re-allocates a vsi's queue resources.
14354 *
14355 * Returns pointer to the successfully allocated and configured VSI sw struct
14356 * on success, otherwise returns NULL on failure.
14357 **/
14358static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
14359{
14360	u16 alloc_queue_pairs;
14361	struct i40e_pf *pf;
14362	u8 enabled_tc;
14363	int ret;
14364
14365	if (!vsi)
14366		return NULL;
14367
14368	pf = vsi->back;
14369
14370	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
14371	i40e_vsi_clear_rings(vsi);
14372
14373	i40e_vsi_free_arrays(vsi, false);
14374	i40e_set_num_rings_in_vsi(vsi);
14375	ret = i40e_vsi_alloc_arrays(vsi, false);
14376	if (ret)
14377		goto err_vsi;
14378
14379	alloc_queue_pairs = vsi->alloc_queue_pairs *
14380			    (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
14381
14382	ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx);
14383	if (ret < 0) {
14384		dev_info(&pf->pdev->dev,
14385			 "failed to get tracking for %d queues for VSI %d err %d\n",
14386			 alloc_queue_pairs, vsi->seid, ret);
14387		goto err_vsi;
14388	}
14389	vsi->base_queue = ret;
14390
14391	/* Update the FW view of the VSI. Force a reset of TC and queue
14392	 * layout configurations.
14393	 */
14394	enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
14395	pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
14396	pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
14397	i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
14398	if (vsi->type == I40E_VSI_MAIN)
14399		i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
14400
14401	/* assign it some queues */
14402	ret = i40e_alloc_rings(vsi);
14403	if (ret)
14404		goto err_rings;
14405
14406	/* map all of the rings to the q_vectors */
14407	i40e_vsi_map_rings_to_vectors(vsi);
14408	return vsi;
14409
14410err_rings:
14411	i40e_vsi_free_q_vectors(vsi);
14412	if (vsi->netdev_registered) {
14413		vsi->netdev_registered = false;
14414		unregister_netdev(vsi->netdev);
14415		free_netdev(vsi->netdev);
14416		vsi->netdev = NULL;
14417	}
14418	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
14419err_vsi:
14420	i40e_vsi_clear(vsi);
14421	return NULL;
14422}
14423
14424/**
14425 * i40e_vsi_setup - Set up a VSI by a given type
14426 * @pf: board private structure
14427 * @type: VSI type
14428 * @uplink_seid: the switch element to link to
14429 * @param1: usage depends upon VSI type. For VF types, indicates VF id
14430 *
14431 * This allocates the sw VSI structure and its queue resources, then add a VSI
14432 * to the identified VEB.
14433 *
14434 * Returns pointer to the successfully allocated and configure VSI sw struct on
14435 * success, otherwise returns NULL on failure.
14436 **/
14437struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
14438				u16 uplink_seid, u32 param1)
14439{
14440	struct i40e_vsi *vsi = NULL;
14441	struct i40e_veb *veb = NULL;
14442	u16 alloc_queue_pairs;
14443	int ret, i;
14444	int v_idx;
14445
14446	/* The requested uplink_seid must be either
14447	 *     - the PF's port seid
14448	 *              no VEB is needed because this is the PF
14449	 *              or this is a Flow Director special case VSI
14450	 *     - seid of an existing VEB
14451	 *     - seid of a VSI that owns an existing VEB
14452	 *     - seid of a VSI that doesn't own a VEB
14453	 *              a new VEB is created and the VSI becomes the owner
14454	 *     - seid of the PF VSI, which is what creates the first VEB
14455	 *              this is a special case of the previous
14456	 *
14457	 * Find which uplink_seid we were given and create a new VEB if needed
14458	 */
14459	for (i = 0; i < I40E_MAX_VEB; i++) {
14460		if (pf->veb[i] && pf->veb[i]->seid == uplink_seid) {
14461			veb = pf->veb[i];
14462			break;
14463		}
14464	}
14465
14466	if (!veb && uplink_seid != pf->mac_seid) {
14467
14468		for (i = 0; i < pf->num_alloc_vsi; i++) {
14469			if (pf->vsi[i] && pf->vsi[i]->seid == uplink_seid) {
14470				vsi = pf->vsi[i];
14471				break;
14472			}
14473		}
14474		if (!vsi) {
14475			dev_info(&pf->pdev->dev, "no such uplink_seid %d\n",
14476				 uplink_seid);
14477			return NULL;
14478		}
14479
14480		if (vsi->uplink_seid == pf->mac_seid)
14481			veb = i40e_veb_setup(pf, 0, pf->mac_seid, vsi->seid,
14482					     vsi->tc_config.enabled_tc);
14483		else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
14484			veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid,
14485					     vsi->tc_config.enabled_tc);
14486		if (veb) {
14487			if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) {
14488				dev_info(&vsi->back->pdev->dev,
14489					 "New VSI creation error, uplink seid of LAN VSI expected.\n");
14490				return NULL;
14491			}
14492			/* We come up by default in VEPA mode if SRIOV is not
14493			 * already enabled, in which case we can't force VEPA
14494			 * mode.
14495			 */
14496			if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
14497				veb->bridge_mode = BRIDGE_MODE_VEPA;
14498				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
14499			}
14500			i40e_config_bridge_mode(veb);
14501		}
14502		for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
14503			if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
14504				veb = pf->veb[i];
14505		}
14506		if (!veb) {
14507			dev_info(&pf->pdev->dev, "couldn't add VEB\n");
14508			return NULL;
14509		}
14510
14511		vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
14512		uplink_seid = veb->seid;
14513	}
14514
14515	/* get vsi sw struct */
14516	v_idx = i40e_vsi_mem_alloc(pf, type);
14517	if (v_idx < 0)
14518		goto err_alloc;
14519	vsi = pf->vsi[v_idx];
14520	if (!vsi)
14521		goto err_alloc;
14522	vsi->type = type;
14523	vsi->veb_idx = (veb ? veb->idx : I40E_NO_VEB);
14524
14525	if (type == I40E_VSI_MAIN)
14526		pf->lan_vsi = v_idx;
14527	else if (type == I40E_VSI_SRIOV)
14528		vsi->vf_id = param1;
14529	/* assign it some queues */
14530	alloc_queue_pairs = vsi->alloc_queue_pairs *
14531			    (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
14532
14533	ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx);
14534	if (ret < 0) {
14535		dev_info(&pf->pdev->dev,
14536			 "failed to get tracking for %d queues for VSI %d err=%d\n",
14537			 alloc_queue_pairs, vsi->seid, ret);
14538		goto err_vsi;
14539	}
14540	vsi->base_queue = ret;
14541
14542	/* get a VSI from the hardware */
14543	vsi->uplink_seid = uplink_seid;
14544	ret = i40e_add_vsi(vsi);
14545	if (ret)
14546		goto err_vsi;
14547
14548	switch (vsi->type) {
14549	/* setup the netdev if needed */
14550	case I40E_VSI_MAIN:
14551	case I40E_VSI_VMDQ2:
14552		ret = i40e_config_netdev(vsi);
14553		if (ret)
14554			goto err_netdev;
14555		ret = i40e_netif_set_realnum_tx_rx_queues(vsi);
14556		if (ret)
14557			goto err_netdev;
14558		ret = register_netdev(vsi->netdev);
14559		if (ret)
14560			goto err_netdev;
14561		vsi->netdev_registered = true;
14562		netif_carrier_off(vsi->netdev);
14563#ifdef CONFIG_I40E_DCB
14564		/* Setup DCB netlink interface */
14565		i40e_dcbnl_setup(vsi);
14566#endif /* CONFIG_I40E_DCB */
14567		fallthrough;
14568	case I40E_VSI_FDIR:
14569		/* set up vectors and rings if needed */
14570		ret = i40e_vsi_setup_vectors(vsi);
14571		if (ret)
14572			goto err_msix;
14573
14574		ret = i40e_alloc_rings(vsi);
14575		if (ret)
14576			goto err_rings;
14577
14578		/* map all of the rings to the q_vectors */
14579		i40e_vsi_map_rings_to_vectors(vsi);
14580
14581		i40e_vsi_reset_stats(vsi);
14582		break;
14583	default:
14584		/* no netdev or rings for the other VSI types */
14585		break;
14586	}
14587
14588	if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
14589	    (vsi->type == I40E_VSI_VMDQ2)) {
14590		ret = i40e_vsi_config_rss(vsi);
14591	}
14592	return vsi;
14593
14594err_rings:
14595	i40e_vsi_free_q_vectors(vsi);
14596err_msix:
14597	if (vsi->netdev_registered) {
14598		vsi->netdev_registered = false;
14599		unregister_netdev(vsi->netdev);
14600		free_netdev(vsi->netdev);
14601		vsi->netdev = NULL;
14602	}
14603err_netdev:
14604	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
14605err_vsi:
14606	i40e_vsi_clear(vsi);
14607err_alloc:
14608	return NULL;
14609}
14610
14611/**
14612 * i40e_veb_get_bw_info - Query VEB BW information
14613 * @veb: the veb to query
14614 *
14615 * Query the Tx scheduler BW configuration data for given VEB
14616 **/
14617static int i40e_veb_get_bw_info(struct i40e_veb *veb)
14618{
14619	struct i40e_aqc_query_switching_comp_ets_config_resp ets_data;
14620	struct i40e_aqc_query_switching_comp_bw_config_resp bw_data;
14621	struct i40e_pf *pf = veb->pf;
14622	struct i40e_hw *hw = &pf->hw;
14623	u32 tc_bw_max;
14624	int ret = 0;
14625	int i;
14626
14627	ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid,
14628						  &bw_data, NULL);
14629	if (ret) {
14630		dev_info(&pf->pdev->dev,
14631			 "query veb bw config failed, err %pe aq_err %s\n",
14632			 ERR_PTR(ret),
14633			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
14634		goto out;
14635	}
14636
14637	ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid,
14638						   &ets_data, NULL);
14639	if (ret) {
14640		dev_info(&pf->pdev->dev,
14641			 "query veb bw ets config failed, err %pe aq_err %s\n",
14642			 ERR_PTR(ret),
14643			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
14644		goto out;
14645	}
14646
14647	veb->bw_limit = le16_to_cpu(ets_data.port_bw_limit);
14648	veb->bw_max_quanta = ets_data.tc_bw_max;
14649	veb->is_abs_credits = bw_data.absolute_credits_enable;
14650	veb->enabled_tc = ets_data.tc_valid_bits;
14651	tc_bw_max = le16_to_cpu(bw_data.tc_bw_max[0]) |
14652		    (le16_to_cpu(bw_data.tc_bw_max[1]) << 16);
14653	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
14654		veb->bw_tc_share_credits[i] = bw_data.tc_bw_share_credits[i];
14655		veb->bw_tc_limit_credits[i] =
14656					le16_to_cpu(bw_data.tc_bw_limits[i]);
14657		veb->bw_tc_max_quanta[i] = ((tc_bw_max >> (i*4)) & 0x7);
14658	}
14659
14660out:
14661	return ret;
14662}
14663
14664/**
14665 * i40e_veb_mem_alloc - Allocates the next available struct veb in the PF
14666 * @pf: board private structure
14667 *
14668 * On error: returns error code (negative)
14669 * On success: returns vsi index in PF (positive)
14670 **/
14671static int i40e_veb_mem_alloc(struct i40e_pf *pf)
14672{
14673	int ret = -ENOENT;
14674	struct i40e_veb *veb;
14675	int i;
14676
14677	/* Need to protect the allocation of switch elements at the PF level */
14678	mutex_lock(&pf->switch_mutex);
14679
14680	/* VEB list may be fragmented if VEB creation/destruction has
14681	 * been happening.  We can afford to do a quick scan to look
14682	 * for any free slots in the list.
14683	 *
14684	 * find next empty veb slot, looping back around if necessary
14685	 */
14686	i = 0;
14687	while ((i < I40E_MAX_VEB) && (pf->veb[i] != NULL))
14688		i++;
14689	if (i >= I40E_MAX_VEB) {
14690		ret = -ENOMEM;
14691		goto err_alloc_veb;  /* out of VEB slots! */
14692	}
14693
14694	veb = kzalloc(sizeof(*veb), GFP_KERNEL);
14695	if (!veb) {
14696		ret = -ENOMEM;
14697		goto err_alloc_veb;
14698	}
14699	veb->pf = pf;
14700	veb->idx = i;
14701	veb->enabled_tc = 1;
14702
14703	pf->veb[i] = veb;
14704	ret = i;
14705err_alloc_veb:
14706	mutex_unlock(&pf->switch_mutex);
14707	return ret;
14708}
14709
14710/**
14711 * i40e_switch_branch_release - Delete a branch of the switch tree
14712 * @branch: where to start deleting
14713 *
14714 * This uses recursion to find the tips of the branch to be
14715 * removed, deleting until we get back to and can delete this VEB.
14716 **/
14717static void i40e_switch_branch_release(struct i40e_veb *branch)
14718{
14719	struct i40e_pf *pf = branch->pf;
14720	u16 branch_seid = branch->seid;
14721	u16 veb_idx = branch->idx;
14722	int i;
14723
14724	/* release any VEBs on this VEB - RECURSION */
14725	for (i = 0; i < I40E_MAX_VEB; i++) {
14726		if (!pf->veb[i])
14727			continue;
14728		if (pf->veb[i]->uplink_seid == branch->seid)
14729			i40e_switch_branch_release(pf->veb[i]);
14730	}
14731
14732	/* Release the VSIs on this VEB, but not the owner VSI.
14733	 *
14734	 * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing
14735	 *       the VEB itself, so don't use (*branch) after this loop.
14736	 */
14737	for (i = 0; i < pf->num_alloc_vsi; i++) {
14738		if (!pf->vsi[i])
14739			continue;
14740		if (pf->vsi[i]->uplink_seid == branch_seid &&
14741		   (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
14742			i40e_vsi_release(pf->vsi[i]);
14743		}
14744	}
14745
14746	/* There's one corner case where the VEB might not have been
14747	 * removed, so double check it here and remove it if needed.
14748	 * This case happens if the veb was created from the debugfs
14749	 * commands and no VSIs were added to it.
14750	 */
14751	if (pf->veb[veb_idx])
14752		i40e_veb_release(pf->veb[veb_idx]);
14753}
14754
14755/**
14756 * i40e_veb_clear - remove veb struct
14757 * @veb: the veb to remove
14758 **/
14759static void i40e_veb_clear(struct i40e_veb *veb)
14760{
14761	if (!veb)
14762		return;
14763
14764	if (veb->pf) {
14765		struct i40e_pf *pf = veb->pf;
14766
14767		mutex_lock(&pf->switch_mutex);
14768		if (pf->veb[veb->idx] == veb)
14769			pf->veb[veb->idx] = NULL;
14770		mutex_unlock(&pf->switch_mutex);
14771	}
14772
14773	kfree(veb);
14774}
14775
14776/**
14777 * i40e_veb_release - Delete a VEB and free its resources
14778 * @veb: the VEB being removed
14779 **/
14780void i40e_veb_release(struct i40e_veb *veb)
14781{
14782	struct i40e_vsi *vsi = NULL;
14783	struct i40e_pf *pf;
14784	int i, n = 0;
14785
14786	pf = veb->pf;
14787
14788	/* find the remaining VSI and check for extras */
14789	for (i = 0; i < pf->num_alloc_vsi; i++) {
14790		if (pf->vsi[i] && pf->vsi[i]->uplink_seid == veb->seid) {
14791			n++;
14792			vsi = pf->vsi[i];
14793		}
14794	}
14795	if (n != 1) {
14796		dev_info(&pf->pdev->dev,
14797			 "can't remove VEB %d with %d VSIs left\n",
14798			 veb->seid, n);
14799		return;
14800	}
14801
14802	/* move the remaining VSI to uplink veb */
14803	vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
14804	if (veb->uplink_seid) {
14805		vsi->uplink_seid = veb->uplink_seid;
14806		if (veb->uplink_seid == pf->mac_seid)
14807			vsi->veb_idx = I40E_NO_VEB;
14808		else
14809			vsi->veb_idx = veb->veb_idx;
14810	} else {
14811		/* floating VEB */
14812		vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
14813		vsi->veb_idx = pf->vsi[pf->lan_vsi]->veb_idx;
14814	}
14815
14816	i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
14817	i40e_veb_clear(veb);
14818}
14819
14820/**
14821 * i40e_add_veb - create the VEB in the switch
14822 * @veb: the VEB to be instantiated
14823 * @vsi: the controlling VSI
14824 **/
14825static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
14826{
14827	struct i40e_pf *pf = veb->pf;
14828	bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED);
14829	int ret;
14830
14831	ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
14832			      veb->enabled_tc, false,
14833			      &veb->seid, enable_stats, NULL);
14834
14835	/* get a VEB from the hardware */
14836	if (ret) {
14837		dev_info(&pf->pdev->dev,
14838			 "couldn't add VEB, err %pe aq_err %s\n",
14839			 ERR_PTR(ret),
14840			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14841		return -EPERM;
14842	}
14843
14844	/* get statistics counter */
14845	ret = i40e_aq_get_veb_parameters(&pf->hw, veb->seid, NULL, NULL,
14846					 &veb->stats_idx, NULL, NULL, NULL);
14847	if (ret) {
14848		dev_info(&pf->pdev->dev,
14849			 "couldn't get VEB statistics idx, err %pe aq_err %s\n",
14850			 ERR_PTR(ret),
14851			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14852		return -EPERM;
14853	}
14854	ret = i40e_veb_get_bw_info(veb);
14855	if (ret) {
14856		dev_info(&pf->pdev->dev,
14857			 "couldn't get VEB bw info, err %pe aq_err %s\n",
14858			 ERR_PTR(ret),
14859			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14860		i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
14861		return -ENOENT;
14862	}
14863
14864	vsi->uplink_seid = veb->seid;
14865	vsi->veb_idx = veb->idx;
14866	vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
14867
14868	return 0;
14869}
14870
14871/**
14872 * i40e_veb_setup - Set up a VEB
14873 * @pf: board private structure
14874 * @flags: VEB setup flags
14875 * @uplink_seid: the switch element to link to
14876 * @vsi_seid: the initial VSI seid
14877 * @enabled_tc: Enabled TC bit-map
14878 *
14879 * This allocates the sw VEB structure and links it into the switch
14880 * It is possible and legal for this to be a duplicate of an already
14881 * existing VEB.  It is also possible for both uplink and vsi seids
14882 * to be zero, in order to create a floating VEB.
14883 *
14884 * Returns pointer to the successfully allocated VEB sw struct on
14885 * success, otherwise returns NULL on failure.
14886 **/
14887struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
14888				u16 uplink_seid, u16 vsi_seid,
14889				u8 enabled_tc)
14890{
14891	struct i40e_veb *veb, *uplink_veb = NULL;
14892	int vsi_idx, veb_idx;
14893	int ret;
14894
14895	/* if one seid is 0, the other must be 0 to create a floating relay */
14896	if ((uplink_seid == 0 || vsi_seid == 0) &&
14897	    (uplink_seid + vsi_seid != 0)) {
14898		dev_info(&pf->pdev->dev,
14899			 "one, not both seid's are 0: uplink=%d vsi=%d\n",
14900			 uplink_seid, vsi_seid);
14901		return NULL;
14902	}
14903
14904	/* make sure there is such a vsi and uplink */
14905	for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
14906		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
14907			break;
14908	if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) {
14909		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
14910			 vsi_seid);
14911		return NULL;
14912	}
14913
14914	if (uplink_seid && uplink_seid != pf->mac_seid) {
14915		for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
14916			if (pf->veb[veb_idx] &&
14917			    pf->veb[veb_idx]->seid == uplink_seid) {
14918				uplink_veb = pf->veb[veb_idx];
14919				break;
14920			}
14921		}
14922		if (!uplink_veb) {
14923			dev_info(&pf->pdev->dev,
14924				 "uplink seid %d not found\n", uplink_seid);
14925			return NULL;
14926		}
14927	}
14928
14929	/* get veb sw struct */
14930	veb_idx = i40e_veb_mem_alloc(pf);
14931	if (veb_idx < 0)
14932		goto err_alloc;
14933	veb = pf->veb[veb_idx];
14934	veb->flags = flags;
14935	veb->uplink_seid = uplink_seid;
14936	veb->veb_idx = (uplink_veb ? uplink_veb->idx : I40E_NO_VEB);
14937	veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1);
14938
14939	/* create the VEB in the switch */
14940	ret = i40e_add_veb(veb, pf->vsi[vsi_idx]);
14941	if (ret)
14942		goto err_veb;
14943	if (vsi_idx == pf->lan_vsi)
14944		pf->lan_veb = veb->idx;
14945
14946	return veb;
14947
14948err_veb:
14949	i40e_veb_clear(veb);
14950err_alloc:
14951	return NULL;
14952}
14953
14954/**
14955 * i40e_setup_pf_switch_element - set PF vars based on switch type
14956 * @pf: board private structure
14957 * @ele: element we are building info from
14958 * @num_reported: total number of elements
14959 * @printconfig: should we print the contents
14960 *
14961 * helper function to assist in extracting a few useful SEID values.
14962 **/
14963static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
14964				struct i40e_aqc_switch_config_element_resp *ele,
14965				u16 num_reported, bool printconfig)
14966{
14967	u16 downlink_seid = le16_to_cpu(ele->downlink_seid);
14968	u16 uplink_seid = le16_to_cpu(ele->uplink_seid);
14969	u8 element_type = ele->element_type;
14970	u16 seid = le16_to_cpu(ele->seid);
14971
14972	if (printconfig)
14973		dev_info(&pf->pdev->dev,
14974			 "type=%d seid=%d uplink=%d downlink=%d\n",
14975			 element_type, seid, uplink_seid, downlink_seid);
14976
14977	switch (element_type) {
14978	case I40E_SWITCH_ELEMENT_TYPE_MAC:
14979		pf->mac_seid = seid;
14980		break;
14981	case I40E_SWITCH_ELEMENT_TYPE_VEB:
14982		/* Main VEB? */
14983		if (uplink_seid != pf->mac_seid)
14984			break;
14985		if (pf->lan_veb >= I40E_MAX_VEB) {
14986			int v;
14987
14988			/* find existing or else empty VEB */
14989			for (v = 0; v < I40E_MAX_VEB; v++) {
14990				if (pf->veb[v] && (pf->veb[v]->seid == seid)) {
14991					pf->lan_veb = v;
14992					break;
14993				}
14994			}
14995			if (pf->lan_veb >= I40E_MAX_VEB) {
14996				v = i40e_veb_mem_alloc(pf);
14997				if (v < 0)
14998					break;
14999				pf->lan_veb = v;
15000			}
15001		}
15002		if (pf->lan_veb >= I40E_MAX_VEB)
15003			break;
15004
15005		pf->veb[pf->lan_veb]->seid = seid;
15006		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
15007		pf->veb[pf->lan_veb]->pf = pf;
15008		pf->veb[pf->lan_veb]->veb_idx = I40E_NO_VEB;
15009		break;
15010	case I40E_SWITCH_ELEMENT_TYPE_VSI:
15011		if (num_reported != 1)
15012			break;
15013		/* This is immediately after a reset so we can assume this is
15014		 * the PF's VSI
15015		 */
15016		pf->mac_seid = uplink_seid;
15017		pf->pf_seid = downlink_seid;
15018		pf->main_vsi_seid = seid;
15019		if (printconfig)
15020			dev_info(&pf->pdev->dev,
15021				 "pf_seid=%d main_vsi_seid=%d\n",
15022				 pf->pf_seid, pf->main_vsi_seid);
15023		break;
15024	case I40E_SWITCH_ELEMENT_TYPE_PF:
15025	case I40E_SWITCH_ELEMENT_TYPE_VF:
15026	case I40E_SWITCH_ELEMENT_TYPE_EMP:
15027	case I40E_SWITCH_ELEMENT_TYPE_BMC:
15028	case I40E_SWITCH_ELEMENT_TYPE_PE:
15029	case I40E_SWITCH_ELEMENT_TYPE_PA:
15030		/* ignore these for now */
15031		break;
15032	default:
15033		dev_info(&pf->pdev->dev, "unknown element type=%d seid=%d\n",
15034			 element_type, seid);
15035		break;
15036	}
15037}
15038
15039/**
15040 * i40e_fetch_switch_configuration - Get switch config from firmware
15041 * @pf: board private structure
15042 * @printconfig: should we print the contents
15043 *
15044 * Get the current switch configuration from the device and
15045 * extract a few useful SEID values.
15046 **/
15047int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
15048{
15049	struct i40e_aqc_get_switch_config_resp *sw_config;
15050	u16 next_seid = 0;
15051	int ret = 0;
15052	u8 *aq_buf;
15053	int i;
15054
15055	aq_buf = kzalloc(I40E_AQ_LARGE_BUF, GFP_KERNEL);
15056	if (!aq_buf)
15057		return -ENOMEM;
15058
15059	sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
15060	do {
15061		u16 num_reported, num_total;
15062
15063		ret = i40e_aq_get_switch_config(&pf->hw, sw_config,
15064						I40E_AQ_LARGE_BUF,
15065						&next_seid, NULL);
15066		if (ret) {
15067			dev_info(&pf->pdev->dev,
15068				 "get switch config failed err %d aq_err %s\n",
15069				 ret,
15070				 i40e_aq_str(&pf->hw,
15071					     pf->hw.aq.asq_last_status));
15072			kfree(aq_buf);
15073			return -ENOENT;
15074		}
15075
15076		num_reported = le16_to_cpu(sw_config->header.num_reported);
15077		num_total = le16_to_cpu(sw_config->header.num_total);
15078
15079		if (printconfig)
15080			dev_info(&pf->pdev->dev,
15081				 "header: %d reported %d total\n",
15082				 num_reported, num_total);
15083
15084		for (i = 0; i < num_reported; i++) {
15085			struct i40e_aqc_switch_config_element_resp *ele =
15086				&sw_config->element[i];
15087
15088			i40e_setup_pf_switch_element(pf, ele, num_reported,
15089						     printconfig);
15090		}
15091	} while (next_seid != 0);
15092
15093	kfree(aq_buf);
15094	return ret;
15095}
15096
15097/**
15098 * i40e_setup_pf_switch - Setup the HW switch on startup or after reset
15099 * @pf: board private structure
15100 * @reinit: if the Main VSI needs to re-initialized.
15101 * @lock_acquired: indicates whether or not the lock has been acquired
15102 *
15103 * Returns 0 on success, negative value on failure
15104 **/
15105static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired)
15106{
15107	u16 flags = 0;
15108	int ret;
15109
15110	/* find out what's out there already */
15111	ret = i40e_fetch_switch_configuration(pf, false);
15112	if (ret) {
15113		dev_info(&pf->pdev->dev,
15114			 "couldn't fetch switch config, err %pe aq_err %s\n",
15115			 ERR_PTR(ret),
15116			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
15117		return ret;
15118	}
15119	i40e_pf_reset_stats(pf);
15120
15121	/* set the switch config bit for the whole device to
15122	 * support limited promisc or true promisc
15123	 * when user requests promisc. The default is limited
15124	 * promisc.
15125	*/
15126
15127	if ((pf->hw.pf_id == 0) &&
15128	    !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
15129		flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
15130		pf->last_sw_conf_flags = flags;
15131	}
15132
15133	if (pf->hw.pf_id == 0) {
15134		u16 valid_flags;
15135
15136		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
15137		ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
15138						NULL);
15139		if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
15140			dev_info(&pf->pdev->dev,
15141				 "couldn't set switch config bits, err %pe aq_err %s\n",
15142				 ERR_PTR(ret),
15143				 i40e_aq_str(&pf->hw,
15144					     pf->hw.aq.asq_last_status));
15145			/* not a fatal problem, just keep going */
15146		}
15147		pf->last_sw_conf_valid_flags = valid_flags;
15148	}
15149
15150	/* first time setup */
15151	if (pf->lan_vsi == I40E_NO_VSI || reinit) {
15152		struct i40e_vsi *vsi = NULL;
15153		u16 uplink_seid;
15154
15155		/* Set up the PF VSI associated with the PF's main VSI
15156		 * that is already in the HW switch
15157		 */
15158		if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
15159			uplink_seid = pf->veb[pf->lan_veb]->seid;
15160		else
15161			uplink_seid = pf->mac_seid;
15162		if (pf->lan_vsi == I40E_NO_VSI)
15163			vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0);
15164		else if (reinit)
15165			vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
15166		if (!vsi) {
15167			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
15168			i40e_cloud_filter_exit(pf);
15169			i40e_fdir_teardown(pf);
15170			return -EAGAIN;
15171		}
15172	} else {
15173		/* force a reset of TC and queue layout configurations */
15174		u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
15175
15176		pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
15177		pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
15178		i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
15179	}
15180	i40e_vlan_stripping_disable(pf->vsi[pf->lan_vsi]);
15181
15182	i40e_fdir_sb_setup(pf);
15183
15184	/* Setup static PF queue filter control settings */
15185	ret = i40e_setup_pf_filter_control(pf);
15186	if (ret) {
15187		dev_info(&pf->pdev->dev, "setup_pf_filter_control failed: %d\n",
15188			 ret);
15189		/* Failure here should not stop continuing other steps */
15190	}
15191
15192	/* enable RSS in the HW, even for only one queue, as the stack can use
15193	 * the hash
15194	 */
15195	if ((pf->flags & I40E_FLAG_RSS_ENABLED))
15196		i40e_pf_config_rss(pf);
15197
15198	/* fill in link information and enable LSE reporting */
15199	i40e_link_event(pf);
15200
15201	/* Initialize user-specific link properties */
15202	pf->fc_autoneg_status = ((pf->hw.phy.link_info.an_info &
15203				  I40E_AQ_AN_COMPLETED) ? true : false);
15204
15205	i40e_ptp_init(pf);
15206
15207	if (!lock_acquired)
15208		rtnl_lock();
15209
15210	/* repopulate tunnel port filters */
15211	udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev);
15212
15213	if (!lock_acquired)
15214		rtnl_unlock();
15215
15216	return ret;
15217}
15218
15219/**
15220 * i40e_determine_queue_usage - Work out queue distribution
15221 * @pf: board private structure
15222 **/
15223static void i40e_determine_queue_usage(struct i40e_pf *pf)
15224{
15225	int queues_left;
15226	int q_max;
15227
15228	pf->num_lan_qps = 0;
15229
15230	/* Find the max queues to be put into basic use.  We'll always be
15231	 * using TC0, whether or not DCB is running, and TC0 will get the
15232	 * big RSS set.
15233	 */
15234	queues_left = pf->hw.func_caps.num_tx_qp;
15235
15236	if ((queues_left == 1) ||
15237	    !(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
15238		/* one qp for PF, no queues for anything else */
15239		queues_left = 0;
15240		pf->alloc_rss_size = pf->num_lan_qps = 1;
15241
15242		/* make sure all the fancies are disabled */
15243		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
15244			       I40E_FLAG_IWARP_ENABLED	|
15245			       I40E_FLAG_FD_SB_ENABLED	|
15246			       I40E_FLAG_FD_ATR_ENABLED	|
15247			       I40E_FLAG_DCB_CAPABLE	|
15248			       I40E_FLAG_DCB_ENABLED	|
15249			       I40E_FLAG_SRIOV_ENABLED	|
15250			       I40E_FLAG_VMDQ_ENABLED);
15251		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
15252	} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
15253				  I40E_FLAG_FD_SB_ENABLED |
15254				  I40E_FLAG_FD_ATR_ENABLED |
15255				  I40E_FLAG_DCB_CAPABLE))) {
15256		/* one qp for PF */
15257		pf->alloc_rss_size = pf->num_lan_qps = 1;
15258		queues_left -= pf->num_lan_qps;
15259
15260		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
15261			       I40E_FLAG_IWARP_ENABLED	|
15262			       I40E_FLAG_FD_SB_ENABLED	|
15263			       I40E_FLAG_FD_ATR_ENABLED	|
15264			       I40E_FLAG_DCB_ENABLED	|
15265			       I40E_FLAG_VMDQ_ENABLED);
15266		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
15267	} else {
15268		/* Not enough queues for all TCs */
15269		if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
15270		    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
15271			pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
15272					I40E_FLAG_DCB_ENABLED);
15273			dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
15274		}
15275
15276		/* limit lan qps to the smaller of qps, cpus or msix */
15277		q_max = max_t(int, pf->rss_size_max, num_online_cpus());
15278		q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
15279		q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
15280		pf->num_lan_qps = q_max;
15281
15282		queues_left -= pf->num_lan_qps;
15283	}
15284
15285	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
15286		if (queues_left > 1) {
15287			queues_left -= 1; /* save 1 queue for FD */
15288		} else {
15289			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
15290			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
15291			dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
15292		}
15293	}
15294
15295	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
15296	    pf->num_vf_qps && pf->num_req_vfs && queues_left) {
15297		pf->num_req_vfs = min_t(int, pf->num_req_vfs,
15298					(queues_left / pf->num_vf_qps));
15299		queues_left -= (pf->num_req_vfs * pf->num_vf_qps);
15300	}
15301
15302	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
15303	    pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) {
15304		pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis,
15305					  (queues_left / pf->num_vmdq_qps));
15306		queues_left -= (pf->num_vmdq_vsis * pf->num_vmdq_qps);
15307	}
15308
15309	pf->queues_left = queues_left;
15310	dev_dbg(&pf->pdev->dev,
15311		"qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
15312		pf->hw.func_caps.num_tx_qp,
15313		!!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
15314		pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
15315		pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
15316		queues_left);
15317}
15318
15319/**
15320 * i40e_setup_pf_filter_control - Setup PF static filter control
15321 * @pf: PF to be setup
15322 *
15323 * i40e_setup_pf_filter_control sets up a PF's initial filter control
15324 * settings. If PE/FCoE are enabled then it will also set the per PF
15325 * based filter sizes required for them. It also enables Flow director,
15326 * ethertype and macvlan type filter settings for the pf.
15327 *
15328 * Returns 0 on success, negative on failure
15329 **/
15330static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
15331{
15332	struct i40e_filter_control_settings *settings = &pf->filter_settings;
15333
15334	settings->hash_lut_size = I40E_HASH_LUT_SIZE_128;
15335
15336	/* Flow Director is enabled */
15337	if (pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))
15338		settings->enable_fdir = true;
15339
15340	/* Ethtype and MACVLAN filters enabled for PF */
15341	settings->enable_ethtype = true;
15342	settings->enable_macvlan = true;
15343
15344	if (i40e_set_filter_control(&pf->hw, settings))
15345		return -ENOENT;
15346
15347	return 0;
15348}
15349
15350#define INFO_STRING_LEN 255
15351#define REMAIN(__x) (INFO_STRING_LEN - (__x))
15352static void i40e_print_features(struct i40e_pf *pf)
15353{
15354	struct i40e_hw *hw = &pf->hw;
15355	char *buf;
15356	int i;
15357
15358	buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL);
15359	if (!buf)
15360		return;
15361
15362	i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id);
15363#ifdef CONFIG_PCI_IOV
15364	i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
15365#endif
15366	i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
15367		      pf->hw.func_caps.num_vsis,
15368		      pf->vsi[pf->lan_vsi]->num_queue_pairs);
15369	if (pf->flags & I40E_FLAG_RSS_ENABLED)
15370		i += scnprintf(&buf[i], REMAIN(i), " RSS");
15371	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
15372		i += scnprintf(&buf[i], REMAIN(i), " FD_ATR");
15373	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
15374		i += scnprintf(&buf[i], REMAIN(i), " FD_SB");
15375		i += scnprintf(&buf[i], REMAIN(i), " NTUPLE");
15376	}
15377	if (pf->flags & I40E_FLAG_DCB_CAPABLE)
15378		i += scnprintf(&buf[i], REMAIN(i), " DCB");
15379	i += scnprintf(&buf[i], REMAIN(i), " VxLAN");
15380	i += scnprintf(&buf[i], REMAIN(i), " Geneve");
15381	if (pf->flags & I40E_FLAG_PTP)
15382		i += scnprintf(&buf[i], REMAIN(i), " PTP");
15383	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
15384		i += scnprintf(&buf[i], REMAIN(i), " VEB");
15385	else
15386		i += scnprintf(&buf[i], REMAIN(i), " VEPA");
15387
15388	dev_info(&pf->pdev->dev, "%s\n", buf);
15389	kfree(buf);
15390	WARN_ON(i > INFO_STRING_LEN);
15391}
15392
15393/**
15394 * i40e_get_platform_mac_addr - get platform-specific MAC address
15395 * @pdev: PCI device information struct
15396 * @pf: board private structure
15397 *
15398 * Look up the MAC address for the device. First we'll try
15399 * eth_platform_get_mac_address, which will check Open Firmware, or arch
15400 * specific fallback. Otherwise, we'll default to the stored value in
15401 * firmware.
15402 **/
15403static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
15404{
15405	if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr))
15406		i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr);
15407}
15408
15409/**
15410 * i40e_set_fec_in_flags - helper function for setting FEC options in flags
15411 * @fec_cfg: FEC option to set in flags
15412 * @flags: ptr to flags in which we set FEC option
15413 **/
15414void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags)
15415{
15416	if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
15417		*flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC;
15418	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) ||
15419	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) {
15420		*flags |= I40E_FLAG_RS_FEC;
15421		*flags &= ~I40E_FLAG_BASE_R_FEC;
15422	}
15423	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) ||
15424	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) {
15425		*flags |= I40E_FLAG_BASE_R_FEC;
15426		*flags &= ~I40E_FLAG_RS_FEC;
15427	}
15428	if (fec_cfg == 0)
15429		*flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC);
15430}
15431
15432/**
15433 * i40e_check_recovery_mode - check if we are running transition firmware
15434 * @pf: board private structure
15435 *
15436 * Check registers indicating the firmware runs in recovery mode. Sets the
15437 * appropriate driver state.
15438 *
15439 * Returns true if the recovery mode was detected, false otherwise
15440 **/
15441static bool i40e_check_recovery_mode(struct i40e_pf *pf)
15442{
15443	u32 val = rd32(&pf->hw, I40E_GL_FWSTS);
15444
15445	if (val & I40E_GL_FWSTS_FWS1B_MASK) {
15446		dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
15447		dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
15448		set_bit(__I40E_RECOVERY_MODE, pf->state);
15449
15450		return true;
15451	}
15452	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
15453		dev_info(&pf->pdev->dev, "Please do Power-On Reset to initialize adapter in normal mode with full functionality.\n");
15454
15455	return false;
15456}
15457
15458/**
15459 * i40e_pf_loop_reset - perform reset in a loop.
15460 * @pf: board private structure
15461 *
15462 * This function is useful when a NIC is about to enter recovery mode.
15463 * When a NIC's internal data structures are corrupted the NIC's
15464 * firmware is going to enter recovery mode.
15465 * Right after a POR it takes about 7 minutes for firmware to enter
15466 * recovery mode. Until that time a NIC is in some kind of intermediate
15467 * state. After that time period the NIC almost surely enters
15468 * recovery mode. The only way for a driver to detect intermediate
15469 * state is to issue a series of pf-resets and check a return value.
15470 * If a PF reset returns success then the firmware could be in recovery
15471 * mode so the caller of this code needs to check for recovery mode
15472 * if this function returns success. There is a little chance that
15473 * firmware will hang in intermediate state forever.
15474 * Since waiting 7 minutes is quite a lot of time this function waits
15475 * 10 seconds and then gives up by returning an error.
15476 *
15477 * Return 0 on success, negative on failure.
15478 **/
15479static int i40e_pf_loop_reset(struct i40e_pf *pf)
15480{
15481	/* wait max 10 seconds for PF reset to succeed */
15482	const unsigned long time_end = jiffies + 10 * HZ;
15483	struct i40e_hw *hw = &pf->hw;
15484	int ret;
15485
15486	ret = i40e_pf_reset(hw);
15487	while (ret != 0 && time_before(jiffies, time_end)) {
15488		usleep_range(10000, 20000);
15489		ret = i40e_pf_reset(hw);
15490	}
15491
15492	if (ret == 0)
15493		pf->pfr_count++;
15494	else
15495		dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret);
15496
15497	return ret;
15498}
15499
15500/**
15501 * i40e_check_fw_empr - check if FW issued unexpected EMP Reset
15502 * @pf: board private structure
15503 *
15504 * Check FW registers to determine if FW issued unexpected EMP Reset.
15505 * Every time when unexpected EMP Reset occurs the FW increments
15506 * a counter of unexpected EMP Resets. When the counter reaches 10
15507 * the FW should enter the Recovery mode
15508 *
15509 * Returns true if FW issued unexpected EMP Reset
15510 **/
15511static bool i40e_check_fw_empr(struct i40e_pf *pf)
15512{
15513	const u32 fw_sts = rd32(&pf->hw, I40E_GL_FWSTS) &
15514			   I40E_GL_FWSTS_FWS1B_MASK;
15515	return (fw_sts > I40E_GL_FWSTS_FWS1B_EMPR_0) &&
15516	       (fw_sts <= I40E_GL_FWSTS_FWS1B_EMPR_10);
15517}
15518
15519/**
15520 * i40e_handle_resets - handle EMP resets and PF resets
15521 * @pf: board private structure
15522 *
15523 * Handle both EMP resets and PF resets and conclude whether there are
15524 * any issues regarding these resets. If there are any issues then
15525 * generate log entry.
15526 *
15527 * Return 0 if NIC is healthy or negative value when there are issues
15528 * with resets
15529 **/
15530static int i40e_handle_resets(struct i40e_pf *pf)
15531{
15532	const int pfr = i40e_pf_loop_reset(pf);
15533	const bool is_empr = i40e_check_fw_empr(pf);
15534
15535	if (is_empr || pfr != 0)
15536		dev_crit(&pf->pdev->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n");
15537
15538	return is_empr ? -EIO : pfr;
15539}
15540
15541/**
15542 * i40e_init_recovery_mode - initialize subsystems needed in recovery mode
15543 * @pf: board private structure
15544 * @hw: ptr to the hardware info
15545 *
15546 * This function does a minimal setup of all subsystems needed for running
15547 * recovery mode.
15548 *
15549 * Returns 0 on success, negative on failure
15550 **/
15551static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
15552{
15553	struct i40e_vsi *vsi;
15554	int err;
15555	int v_idx;
15556
15557	pci_set_drvdata(pf->pdev, pf);
15558	pci_save_state(pf->pdev);
15559
15560	/* set up periodic task facility */
15561	timer_setup(&pf->service_timer, i40e_service_timer, 0);
15562	pf->service_timer_period = HZ;
15563
15564	INIT_WORK(&pf->service_task, i40e_service_task);
15565	clear_bit(__I40E_SERVICE_SCHED, pf->state);
15566
15567	err = i40e_init_interrupt_scheme(pf);
15568	if (err)
15569		goto err_switch_setup;
15570
15571	/* The number of VSIs reported by the FW is the minimum guaranteed
15572	 * to us; HW supports far more and we share the remaining pool with
15573	 * the other PFs. We allocate space for more than the guarantee with
15574	 * the understanding that we might not get them all later.
15575	 */
15576	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
15577		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
15578	else
15579		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
15580
15581	/* Set up the vsi struct and our local tracking of the MAIN PF vsi. */
15582	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
15583			  GFP_KERNEL);
15584	if (!pf->vsi) {
15585		err = -ENOMEM;
15586		goto err_switch_setup;
15587	}
15588
15589	/* We allocate one VSI which is needed as absolute minimum
15590	 * in order to register the netdev
15591	 */
15592	v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN);
15593	if (v_idx < 0) {
15594		err = v_idx;
15595		goto err_switch_setup;
15596	}
15597	pf->lan_vsi = v_idx;
15598	vsi = pf->vsi[v_idx];
15599	if (!vsi) {
15600		err = -EFAULT;
15601		goto err_switch_setup;
15602	}
15603	vsi->alloc_queue_pairs = 1;
15604	err = i40e_config_netdev(vsi);
15605	if (err)
15606		goto err_switch_setup;
15607	err = register_netdev(vsi->netdev);
15608	if (err)
15609		goto err_switch_setup;
15610	vsi->netdev_registered = true;
15611	i40e_dbg_pf_init(pf);
15612
15613	err = i40e_setup_misc_vector_for_recovery_mode(pf);
15614	if (err)
15615		goto err_switch_setup;
15616
15617	/* tell the firmware that we're starting */
15618	i40e_send_version(pf);
15619
15620	/* since everything's happy, start the service_task timer */
15621	mod_timer(&pf->service_timer,
15622		  round_jiffies(jiffies + pf->service_timer_period));
15623
15624	return 0;
15625
15626err_switch_setup:
15627	i40e_reset_interrupt_capability(pf);
15628	timer_shutdown_sync(&pf->service_timer);
15629	i40e_shutdown_adminq(hw);
15630	iounmap(hw->hw_addr);
15631	pci_release_mem_regions(pf->pdev);
15632	pci_disable_device(pf->pdev);
15633	kfree(pf);
15634
15635	return err;
15636}
15637
15638/**
15639 * i40e_set_subsystem_device_id - set subsystem device id
15640 * @hw: pointer to the hardware info
15641 *
15642 * Set PCI subsystem device id either from a pci_dev structure or
15643 * a specific FW register.
15644 **/
15645static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw)
15646{
15647	struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev;
15648
15649	hw->subsystem_device_id = pdev->subsystem_device ?
15650		pdev->subsystem_device :
15651		(ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX);
15652}
15653
15654/**
15655 * i40e_probe - Device initialization routine
15656 * @pdev: PCI device information struct
15657 * @ent: entry in i40e_pci_tbl
15658 *
15659 * i40e_probe initializes a PF identified by a pci_dev structure.
15660 * The OS initialization, configuring of the PF private structure,
15661 * and a hardware reset occur.
15662 *
15663 * Returns 0 on success, negative on failure
15664 **/
15665static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
15666{
15667	struct i40e_aq_get_phy_abilities_resp abilities;
15668#ifdef CONFIG_I40E_DCB
15669	enum i40e_get_fw_lldp_status_resp lldp_status;
15670#endif /* CONFIG_I40E_DCB */
15671	struct i40e_pf *pf;
15672	struct i40e_hw *hw;
15673	static u16 pfs_found;
15674	u16 wol_nvm_bits;
15675	u16 link_status;
15676#ifdef CONFIG_I40E_DCB
15677	int status;
15678#endif /* CONFIG_I40E_DCB */
15679	int err;
15680	u32 val;
15681	u32 i;
15682
15683	err = pci_enable_device_mem(pdev);
15684	if (err)
15685		return err;
15686
15687	/* set up for high or low dma */
15688	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
15689	if (err) {
15690		dev_err(&pdev->dev,
15691			"DMA configuration failed: 0x%x\n", err);
15692		goto err_dma;
15693	}
15694
15695	/* set up pci connections */
15696	err = pci_request_mem_regions(pdev, i40e_driver_name);
15697	if (err) {
15698		dev_info(&pdev->dev,
15699			 "pci_request_selected_regions failed %d\n", err);
15700		goto err_pci_reg;
15701	}
15702
15703	pci_set_master(pdev);
15704
15705	/* Now that we have a PCI connection, we need to do the
15706	 * low level device setup.  This is primarily setting up
15707	 * the Admin Queue structures and then querying for the
15708	 * device's current profile information.
15709	 */
15710	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
15711	if (!pf) {
15712		err = -ENOMEM;
15713		goto err_pf_alloc;
15714	}
15715	pf->next_vsi = 0;
15716	pf->pdev = pdev;
15717	set_bit(__I40E_DOWN, pf->state);
15718
15719	hw = &pf->hw;
15720	hw->back = pf;
15721
15722	pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
15723				I40E_MAX_CSR_SPACE);
15724	/* We believe that the highest register to read is
15725	 * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size
15726	 * is not less than that before mapping to prevent a
15727	 * kernel panic.
15728	 */
15729	if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
15730		dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
15731			pf->ioremap_len);
15732		err = -ENOMEM;
15733		goto err_ioremap;
15734	}
15735	hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
15736	if (!hw->hw_addr) {
15737		err = -EIO;
15738		dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n",
15739			 (unsigned int)pci_resource_start(pdev, 0),
15740			 pf->ioremap_len, err);
15741		goto err_ioremap;
15742	}
15743	hw->vendor_id = pdev->vendor;
15744	hw->device_id = pdev->device;
15745	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
15746	hw->subsystem_vendor_id = pdev->subsystem_vendor;
15747	i40e_set_subsystem_device_id(hw);
15748	hw->bus.device = PCI_SLOT(pdev->devfn);
15749	hw->bus.func = PCI_FUNC(pdev->devfn);
15750	hw->bus.bus_id = pdev->bus->number;
15751	pf->instance = pfs_found;
15752
15753	/* Select something other than the 802.1ad ethertype for the
15754	 * switch to use internally and drop on ingress.
15755	 */
15756	hw->switch_tag = 0xffff;
15757	hw->first_tag = ETH_P_8021AD;
15758	hw->second_tag = ETH_P_8021Q;
15759
15760	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
15761	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
15762	INIT_LIST_HEAD(&pf->ddp_old_prof);
15763
15764	/* set up the locks for the AQ, do this only once in probe
15765	 * and destroy them only once in remove
15766	 */
15767	mutex_init(&hw->aq.asq_mutex);
15768	mutex_init(&hw->aq.arq_mutex);
15769
15770	pf->msg_enable = netif_msg_init(debug,
15771					NETIF_MSG_DRV |
15772					NETIF_MSG_PROBE |
15773					NETIF_MSG_LINK);
15774	if (debug < -1)
15775		pf->hw.debug_mask = debug;
15776
15777	/* do a special CORER for clearing PXE mode once at init */
15778	if (hw->revision_id == 0 &&
15779	    (rd32(hw, I40E_GLLAN_RCTL_0) & I40E_GLLAN_RCTL_0_PXE_MODE_MASK)) {
15780		wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_CORER_MASK);
15781		i40e_flush(hw);
15782		msleep(200);
15783		pf->corer_count++;
15784
15785		i40e_clear_pxe_mode(hw);
15786	}
15787
15788	/* Reset here to make sure all is clean and to define PF 'n' */
15789	i40e_clear_hw(hw);
15790
15791	err = i40e_set_mac_type(hw);
15792	if (err) {
15793		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
15794			 err);
15795		goto err_pf_reset;
15796	}
15797
15798	err = i40e_handle_resets(pf);
15799	if (err)
15800		goto err_pf_reset;
15801
15802	i40e_check_recovery_mode(pf);
15803
15804	if (is_kdump_kernel()) {
15805		hw->aq.num_arq_entries = I40E_MIN_ARQ_LEN;
15806		hw->aq.num_asq_entries = I40E_MIN_ASQ_LEN;
15807	} else {
15808		hw->aq.num_arq_entries = I40E_AQ_LEN;
15809		hw->aq.num_asq_entries = I40E_AQ_LEN;
15810	}
15811	hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE;
15812	hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE;
15813	pf->adminq_work_limit = I40E_AQ_WORK_LIMIT;
15814
15815	snprintf(pf->int_name, sizeof(pf->int_name) - 1,
15816		 "%s-%s:misc",
15817		 dev_driver_string(&pf->pdev->dev), dev_name(&pdev->dev));
15818
15819	err = i40e_init_shared_code(hw);
15820	if (err) {
15821		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
15822			 err);
15823		goto err_pf_reset;
15824	}
15825
15826	/* set up a default setting for link flow control */
15827	pf->hw.fc.requested_mode = I40E_FC_NONE;
15828
15829	err = i40e_init_adminq(hw);
15830	if (err) {
15831		if (err == -EIO)
15832			dev_info(&pdev->dev,
15833				 "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n",
15834				 hw->aq.api_maj_ver,
15835				 hw->aq.api_min_ver,
15836				 I40E_FW_API_VERSION_MAJOR,
15837				 I40E_FW_MINOR_VERSION(hw));
15838		else
15839			dev_info(&pdev->dev,
15840				 "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
15841
15842		goto err_pf_reset;
15843	}
15844	i40e_get_oem_version(hw);
15845
15846	/* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */
15847	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n",
15848		 hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
15849		 hw->aq.api_maj_ver, hw->aq.api_min_ver,
15850		 i40e_nvm_version_str(hw), hw->vendor_id, hw->device_id,
15851		 hw->subsystem_vendor_id, hw->subsystem_device_id);
15852
15853	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
15854	    hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
15855		dev_dbg(&pdev->dev,
15856			"The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n",
15857			 hw->aq.api_maj_ver,
15858			 hw->aq.api_min_ver,
15859			 I40E_FW_API_VERSION_MAJOR,
15860			 I40E_FW_MINOR_VERSION(hw));
15861	else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
15862		dev_info(&pdev->dev,
15863			 "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
15864			 hw->aq.api_maj_ver,
15865			 hw->aq.api_min_ver,
15866			 I40E_FW_API_VERSION_MAJOR,
15867			 I40E_FW_MINOR_VERSION(hw));
15868
15869	i40e_verify_eeprom(pf);
15870
15871	/* Rev 0 hardware was never productized */
15872	if (hw->revision_id < 1)
15873		dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
15874
15875	i40e_clear_pxe_mode(hw);
15876
15877	err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
15878	if (err)
15879		goto err_adminq_setup;
15880
15881	err = i40e_sw_init(pf);
15882	if (err) {
15883		dev_info(&pdev->dev, "sw_init failed: %d\n", err);
15884		goto err_sw_init;
15885	}
15886
15887	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
15888		return i40e_init_recovery_mode(pf, hw);
15889
15890	err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
15891				hw->func_caps.num_rx_qp, 0, 0);
15892	if (err) {
15893		dev_info(&pdev->dev, "init_lan_hmc failed: %d\n", err);
15894		goto err_init_lan_hmc;
15895	}
15896
15897	err = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
15898	if (err) {
15899		dev_info(&pdev->dev, "configure_lan_hmc failed: %d\n", err);
15900		err = -ENOENT;
15901		goto err_configure_lan_hmc;
15902	}
15903
15904	/* Disable LLDP for NICs that have firmware versions lower than v4.3.
15905	 * Ignore error return codes because if it was already disabled via
15906	 * hardware settings this will fail
15907	 */
15908	if (pf->hw_features & I40E_HW_STOP_FW_LLDP) {
15909		dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
15910		i40e_aq_stop_lldp(hw, true, false, NULL);
15911	}
15912
15913	/* allow a platform config to override the HW addr */
15914	i40e_get_platform_mac_addr(pdev, pf);
15915
15916	if (!is_valid_ether_addr(hw->mac.addr)) {
15917		dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr);
15918		err = -EIO;
15919		goto err_mac_addr;
15920	}
15921	dev_info(&pdev->dev, "MAC address: %pM\n", hw->mac.addr);
15922	ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
15923	i40e_get_port_mac_addr(hw, hw->mac.port_addr);
15924	if (is_valid_ether_addr(hw->mac.port_addr))
15925		pf->hw_features |= I40E_HW_PORT_ID_VALID;
15926
15927	i40e_ptp_alloc_pins(pf);
15928	pci_set_drvdata(pdev, pf);
15929	pci_save_state(pdev);
15930
15931#ifdef CONFIG_I40E_DCB
15932	status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status);
15933	(!status &&
15934	 lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ?
15935		(pf->flags &= ~I40E_FLAG_DISABLE_FW_LLDP) :
15936		(pf->flags |= I40E_FLAG_DISABLE_FW_LLDP);
15937	dev_info(&pdev->dev,
15938		 (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ?
15939			"FW LLDP is disabled\n" :
15940			"FW LLDP is enabled\n");
15941
15942	/* Enable FW to write default DCB config on link-up */
15943	i40e_aq_set_dcb_parameters(hw, true, NULL);
15944
15945	err = i40e_init_pf_dcb(pf);
15946	if (err) {
15947		dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
15948		pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED);
15949		/* Continue without DCB enabled */
15950	}
15951#endif /* CONFIG_I40E_DCB */
15952
15953	/* set up periodic task facility */
15954	timer_setup(&pf->service_timer, i40e_service_timer, 0);
15955	pf->service_timer_period = HZ;
15956
15957	INIT_WORK(&pf->service_task, i40e_service_task);
15958	clear_bit(__I40E_SERVICE_SCHED, pf->state);
15959
15960	/* NVM bit on means WoL disabled for the port */
15961	i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
15962	if (BIT (hw->port) & wol_nvm_bits || hw->partition_id != 1)
15963		pf->wol_en = false;
15964	else
15965		pf->wol_en = true;
15966	device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en);
15967
15968	/* set up the main switch operations */
15969	i40e_determine_queue_usage(pf);
15970	err = i40e_init_interrupt_scheme(pf);
15971	if (err)
15972		goto err_switch_setup;
15973
15974	/* Reduce Tx and Rx pairs for kdump
15975	 * When MSI-X is enabled, it's not allowed to use more TC queue
15976	 * pairs than MSI-X vectors (pf->num_lan_msix) exist. Thus
15977	 * vsi->num_queue_pairs will be equal to pf->num_lan_msix, i.e., 1.
15978	 */
15979	if (is_kdump_kernel())
15980		pf->num_lan_msix = 1;
15981
15982	pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
15983	pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
15984	pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
15985	pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
15986	pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS;
15987	pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
15988						    UDP_TUNNEL_TYPE_GENEVE;
15989
15990	/* The number of VSIs reported by the FW is the minimum guaranteed
15991	 * to us; HW supports far more and we share the remaining pool with
15992	 * the other PFs. We allocate space for more than the guarantee with
15993	 * the understanding that we might not get them all later.
15994	 */
15995	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
15996		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
15997	else
15998		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
15999	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
16000		dev_warn(&pf->pdev->dev,
16001			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
16002			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
16003		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
16004	}
16005
16006	/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
16007	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
16008			  GFP_KERNEL);
16009	if (!pf->vsi) {
16010		err = -ENOMEM;
16011		goto err_switch_setup;
16012	}
16013
16014#ifdef CONFIG_PCI_IOV
16015	/* prep for VF support */
16016	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
16017	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
16018	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
16019		if (pci_num_vf(pdev))
16020			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
16021	}
16022#endif
16023	err = i40e_setup_pf_switch(pf, false, false);
16024	if (err) {
16025		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
16026		goto err_vsis;
16027	}
16028	INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
16029
16030	/* if FDIR VSI was set up, start it now */
16031	for (i = 0; i < pf->num_alloc_vsi; i++) {
16032		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
16033			i40e_vsi_open(pf->vsi[i]);
16034			break;
16035		}
16036	}
16037
16038	/* The driver only wants link up/down and module qualification
16039	 * reports from firmware.  Note the negative logic.
16040	 */
16041	err = i40e_aq_set_phy_int_mask(&pf->hw,
16042				       ~(I40E_AQ_EVENT_LINK_UPDOWN |
16043					 I40E_AQ_EVENT_MEDIA_NA |
16044					 I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
16045	if (err)
16046		dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n",
16047			 ERR_PTR(err),
16048			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
16049
16050	/* Reconfigure hardware for allowing smaller MSS in the case
16051	 * of TSO, so that we avoid the MDD being fired and causing
16052	 * a reset in the case of small MSS+TSO.
16053	 */
16054	val = rd32(hw, I40E_REG_MSS);
16055	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
16056		val &= ~I40E_REG_MSS_MIN_MASK;
16057		val |= I40E_64BYTE_MSS;
16058		wr32(hw, I40E_REG_MSS, val);
16059	}
16060
16061	if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
16062		msleep(75);
16063		err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
16064		if (err)
16065			dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
16066				 ERR_PTR(err),
16067				 i40e_aq_str(&pf->hw,
16068					     pf->hw.aq.asq_last_status));
16069	}
16070	/* The main driver is (mostly) up and happy. We need to set this state
16071	 * before setting up the misc vector or we get a race and the vector
16072	 * ends up disabled forever.
16073	 */
16074	clear_bit(__I40E_DOWN, pf->state);
16075
16076	/* In case of MSIX we are going to setup the misc vector right here
16077	 * to handle admin queue events etc. In case of legacy and MSI
16078	 * the misc functionality and queue processing is combined in
16079	 * the same vector and that gets setup at open.
16080	 */
16081	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
16082		err = i40e_setup_misc_vector(pf);
16083		if (err) {
16084			dev_info(&pdev->dev,
16085				 "setup of misc vector failed: %d\n", err);
16086			i40e_cloud_filter_exit(pf);
16087			i40e_fdir_teardown(pf);
16088			goto err_vsis;
16089		}
16090	}
16091
16092#ifdef CONFIG_PCI_IOV
16093	/* prep for VF support */
16094	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
16095	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
16096	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
16097		/* disable link interrupts for VFs */
16098		val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
16099		val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK;
16100		wr32(hw, I40E_PFGEN_PORTMDIO_NUM, val);
16101		i40e_flush(hw);
16102
16103		if (pci_num_vf(pdev)) {
16104			dev_info(&pdev->dev,
16105				 "Active VFs found, allocating resources.\n");
16106			err = i40e_alloc_vfs(pf, pci_num_vf(pdev));
16107			if (err)
16108				dev_info(&pdev->dev,
16109					 "Error %d allocating resources for existing VFs\n",
16110					 err);
16111		}
16112	}
16113#endif /* CONFIG_PCI_IOV */
16114
16115	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
16116		pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
16117						      pf->num_iwarp_msix,
16118						      I40E_IWARP_IRQ_PILE_ID);
16119		if (pf->iwarp_base_vector < 0) {
16120			dev_info(&pdev->dev,
16121				 "failed to get tracking for %d vectors for IWARP err=%d\n",
16122				 pf->num_iwarp_msix, pf->iwarp_base_vector);
16123			pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
16124		}
16125	}
16126
16127	i40e_dbg_pf_init(pf);
16128
16129	/* tell the firmware that we're starting */
16130	i40e_send_version(pf);
16131
16132	/* since everything's happy, start the service_task timer */
16133	mod_timer(&pf->service_timer,
16134		  round_jiffies(jiffies + pf->service_timer_period));
16135
16136	/* add this PF to client device list and launch a client service task */
16137	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
16138		err = i40e_lan_add_device(pf);
16139		if (err)
16140			dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
16141				 err);
16142	}
16143
16144#define PCI_SPEED_SIZE 8
16145#define PCI_WIDTH_SIZE 8
16146	/* Devices on the IOSF bus do not have this information
16147	 * and will report PCI Gen 1 x 1 by default so don't bother
16148	 * checking them.
16149	 */
16150	if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) {
16151		char speed[PCI_SPEED_SIZE] = "Unknown";
16152		char width[PCI_WIDTH_SIZE] = "Unknown";
16153
16154		/* Get the negotiated link width and speed from PCI config
16155		 * space
16156		 */
16157		pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA,
16158					  &link_status);
16159
16160		i40e_set_pci_config_data(hw, link_status);
16161
16162		switch (hw->bus.speed) {
16163		case i40e_bus_speed_8000:
16164			strscpy(speed, "8.0", PCI_SPEED_SIZE); break;
16165		case i40e_bus_speed_5000:
16166			strscpy(speed, "5.0", PCI_SPEED_SIZE); break;
16167		case i40e_bus_speed_2500:
16168			strscpy(speed, "2.5", PCI_SPEED_SIZE); break;
16169		default:
16170			break;
16171		}
16172		switch (hw->bus.width) {
16173		case i40e_bus_width_pcie_x8:
16174			strscpy(width, "8", PCI_WIDTH_SIZE); break;
16175		case i40e_bus_width_pcie_x4:
16176			strscpy(width, "4", PCI_WIDTH_SIZE); break;
16177		case i40e_bus_width_pcie_x2:
16178			strscpy(width, "2", PCI_WIDTH_SIZE); break;
16179		case i40e_bus_width_pcie_x1:
16180			strscpy(width, "1", PCI_WIDTH_SIZE); break;
16181		default:
16182			break;
16183		}
16184
16185		dev_info(&pdev->dev, "PCI-Express: Speed %sGT/s Width x%s\n",
16186			 speed, width);
16187
16188		if (hw->bus.width < i40e_bus_width_pcie_x8 ||
16189		    hw->bus.speed < i40e_bus_speed_8000) {
16190			dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
16191			dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
16192		}
16193	}
16194
16195	/* get the requested speeds from the fw */
16196	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
16197	if (err)
16198		dev_dbg(&pf->pdev->dev, "get requested speeds ret =  %pe last_status =  %s\n",
16199			ERR_PTR(err),
16200			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
16201	pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
16202
16203	/* set the FEC config due to the board capabilities */
16204	i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags);
16205
16206	/* get the supported phy types from the fw */
16207	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
16208	if (err)
16209		dev_dbg(&pf->pdev->dev, "get supported phy types ret =  %pe last_status =  %s\n",
16210			ERR_PTR(err),
16211			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
16212
16213	/* make sure the MFS hasn't been set lower than the default */
16214#define MAX_FRAME_SIZE_DEFAULT 0x2600
16215	val = (rd32(&pf->hw, I40E_PRTGL_SAH) &
16216	       I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT;
16217	if (val < MAX_FRAME_SIZE_DEFAULT)
16218		dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n",
16219			 pf->hw.port, val);
16220
16221	/* Add a filter to drop all Flow control frames from any VSI from being
16222	 * transmitted. By doing so we stop a malicious VF from sending out
16223	 * PAUSE or PFC frames and potentially controlling traffic for other
16224	 * PF/VF VSIs.
16225	 * The FW can still send Flow control frames if enabled.
16226	 */
16227	i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
16228						       pf->main_vsi_seid);
16229
16230	if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
16231		(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
16232		pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS;
16233	if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
16234		pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER;
16235	/* print a string summarizing features */
16236	i40e_print_features(pf);
16237
16238	return 0;
16239
16240	/* Unwind what we've done if something failed in the setup */
16241err_vsis:
16242	set_bit(__I40E_DOWN, pf->state);
16243	i40e_clear_interrupt_scheme(pf);
16244	kfree(pf->vsi);
16245err_switch_setup:
16246	i40e_reset_interrupt_capability(pf);
16247	timer_shutdown_sync(&pf->service_timer);
16248err_mac_addr:
16249err_configure_lan_hmc:
16250	(void)i40e_shutdown_lan_hmc(hw);
16251err_init_lan_hmc:
16252	kfree(pf->qp_pile);
16253err_sw_init:
16254err_adminq_setup:
16255err_pf_reset:
16256	iounmap(hw->hw_addr);
16257err_ioremap:
16258	kfree(pf);
16259err_pf_alloc:
16260	pci_release_mem_regions(pdev);
16261err_pci_reg:
16262err_dma:
16263	pci_disable_device(pdev);
16264	return err;
16265}
16266
16267/**
16268 * i40e_remove - Device removal routine
16269 * @pdev: PCI device information struct
16270 *
16271 * i40e_remove is called by the PCI subsystem to alert the driver
16272 * that is should release a PCI device.  This could be caused by a
16273 * Hot-Plug event, or because the driver is going to be removed from
16274 * memory.
16275 **/
16276static void i40e_remove(struct pci_dev *pdev)
16277{
16278	struct i40e_pf *pf = pci_get_drvdata(pdev);
16279	struct i40e_hw *hw = &pf->hw;
16280	int ret_code;
16281	int i;
16282
16283	i40e_dbg_pf_exit(pf);
16284
16285	i40e_ptp_stop(pf);
16286
16287	/* Disable RSS in hw */
16288	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
16289	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
16290
16291	/* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
16292	 * flags, once they are set, i40e_rebuild should not be called as
16293	 * i40e_prep_for_reset always returns early.
16294	 */
16295	while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
16296		usleep_range(1000, 2000);
16297	set_bit(__I40E_IN_REMOVE, pf->state);
16298
16299	if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
16300		set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
16301		i40e_free_vfs(pf);
16302		pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
16303	}
16304	/* no more scheduling of any task */
16305	set_bit(__I40E_SUSPENDED, pf->state);
16306	set_bit(__I40E_DOWN, pf->state);
16307	if (pf->service_timer.function)
16308		timer_shutdown_sync(&pf->service_timer);
16309	if (pf->service_task.func)
16310		cancel_work_sync(&pf->service_task);
16311
16312	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
16313		struct i40e_vsi *vsi = pf->vsi[0];
16314
16315		/* We know that we have allocated only one vsi for this PF,
16316		 * it was just for registering netdevice, so the interface
16317		 * could be visible in the 'ifconfig' output
16318		 */
16319		unregister_netdev(vsi->netdev);
16320		free_netdev(vsi->netdev);
16321
16322		goto unmap;
16323	}
16324
16325	/* Client close must be called explicitly here because the timer
16326	 * has been stopped.
16327	 */
16328	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
16329
16330	i40e_fdir_teardown(pf);
16331
16332	/* If there is a switch structure or any orphans, remove them.
16333	 * This will leave only the PF's VSI remaining.
16334	 */
16335	for (i = 0; i < I40E_MAX_VEB; i++) {
16336		if (!pf->veb[i])
16337			continue;
16338
16339		if (pf->veb[i]->uplink_seid == pf->mac_seid ||
16340		    pf->veb[i]->uplink_seid == 0)
16341			i40e_switch_branch_release(pf->veb[i]);
16342	}
16343
16344	/* Now we can shutdown the PF's VSIs, just before we kill
16345	 * adminq and hmc.
16346	 */
16347	for (i = pf->num_alloc_vsi; i--;)
16348		if (pf->vsi[i]) {
16349			i40e_vsi_close(pf->vsi[i]);
16350			i40e_vsi_release(pf->vsi[i]);
16351			pf->vsi[i] = NULL;
16352		}
16353
16354	i40e_cloud_filter_exit(pf);
16355
16356	/* remove attached clients */
16357	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
16358		ret_code = i40e_lan_del_device(pf);
16359		if (ret_code)
16360			dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
16361				 ret_code);
16362	}
16363
16364	/* shutdown and destroy the HMC */
16365	if (hw->hmc.hmc_obj) {
16366		ret_code = i40e_shutdown_lan_hmc(hw);
16367		if (ret_code)
16368			dev_warn(&pdev->dev,
16369				 "Failed to destroy the HMC resources: %d\n",
16370				 ret_code);
16371	}
16372
16373unmap:
16374	/* Free MSI/legacy interrupt 0 when in recovery mode. */
16375	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
16376	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
16377		free_irq(pf->pdev->irq, pf);
16378
16379	/* shutdown the adminq */
16380	i40e_shutdown_adminq(hw);
16381
16382	/* destroy the locks only once, here */
16383	mutex_destroy(&hw->aq.arq_mutex);
16384	mutex_destroy(&hw->aq.asq_mutex);
16385
16386	/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
16387	rtnl_lock();
16388	i40e_clear_interrupt_scheme(pf);
16389	for (i = 0; i < pf->num_alloc_vsi; i++) {
16390		if (pf->vsi[i]) {
16391			if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
16392				i40e_vsi_clear_rings(pf->vsi[i]);
16393			i40e_vsi_clear(pf->vsi[i]);
16394			pf->vsi[i] = NULL;
16395		}
16396	}
16397	rtnl_unlock();
16398
16399	for (i = 0; i < I40E_MAX_VEB; i++) {
16400		kfree(pf->veb[i]);
16401		pf->veb[i] = NULL;
16402	}
16403
16404	kfree(pf->qp_pile);
16405	kfree(pf->vsi);
16406
16407	iounmap(hw->hw_addr);
16408	kfree(pf);
16409	pci_release_mem_regions(pdev);
16410
16411	pci_disable_device(pdev);
16412}
16413
16414/**
16415 * i40e_pci_error_detected - warning that something funky happened in PCI land
16416 * @pdev: PCI device information struct
16417 * @error: the type of PCI error
16418 *
16419 * Called to warn that something happened and the error handling steps
16420 * are in progress.  Allows the driver to quiesce things, be ready for
16421 * remediation.
16422 **/
16423static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
16424						pci_channel_state_t error)
16425{
16426	struct i40e_pf *pf = pci_get_drvdata(pdev);
16427
16428	dev_info(&pdev->dev, "%s: error %d\n", __func__, error);
16429
16430	if (!pf) {
16431		dev_info(&pdev->dev,
16432			 "Cannot recover - error happened during device probe\n");
16433		return PCI_ERS_RESULT_DISCONNECT;
16434	}
16435
16436	/* shutdown all operations */
16437	if (!test_bit(__I40E_SUSPENDED, pf->state))
16438		i40e_prep_for_reset(pf);
16439
16440	/* Request a slot reset */
16441	return PCI_ERS_RESULT_NEED_RESET;
16442}
16443
16444/**
16445 * i40e_pci_error_slot_reset - a PCI slot reset just happened
16446 * @pdev: PCI device information struct
16447 *
16448 * Called to find if the driver can work with the device now that
16449 * the pci slot has been reset.  If a basic connection seems good
16450 * (registers are readable and have sane content) then return a
16451 * happy little PCI_ERS_RESULT_xxx.
16452 **/
16453static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
16454{
16455	struct i40e_pf *pf = pci_get_drvdata(pdev);
16456	pci_ers_result_t result;
16457	u32 reg;
16458
16459	dev_dbg(&pdev->dev, "%s\n", __func__);
16460	if (pci_enable_device_mem(pdev)) {
16461		dev_info(&pdev->dev,
16462			 "Cannot re-enable PCI device after reset.\n");
16463		result = PCI_ERS_RESULT_DISCONNECT;
16464	} else {
16465		pci_set_master(pdev);
16466		pci_restore_state(pdev);
16467		pci_save_state(pdev);
16468		pci_wake_from_d3(pdev, false);
16469
16470		reg = rd32(&pf->hw, I40E_GLGEN_RTRIG);
16471		if (reg == 0)
16472			result = PCI_ERS_RESULT_RECOVERED;
16473		else
16474			result = PCI_ERS_RESULT_DISCONNECT;
16475	}
16476
16477	return result;
16478}
16479
16480/**
16481 * i40e_pci_error_reset_prepare - prepare device driver for pci reset
16482 * @pdev: PCI device information struct
16483 */
16484static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
16485{
16486	struct i40e_pf *pf = pci_get_drvdata(pdev);
16487
16488	i40e_prep_for_reset(pf);
16489}
16490
16491/**
16492 * i40e_pci_error_reset_done - pci reset done, device driver reset can begin
16493 * @pdev: PCI device information struct
16494 */
16495static void i40e_pci_error_reset_done(struct pci_dev *pdev)
16496{
16497	struct i40e_pf *pf = pci_get_drvdata(pdev);
16498
16499	if (test_bit(__I40E_IN_REMOVE, pf->state))
16500		return;
16501
16502	i40e_reset_and_rebuild(pf, false, false);
16503#ifdef CONFIG_PCI_IOV
16504	i40e_restore_all_vfs_msi_state(pdev);
16505#endif /* CONFIG_PCI_IOV */
16506}
16507
16508/**
16509 * i40e_pci_error_resume - restart operations after PCI error recovery
16510 * @pdev: PCI device information struct
16511 *
16512 * Called to allow the driver to bring things back up after PCI error
16513 * and/or reset recovery has finished.
16514 **/
16515static void i40e_pci_error_resume(struct pci_dev *pdev)
16516{
16517	struct i40e_pf *pf = pci_get_drvdata(pdev);
16518
16519	dev_dbg(&pdev->dev, "%s\n", __func__);
16520	if (test_bit(__I40E_SUSPENDED, pf->state))
16521		return;
16522
16523	i40e_handle_reset_warning(pf, false);
16524}
16525
16526/**
16527 * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
16528 * using the mac_address_write admin q function
16529 * @pf: pointer to i40e_pf struct
16530 **/
16531static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
16532{
16533	struct i40e_hw *hw = &pf->hw;
16534	u8 mac_addr[6];
16535	u16 flags = 0;
16536	int ret;
16537
16538	/* Get current MAC address in case it's an LAA */
16539	if (pf->vsi[pf->lan_vsi] && pf->vsi[pf->lan_vsi]->netdev) {
16540		ether_addr_copy(mac_addr,
16541				pf->vsi[pf->lan_vsi]->netdev->dev_addr);
16542	} else {
16543		dev_err(&pf->pdev->dev,
16544			"Failed to retrieve MAC address; using default\n");
16545		ether_addr_copy(mac_addr, hw->mac.addr);
16546	}
16547
16548	/* The FW expects the mac address write cmd to first be called with
16549	 * one of these flags before calling it again with the multicast
16550	 * enable flags.
16551	 */
16552	flags = I40E_AQC_WRITE_TYPE_LAA_WOL;
16553
16554	if (hw->func_caps.flex10_enable && hw->partition_id != 1)
16555		flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;
16556
16557	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
16558	if (ret) {
16559		dev_err(&pf->pdev->dev,
16560			"Failed to update MAC address registers; cannot enable Multicast Magic packet wake up");
16561		return;
16562	}
16563
16564	flags = I40E_AQC_MC_MAG_EN
16565			| I40E_AQC_WOL_PRESERVE_ON_PFR
16566			| I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
16567	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
16568	if (ret)
16569		dev_err(&pf->pdev->dev,
16570			"Failed to enable Multicast Magic Packet wake up\n");
16571}
16572
16573/**
16574 * i40e_shutdown - PCI callback for shutting down
16575 * @pdev: PCI device information struct
16576 **/
16577static void i40e_shutdown(struct pci_dev *pdev)
16578{
16579	struct i40e_pf *pf = pci_get_drvdata(pdev);
16580	struct i40e_hw *hw = &pf->hw;
16581
16582	set_bit(__I40E_SUSPENDED, pf->state);
16583	set_bit(__I40E_DOWN, pf->state);
16584
16585	del_timer_sync(&pf->service_timer);
16586	cancel_work_sync(&pf->service_task);
16587	i40e_cloud_filter_exit(pf);
16588	i40e_fdir_teardown(pf);
16589
16590	/* Client close must be called explicitly here because the timer
16591	 * has been stopped.
16592	 */
16593	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
16594
16595	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
16596		i40e_enable_mc_magic_wake(pf);
16597
16598	i40e_prep_for_reset(pf);
16599
16600	wr32(hw, I40E_PFPM_APM,
16601	     (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
16602	wr32(hw, I40E_PFPM_WUFC,
16603	     (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
16604
16605	/* Free MSI/legacy interrupt 0 when in recovery mode. */
16606	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
16607	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
16608		free_irq(pf->pdev->irq, pf);
16609
16610	/* Since we're going to destroy queues during the
16611	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
16612	 * whole section
16613	 */
16614	rtnl_lock();
16615	i40e_clear_interrupt_scheme(pf);
16616	rtnl_unlock();
16617
16618	if (system_state == SYSTEM_POWER_OFF) {
16619		pci_wake_from_d3(pdev, pf->wol_en);
16620		pci_set_power_state(pdev, PCI_D3hot);
16621	}
16622}
16623
16624/**
16625 * i40e_suspend - PM callback for moving to D3
16626 * @dev: generic device information structure
16627 **/
16628static int __maybe_unused i40e_suspend(struct device *dev)
16629{
16630	struct i40e_pf *pf = dev_get_drvdata(dev);
16631	struct i40e_hw *hw = &pf->hw;
16632
16633	/* If we're already suspended, then there is nothing to do */
16634	if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
16635		return 0;
16636
16637	set_bit(__I40E_DOWN, pf->state);
16638
16639	/* Ensure service task will not be running */
16640	del_timer_sync(&pf->service_timer);
16641	cancel_work_sync(&pf->service_task);
16642
16643	/* Client close must be called explicitly here because the timer
16644	 * has been stopped.
16645	 */
16646	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
16647
16648	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
16649		i40e_enable_mc_magic_wake(pf);
16650
16651	/* Since we're going to destroy queues during the
16652	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
16653	 * whole section
16654	 */
16655	rtnl_lock();
16656
16657	i40e_prep_for_reset(pf);
16658
16659	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
16660	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
16661
16662	/* Clear the interrupt scheme and release our IRQs so that the system
16663	 * can safely hibernate even when there are a large number of CPUs.
16664	 * Otherwise hibernation might fail when mapping all the vectors back
16665	 * to CPU0.
16666	 */
16667	i40e_clear_interrupt_scheme(pf);
16668
16669	rtnl_unlock();
16670
16671	return 0;
16672}
16673
16674/**
16675 * i40e_resume - PM callback for waking up from D3
16676 * @dev: generic device information structure
16677 **/
16678static int __maybe_unused i40e_resume(struct device *dev)
16679{
16680	struct i40e_pf *pf = dev_get_drvdata(dev);
16681	int err;
16682
16683	/* If we're not suspended, then there is nothing to do */
16684	if (!test_bit(__I40E_SUSPENDED, pf->state))
16685		return 0;
16686
16687	/* We need to hold the RTNL lock prior to restoring interrupt schemes,
16688	 * since we're going to be restoring queues
16689	 */
16690	rtnl_lock();
16691
16692	/* We cleared the interrupt scheme when we suspended, so we need to
16693	 * restore it now to resume device functionality.
16694	 */
16695	err = i40e_restore_interrupt_scheme(pf);
16696	if (err) {
16697		dev_err(dev, "Cannot restore interrupt scheme: %d\n",
16698			err);
16699	}
16700
16701	clear_bit(__I40E_DOWN, pf->state);
16702	i40e_reset_and_rebuild(pf, false, true);
16703
16704	rtnl_unlock();
16705
16706	/* Clear suspended state last after everything is recovered */
16707	clear_bit(__I40E_SUSPENDED, pf->state);
16708
16709	/* Restart the service task */
16710	mod_timer(&pf->service_timer,
16711		  round_jiffies(jiffies + pf->service_timer_period));
16712
16713	return 0;
16714}
16715
16716static const struct pci_error_handlers i40e_err_handler = {
16717	.error_detected = i40e_pci_error_detected,
16718	.slot_reset = i40e_pci_error_slot_reset,
16719	.reset_prepare = i40e_pci_error_reset_prepare,
16720	.reset_done = i40e_pci_error_reset_done,
16721	.resume = i40e_pci_error_resume,
16722};
16723
16724static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
16725
16726static struct pci_driver i40e_driver = {
16727	.name     = i40e_driver_name,
16728	.id_table = i40e_pci_tbl,
16729	.probe    = i40e_probe,
16730	.remove   = i40e_remove,
16731	.driver   = {
16732		.pm = &i40e_pm_ops,
16733	},
16734	.shutdown = i40e_shutdown,
16735	.err_handler = &i40e_err_handler,
16736	.sriov_configure = i40e_pci_sriov_configure,
16737};
16738
16739/**
16740 * i40e_init_module - Driver registration routine
16741 *
16742 * i40e_init_module is the first routine called when the driver is
16743 * loaded. All it does is register with the PCI subsystem.
16744 **/
16745static int __init i40e_init_module(void)
16746{
16747	int err;
16748
16749	pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string);
16750	pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
16751
16752	/* There is no need to throttle the number of active tasks because
16753	 * each device limits its own task using a state bit for scheduling
16754	 * the service task, and the device tasks do not interfere with each
16755	 * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM
16756	 * since we need to be able to guarantee forward progress even under
16757	 * memory pressure.
16758	 */
16759	i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name);
16760	if (!i40e_wq) {
16761		pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
16762		return -ENOMEM;
16763	}
16764
16765	i40e_dbg_init();
16766	err = pci_register_driver(&i40e_driver);
16767	if (err) {
16768		destroy_workqueue(i40e_wq);
16769		i40e_dbg_exit();
16770		return err;
16771	}
16772
16773	return 0;
16774}
16775module_init(i40e_init_module);
16776
16777/**
16778 * i40e_exit_module - Driver exit cleanup routine
16779 *
16780 * i40e_exit_module is called just before the driver is removed
16781 * from memory.
16782 **/
16783static void __exit i40e_exit_module(void)
16784{
16785	pci_unregister_driver(&i40e_driver);
16786	destroy_workqueue(i40e_wq);
16787	ida_destroy(&i40e_client_ida);
16788	i40e_dbg_exit();
16789}
16790module_exit(i40e_exit_module);
16791