1// SPDX-License-Identifier: GPL-2.0
2/* Copyright(c) 2013 - 2018 Intel Corporation. */
3
4#include <linux/etherdevice.h>
5#include <linux/of_net.h>
6#include <linux/pci.h>
7#include <linux/bpf.h>
8#include <generated/utsrelease.h>
9
10/* Local includes */
11#include "i40e.h"
12#include "i40e_diag.h"
13#include "i40e_xsk.h"
14#include <net/udp_tunnel.h>
15#include <net/xdp_sock_drv.h>
16/* All i40e tracepoints are defined by the include below, which
17 * must be included exactly once across the whole kernel with
18 * CREATE_TRACE_POINTS defined
19 */
20#define CREATE_TRACE_POINTS
21#include "i40e_trace.h"
22
23const char i40e_driver_name[] = "i40e";
24static const char i40e_driver_string[] =
25			"Intel(R) Ethernet Connection XL710 Network Driver";
26
27static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation.";
28
29/* a bit of forward declarations */
30static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
31static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired);
32static int i40e_add_vsi(struct i40e_vsi *vsi);
33static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi);
34static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired);
35static int i40e_setup_misc_vector(struct i40e_pf *pf);
36static void i40e_determine_queue_usage(struct i40e_pf *pf);
37static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
38static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired);
39static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
40				   bool lock_acquired);
41static int i40e_reset(struct i40e_pf *pf);
42static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
43static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf);
44static int i40e_restore_interrupt_scheme(struct i40e_pf *pf);
45static bool i40e_check_recovery_mode(struct i40e_pf *pf);
46static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw);
47static void i40e_fdir_sb_setup(struct i40e_pf *pf);
48static int i40e_veb_get_bw_info(struct i40e_veb *veb);
49static int i40e_get_capabilities(struct i40e_pf *pf,
50				 enum i40e_admin_queue_opc list_type);
51static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf);
52
53/* i40e_pci_tbl - PCI Device ID Table
54 *
55 * Last entry must be all 0s
56 *
57 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
58 *   Class, Class Mask, private data (not used) }
59 */
60static const struct pci_device_id i40e_pci_tbl[] = {
61	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_XL710), 0},
62	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QEMU), 0},
63	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_B), 0},
64	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_C), 0},
65	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_A), 0},
66	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0},
67	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
68	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
69	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
70	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0},
71	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0},
72	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0},
73	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0},
74	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0},
75	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
76	{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
77	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
78	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0},
79	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
80	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
81	{PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0},
82	{PCI_VDEVICE(INTEL, I40E_DEV_ID_XXV710_N3000), 0},
83	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0},
84	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0},
85	/* required last entry */
86	{0, }
87};
88MODULE_DEVICE_TABLE(pci, i40e_pci_tbl);
89
90#define I40E_MAX_VF_COUNT 128
91static int debug = -1;
92module_param(debug, uint, 0);
93MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)");
94
95MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
96MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
97MODULE_LICENSE("GPL v2");
98
99static struct workqueue_struct *i40e_wq;
100
101static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f,
102				  struct net_device *netdev, int delta)
103{
104	struct netdev_hw_addr_list *ha_list;
105	struct netdev_hw_addr *ha;
106
107	if (!f || !netdev)
108		return;
109
110	if (is_unicast_ether_addr(f->macaddr) || is_link_local_ether_addr(f->macaddr))
111		ha_list = &netdev->uc;
112	else
113		ha_list = &netdev->mc;
114
115	netdev_hw_addr_list_for_each(ha, ha_list) {
116		if (ether_addr_equal(ha->addr, f->macaddr)) {
117			ha->refcount += delta;
118			if (ha->refcount <= 0)
119				ha->refcount = 1;
120			break;
121		}
122	}
123}
124
125/**
126 * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
127 * @hw:   pointer to the HW structure
128 * @mem:  ptr to mem struct to fill out
129 * @size: size of memory requested
130 * @alignment: what to align the allocation to
131 **/
132int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
133			    u64 size, u32 alignment)
134{
135	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
136
137	mem->size = ALIGN(size, alignment);
138	mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa,
139				     GFP_KERNEL);
140	if (!mem->va)
141		return -ENOMEM;
142
143	return 0;
144}
145
146/**
147 * i40e_free_dma_mem_d - OS specific memory free for shared code
148 * @hw:   pointer to the HW structure
149 * @mem:  ptr to mem struct to free
150 **/
151int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
152{
153	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
154
155	dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa);
156	mem->va = NULL;
157	mem->pa = 0;
158	mem->size = 0;
159
160	return 0;
161}
162
163/**
164 * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code
165 * @hw:   pointer to the HW structure
166 * @mem:  ptr to mem struct to fill out
167 * @size: size of memory requested
168 **/
169int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
170			     u32 size)
171{
172	mem->size = size;
173	mem->va = kzalloc(size, GFP_KERNEL);
174
175	if (!mem->va)
176		return -ENOMEM;
177
178	return 0;
179}
180
181/**
182 * i40e_free_virt_mem_d - OS specific memory free for shared code
183 * @hw:   pointer to the HW structure
184 * @mem:  ptr to mem struct to free
185 **/
186int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
187{
188	/* it's ok to kfree a NULL pointer */
189	kfree(mem->va);
190	mem->va = NULL;
191	mem->size = 0;
192
193	return 0;
194}
195
196/**
197 * i40e_get_lump - find a lump of free generic resource
198 * @pf: board private structure
199 * @pile: the pile of resource to search
200 * @needed: the number of items needed
201 * @id: an owner id to stick on the items assigned
202 *
203 * Returns the base item index of the lump, or negative for error
204 **/
205static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
206			 u16 needed, u16 id)
207{
208	int ret = -ENOMEM;
209	int i, j;
210
211	if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
212		dev_info(&pf->pdev->dev,
213			 "param err: pile=%s needed=%d id=0x%04x\n",
214			 pile ? "<valid>" : "<null>", needed, id);
215		return -EINVAL;
216	}
217
218	/* Allocate last queue in the pile for FDIR VSI queue
219	 * so it doesn't fragment the qp_pile
220	 */
221	if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) {
222		if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) {
223			dev_err(&pf->pdev->dev,
224				"Cannot allocate queue %d for I40E_VSI_FDIR\n",
225				pile->num_entries - 1);
226			return -ENOMEM;
227		}
228		pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT;
229		return pile->num_entries - 1;
230	}
231
232	i = 0;
233	while (i < pile->num_entries) {
234		/* skip already allocated entries */
235		if (pile->list[i] & I40E_PILE_VALID_BIT) {
236			i++;
237			continue;
238		}
239
240		/* do we have enough in this lump? */
241		for (j = 0; (j < needed) && ((i+j) < pile->num_entries); j++) {
242			if (pile->list[i+j] & I40E_PILE_VALID_BIT)
243				break;
244		}
245
246		if (j == needed) {
247			/* there was enough, so assign it to the requestor */
248			for (j = 0; j < needed; j++)
249				pile->list[i+j] = id | I40E_PILE_VALID_BIT;
250			ret = i;
251			break;
252		}
253
254		/* not enough, so skip over it and continue looking */
255		i += j;
256	}
257
258	return ret;
259}
260
261/**
262 * i40e_put_lump - return a lump of generic resource
263 * @pile: the pile of resource to search
264 * @index: the base item index
265 * @id: the owner id of the items assigned
266 *
267 * Returns the count of items in the lump
268 **/
269static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
270{
271	int valid_id = (id | I40E_PILE_VALID_BIT);
272	int count = 0;
273	u16 i;
274
275	if (!pile || index >= pile->num_entries)
276		return -EINVAL;
277
278	for (i = index;
279	     i < pile->num_entries && pile->list[i] == valid_id;
280	     i++) {
281		pile->list[i] = 0;
282		count++;
283	}
284
285
286	return count;
287}
288
289/**
290 * i40e_find_vsi_from_id - searches for the vsi with the given id
291 * @pf: the pf structure to search for the vsi
292 * @id: id of the vsi it is searching for
293 **/
294struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
295{
296	int i;
297
298	for (i = 0; i < pf->num_alloc_vsi; i++)
299		if (pf->vsi[i] && (pf->vsi[i]->id == id))
300			return pf->vsi[i];
301
302	return NULL;
303}
304
305/**
306 * i40e_service_event_schedule - Schedule the service task to wake up
307 * @pf: board private structure
308 *
309 * If not already scheduled, this puts the task into the work queue
310 **/
311void i40e_service_event_schedule(struct i40e_pf *pf)
312{
313	if ((!test_bit(__I40E_DOWN, pf->state) &&
314	     !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) ||
315	      test_bit(__I40E_RECOVERY_MODE, pf->state))
316		queue_work(i40e_wq, &pf->service_task);
317}
318
319/**
320 * i40e_tx_timeout - Respond to a Tx Hang
321 * @netdev: network interface device structure
322 * @txqueue: queue number timing out
323 *
324 * If any port has noticed a Tx timeout, it is likely that the whole
325 * device is munged, not just the one netdev port, so go for the full
326 * reset.
327 **/
328static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue)
329{
330	struct i40e_netdev_priv *np = netdev_priv(netdev);
331	struct i40e_vsi *vsi = np->vsi;
332	struct i40e_pf *pf = vsi->back;
333	struct i40e_ring *tx_ring = NULL;
334	unsigned int i;
335	u32 head, val;
336
337	pf->tx_timeout_count++;
338
339	/* with txqueue index, find the tx_ring struct */
340	for (i = 0; i < vsi->num_queue_pairs; i++) {
341		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
342			if (txqueue ==
343			    vsi->tx_rings[i]->queue_index) {
344				tx_ring = vsi->tx_rings[i];
345				break;
346			}
347		}
348	}
349
350	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20)))
351		pf->tx_timeout_recovery_level = 1;  /* reset after some time */
352	else if (time_before(jiffies,
353		      (pf->tx_timeout_last_recovery + netdev->watchdog_timeo)))
354		return;   /* don't do any new action before the next timeout */
355
356	/* don't kick off another recovery if one is already pending */
357	if (test_and_set_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state))
358		return;
359
360	if (tx_ring) {
361		head = i40e_get_head(tx_ring);
362		/* Read interrupt register */
363		if (pf->flags & I40E_FLAG_MSIX_ENABLED)
364			val = rd32(&pf->hw,
365			     I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
366						tx_ring->vsi->base_vector - 1));
367		else
368			val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
369
370		netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n",
371			    vsi->seid, txqueue, tx_ring->next_to_clean,
372			    head, tx_ring->next_to_use,
373			    readl(tx_ring->tail), val);
374	}
375
376	pf->tx_timeout_last_recovery = jiffies;
377	netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n",
378		    pf->tx_timeout_recovery_level, txqueue);
379
380	switch (pf->tx_timeout_recovery_level) {
381	case 1:
382		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
383		break;
384	case 2:
385		set_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
386		break;
387	case 3:
388		set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
389		break;
390	default:
391		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n");
392		set_bit(__I40E_DOWN_REQUESTED, pf->state);
393		set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state);
394		break;
395	}
396
397	i40e_service_event_schedule(pf);
398	pf->tx_timeout_recovery_level++;
399}
400
401/**
402 * i40e_get_vsi_stats_struct - Get System Network Statistics
403 * @vsi: the VSI we care about
404 *
405 * Returns the address of the device statistics structure.
406 * The statistics are actually updated from the service task.
407 **/
408struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi)
409{
410	return &vsi->net_stats;
411}
412
413/**
414 * i40e_get_netdev_stats_struct_tx - populate stats from a Tx ring
415 * @ring: Tx ring to get statistics from
416 * @stats: statistics entry to be updated
417 **/
418static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring,
419					    struct rtnl_link_stats64 *stats)
420{
421	u64 bytes, packets;
422	unsigned int start;
423
424	do {
425		start = u64_stats_fetch_begin_irq(&ring->syncp);
426		packets = ring->stats.packets;
427		bytes   = ring->stats.bytes;
428	} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
429
430	stats->tx_packets += packets;
431	stats->tx_bytes   += bytes;
432}
433
434/**
435 * i40e_get_netdev_stats_struct - Get statistics for netdev interface
436 * @netdev: network interface device structure
437 * @stats: data structure to store statistics
438 *
439 * Returns the address of the device statistics structure.
440 * The statistics are actually updated from the service task.
441 **/
442static void i40e_get_netdev_stats_struct(struct net_device *netdev,
443				  struct rtnl_link_stats64 *stats)
444{
445	struct i40e_netdev_priv *np = netdev_priv(netdev);
446	struct i40e_vsi *vsi = np->vsi;
447	struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi);
448	struct i40e_ring *ring;
449	int i;
450
451	if (test_bit(__I40E_VSI_DOWN, vsi->state))
452		return;
453
454	if (!vsi->tx_rings)
455		return;
456
457	rcu_read_lock();
458	for (i = 0; i < vsi->num_queue_pairs; i++) {
459		u64 bytes, packets;
460		unsigned int start;
461
462		ring = READ_ONCE(vsi->tx_rings[i]);
463		if (!ring)
464			continue;
465		i40e_get_netdev_stats_struct_tx(ring, stats);
466
467		if (i40e_enabled_xdp_vsi(vsi)) {
468			ring = READ_ONCE(vsi->xdp_rings[i]);
469			if (!ring)
470				continue;
471			i40e_get_netdev_stats_struct_tx(ring, stats);
472		}
473
474		ring = READ_ONCE(vsi->rx_rings[i]);
475		if (!ring)
476			continue;
477		do {
478			start   = u64_stats_fetch_begin_irq(&ring->syncp);
479			packets = ring->stats.packets;
480			bytes   = ring->stats.bytes;
481		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
482
483		stats->rx_packets += packets;
484		stats->rx_bytes   += bytes;
485
486	}
487	rcu_read_unlock();
488
489	/* following stats updated by i40e_watchdog_subtask() */
490	stats->multicast	= vsi_stats->multicast;
491	stats->tx_errors	= vsi_stats->tx_errors;
492	stats->tx_dropped	= vsi_stats->tx_dropped;
493	stats->rx_errors	= vsi_stats->rx_errors;
494	stats->rx_dropped	= vsi_stats->rx_dropped;
495	stats->rx_crc_errors	= vsi_stats->rx_crc_errors;
496	stats->rx_length_errors	= vsi_stats->rx_length_errors;
497}
498
499/**
500 * i40e_vsi_reset_stats - Resets all stats of the given vsi
501 * @vsi: the VSI to have its stats reset
502 **/
503void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
504{
505	struct rtnl_link_stats64 *ns;
506	int i;
507
508	if (!vsi)
509		return;
510
511	ns = i40e_get_vsi_stats_struct(vsi);
512	memset(ns, 0, sizeof(*ns));
513	memset(&vsi->net_stats_offsets, 0, sizeof(vsi->net_stats_offsets));
514	memset(&vsi->eth_stats, 0, sizeof(vsi->eth_stats));
515	memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
516	if (vsi->rx_rings && vsi->rx_rings[0]) {
517		for (i = 0; i < vsi->num_queue_pairs; i++) {
518			memset(&vsi->rx_rings[i]->stats, 0,
519			       sizeof(vsi->rx_rings[i]->stats));
520			memset(&vsi->rx_rings[i]->rx_stats, 0,
521			       sizeof(vsi->rx_rings[i]->rx_stats));
522			memset(&vsi->tx_rings[i]->stats, 0,
523			       sizeof(vsi->tx_rings[i]->stats));
524			memset(&vsi->tx_rings[i]->tx_stats, 0,
525			       sizeof(vsi->tx_rings[i]->tx_stats));
526		}
527	}
528	vsi->stat_offsets_loaded = false;
529}
530
531/**
532 * i40e_pf_reset_stats - Reset all of the stats for the given PF
533 * @pf: the PF to be reset
534 **/
535void i40e_pf_reset_stats(struct i40e_pf *pf)
536{
537	int i;
538
539	memset(&pf->stats, 0, sizeof(pf->stats));
540	memset(&pf->stats_offsets, 0, sizeof(pf->stats_offsets));
541	pf->stat_offsets_loaded = false;
542
543	for (i = 0; i < I40E_MAX_VEB; i++) {
544		if (pf->veb[i]) {
545			memset(&pf->veb[i]->stats, 0,
546			       sizeof(pf->veb[i]->stats));
547			memset(&pf->veb[i]->stats_offsets, 0,
548			       sizeof(pf->veb[i]->stats_offsets));
549			memset(&pf->veb[i]->tc_stats, 0,
550			       sizeof(pf->veb[i]->tc_stats));
551			memset(&pf->veb[i]->tc_stats_offsets, 0,
552			       sizeof(pf->veb[i]->tc_stats_offsets));
553			pf->veb[i]->stat_offsets_loaded = false;
554		}
555	}
556	pf->hw_csum_rx_error = 0;
557}
558
559/**
560 * i40e_compute_pci_to_hw_id - compute index form PCI function.
561 * @vsi: ptr to the VSI to read from.
562 * @hw: ptr to the hardware info.
563 **/
564static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw)
565{
566	int pf_count = i40e_get_pf_count(hw);
567
568	if (vsi->type == I40E_VSI_SRIOV)
569		return (hw->port * BIT(7)) / pf_count + vsi->vf_id;
570
571	return hw->port + BIT(7);
572}
573
574/**
575 * i40e_stat_update64 - read and update a 64 bit stat from the chip.
576 * @hw: ptr to the hardware info.
577 * @hireg: the high 32 bit reg to read.
578 * @loreg: the low 32 bit reg to read.
579 * @offset_loaded: has the initial offset been loaded yet.
580 * @offset: ptr to current offset value.
581 * @stat: ptr to the stat.
582 *
583 * Since the device stats are not reset at PFReset, they will not
584 * be zeroed when the driver starts.  We'll save the first values read
585 * and use them as offsets to be subtracted from the raw values in order
586 * to report stats that count from zero.
587 **/
588static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg,
589			       bool offset_loaded, u64 *offset, u64 *stat)
590{
591	u64 new_data;
592
593	new_data = rd64(hw, loreg);
594
595	if (!offset_loaded || new_data < *offset)
596		*offset = new_data;
597	*stat = new_data - *offset;
598}
599
600/**
601 * i40e_stat_update48 - read and update a 48 bit stat from the chip
602 * @hw: ptr to the hardware info
603 * @hireg: the high 32 bit reg to read
604 * @loreg: the low 32 bit reg to read
605 * @offset_loaded: has the initial offset been loaded yet
606 * @offset: ptr to current offset value
607 * @stat: ptr to the stat
608 *
609 * Since the device stats are not reset at PFReset, they likely will not
610 * be zeroed when the driver starts.  We'll save the first values read
611 * and use them as offsets to be subtracted from the raw values in order
612 * to report stats that count from zero.  In the process, we also manage
613 * the potential roll-over.
614 **/
615static void i40e_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg,
616			       bool offset_loaded, u64 *offset, u64 *stat)
617{
618	u64 new_data;
619
620	if (hw->device_id == I40E_DEV_ID_QEMU) {
621		new_data = rd32(hw, loreg);
622		new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32;
623	} else {
624		new_data = rd64(hw, loreg);
625	}
626	if (!offset_loaded)
627		*offset = new_data;
628	if (likely(new_data >= *offset))
629		*stat = new_data - *offset;
630	else
631		*stat = (new_data + BIT_ULL(48)) - *offset;
632	*stat &= 0xFFFFFFFFFFFFULL;
633}
634
635/**
636 * i40e_stat_update32 - read and update a 32 bit stat from the chip
637 * @hw: ptr to the hardware info
638 * @reg: the hw reg to read
639 * @offset_loaded: has the initial offset been loaded yet
640 * @offset: ptr to current offset value
641 * @stat: ptr to the stat
642 **/
643static void i40e_stat_update32(struct i40e_hw *hw, u32 reg,
644			       bool offset_loaded, u64 *offset, u64 *stat)
645{
646	u32 new_data;
647
648	new_data = rd32(hw, reg);
649	if (!offset_loaded)
650		*offset = new_data;
651	if (likely(new_data >= *offset))
652		*stat = (u32)(new_data - *offset);
653	else
654		*stat = (u32)((new_data + BIT_ULL(32)) - *offset);
655}
656
657/**
658 * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
659 * @hw: ptr to the hardware info
660 * @reg: the hw reg to read and clear
661 * @stat: ptr to the stat
662 **/
663static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
664{
665	u32 new_data = rd32(hw, reg);
666
667	wr32(hw, reg, 1); /* must write a nonzero value to clear register */
668	*stat += new_data;
669}
670
671/**
672 * i40e_stats_update_rx_discards - update rx_discards.
673 * @vsi: ptr to the VSI to be updated.
674 * @hw: ptr to the hardware info.
675 * @stat_idx: VSI's stat_counter_idx.
676 * @offset_loaded: ptr to the VSI's stat_offsets_loaded.
677 * @stat_offset: ptr to stat_offset to store first read of specific register.
678 * @stat: ptr to VSI's stat to be updated.
679 **/
680static void
681i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw,
682			      int stat_idx, bool offset_loaded,
683			      struct i40e_eth_stats *stat_offset,
684			      struct i40e_eth_stats *stat)
685{
686	u64 rx_rdpc, rx_rxerr;
687
688	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded,
689			   &stat_offset->rx_discards, &rx_rdpc);
690	i40e_stat_update64(hw,
691			   I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)),
692			   I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)),
693			   offset_loaded, &stat_offset->rx_discards_other,
694			   &rx_rxerr);
695
696	stat->rx_discards = rx_rdpc + rx_rxerr;
697}
698
699/**
700 * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
701 * @vsi: the VSI to be updated
702 **/
703void i40e_update_eth_stats(struct i40e_vsi *vsi)
704{
705	int stat_idx = le16_to_cpu(vsi->info.stat_counter_idx);
706	struct i40e_pf *pf = vsi->back;
707	struct i40e_hw *hw = &pf->hw;
708	struct i40e_eth_stats *oes;
709	struct i40e_eth_stats *es;     /* device's eth stats */
710
711	es = &vsi->eth_stats;
712	oes = &vsi->eth_stats_offsets;
713
714	/* Gather up the stats that the hw collects */
715	i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx),
716			   vsi->stat_offsets_loaded,
717			   &oes->tx_errors, &es->tx_errors);
718	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx),
719			   vsi->stat_offsets_loaded,
720			   &oes->rx_discards, &es->rx_discards);
721	i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx),
722			   vsi->stat_offsets_loaded,
723			   &oes->rx_unknown_protocol, &es->rx_unknown_protocol);
724
725	i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx),
726			   I40E_GLV_GORCL(stat_idx),
727			   vsi->stat_offsets_loaded,
728			   &oes->rx_bytes, &es->rx_bytes);
729	i40e_stat_update48(hw, I40E_GLV_UPRCH(stat_idx),
730			   I40E_GLV_UPRCL(stat_idx),
731			   vsi->stat_offsets_loaded,
732			   &oes->rx_unicast, &es->rx_unicast);
733	i40e_stat_update48(hw, I40E_GLV_MPRCH(stat_idx),
734			   I40E_GLV_MPRCL(stat_idx),
735			   vsi->stat_offsets_loaded,
736			   &oes->rx_multicast, &es->rx_multicast);
737	i40e_stat_update48(hw, I40E_GLV_BPRCH(stat_idx),
738			   I40E_GLV_BPRCL(stat_idx),
739			   vsi->stat_offsets_loaded,
740			   &oes->rx_broadcast, &es->rx_broadcast);
741
742	i40e_stat_update48(hw, I40E_GLV_GOTCH(stat_idx),
743			   I40E_GLV_GOTCL(stat_idx),
744			   vsi->stat_offsets_loaded,
745			   &oes->tx_bytes, &es->tx_bytes);
746	i40e_stat_update48(hw, I40E_GLV_UPTCH(stat_idx),
747			   I40E_GLV_UPTCL(stat_idx),
748			   vsi->stat_offsets_loaded,
749			   &oes->tx_unicast, &es->tx_unicast);
750	i40e_stat_update48(hw, I40E_GLV_MPTCH(stat_idx),
751			   I40E_GLV_MPTCL(stat_idx),
752			   vsi->stat_offsets_loaded,
753			   &oes->tx_multicast, &es->tx_multicast);
754	i40e_stat_update48(hw, I40E_GLV_BPTCH(stat_idx),
755			   I40E_GLV_BPTCL(stat_idx),
756			   vsi->stat_offsets_loaded,
757			   &oes->tx_broadcast, &es->tx_broadcast);
758
759	i40e_stats_update_rx_discards(vsi, hw, stat_idx,
760				      vsi->stat_offsets_loaded, oes, es);
761
762	vsi->stat_offsets_loaded = true;
763}
764
765/**
766 * i40e_update_veb_stats - Update Switch component statistics
767 * @veb: the VEB being updated
768 **/
769void i40e_update_veb_stats(struct i40e_veb *veb)
770{
771	struct i40e_pf *pf = veb->pf;
772	struct i40e_hw *hw = &pf->hw;
773	struct i40e_eth_stats *oes;
774	struct i40e_eth_stats *es;     /* device's eth stats */
775	struct i40e_veb_tc_stats *veb_oes;
776	struct i40e_veb_tc_stats *veb_es;
777	int i, idx = 0;
778
779	idx = veb->stats_idx;
780	es = &veb->stats;
781	oes = &veb->stats_offsets;
782	veb_es = &veb->tc_stats;
783	veb_oes = &veb->tc_stats_offsets;
784
785	/* Gather up the stats that the hw collects */
786	i40e_stat_update32(hw, I40E_GLSW_TDPC(idx),
787			   veb->stat_offsets_loaded,
788			   &oes->tx_discards, &es->tx_discards);
789	if (hw->revision_id > 0)
790		i40e_stat_update32(hw, I40E_GLSW_RUPP(idx),
791				   veb->stat_offsets_loaded,
792				   &oes->rx_unknown_protocol,
793				   &es->rx_unknown_protocol);
794	i40e_stat_update48(hw, I40E_GLSW_GORCH(idx), I40E_GLSW_GORCL(idx),
795			   veb->stat_offsets_loaded,
796			   &oes->rx_bytes, &es->rx_bytes);
797	i40e_stat_update48(hw, I40E_GLSW_UPRCH(idx), I40E_GLSW_UPRCL(idx),
798			   veb->stat_offsets_loaded,
799			   &oes->rx_unicast, &es->rx_unicast);
800	i40e_stat_update48(hw, I40E_GLSW_MPRCH(idx), I40E_GLSW_MPRCL(idx),
801			   veb->stat_offsets_loaded,
802			   &oes->rx_multicast, &es->rx_multicast);
803	i40e_stat_update48(hw, I40E_GLSW_BPRCH(idx), I40E_GLSW_BPRCL(idx),
804			   veb->stat_offsets_loaded,
805			   &oes->rx_broadcast, &es->rx_broadcast);
806
807	i40e_stat_update48(hw, I40E_GLSW_GOTCH(idx), I40E_GLSW_GOTCL(idx),
808			   veb->stat_offsets_loaded,
809			   &oes->tx_bytes, &es->tx_bytes);
810	i40e_stat_update48(hw, I40E_GLSW_UPTCH(idx), I40E_GLSW_UPTCL(idx),
811			   veb->stat_offsets_loaded,
812			   &oes->tx_unicast, &es->tx_unicast);
813	i40e_stat_update48(hw, I40E_GLSW_MPTCH(idx), I40E_GLSW_MPTCL(idx),
814			   veb->stat_offsets_loaded,
815			   &oes->tx_multicast, &es->tx_multicast);
816	i40e_stat_update48(hw, I40E_GLSW_BPTCH(idx), I40E_GLSW_BPTCL(idx),
817			   veb->stat_offsets_loaded,
818			   &oes->tx_broadcast, &es->tx_broadcast);
819	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
820		i40e_stat_update48(hw, I40E_GLVEBTC_RPCH(i, idx),
821				   I40E_GLVEBTC_RPCL(i, idx),
822				   veb->stat_offsets_loaded,
823				   &veb_oes->tc_rx_packets[i],
824				   &veb_es->tc_rx_packets[i]);
825		i40e_stat_update48(hw, I40E_GLVEBTC_RBCH(i, idx),
826				   I40E_GLVEBTC_RBCL(i, idx),
827				   veb->stat_offsets_loaded,
828				   &veb_oes->tc_rx_bytes[i],
829				   &veb_es->tc_rx_bytes[i]);
830		i40e_stat_update48(hw, I40E_GLVEBTC_TPCH(i, idx),
831				   I40E_GLVEBTC_TPCL(i, idx),
832				   veb->stat_offsets_loaded,
833				   &veb_oes->tc_tx_packets[i],
834				   &veb_es->tc_tx_packets[i]);
835		i40e_stat_update48(hw, I40E_GLVEBTC_TBCH(i, idx),
836				   I40E_GLVEBTC_TBCL(i, idx),
837				   veb->stat_offsets_loaded,
838				   &veb_oes->tc_tx_bytes[i],
839				   &veb_es->tc_tx_bytes[i]);
840	}
841	veb->stat_offsets_loaded = true;
842}
843
844/**
845 * i40e_update_vsi_stats - Update the vsi statistics counters.
846 * @vsi: the VSI to be updated
847 *
848 * There are a few instances where we store the same stat in a
849 * couple of different structs.  This is partly because we have
850 * the netdev stats that need to be filled out, which is slightly
851 * different from the "eth_stats" defined by the chip and used in
852 * VF communications.  We sort it out here.
853 **/
854static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
855{
856	struct i40e_pf *pf = vsi->back;
857	struct rtnl_link_stats64 *ons;
858	struct rtnl_link_stats64 *ns;   /* netdev stats */
859	struct i40e_eth_stats *oes;
860	struct i40e_eth_stats *es;     /* device's eth stats */
861	u64 tx_restart, tx_busy;
862	struct i40e_ring *p;
863	u64 rx_page, rx_buf;
864	u64 bytes, packets;
865	unsigned int start;
866	u64 tx_linearize;
867	u64 tx_force_wb;
868	u64 rx_p, rx_b;
869	u64 tx_p, tx_b;
870	u16 q;
871
872	if (test_bit(__I40E_VSI_DOWN, vsi->state) ||
873	    test_bit(__I40E_CONFIG_BUSY, pf->state))
874		return;
875
876	ns = i40e_get_vsi_stats_struct(vsi);
877	ons = &vsi->net_stats_offsets;
878	es = &vsi->eth_stats;
879	oes = &vsi->eth_stats_offsets;
880
881	/* Gather up the netdev and vsi stats that the driver collects
882	 * on the fly during packet processing
883	 */
884	rx_b = rx_p = 0;
885	tx_b = tx_p = 0;
886	tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
887	rx_page = 0;
888	rx_buf = 0;
889	rcu_read_lock();
890	for (q = 0; q < vsi->num_queue_pairs; q++) {
891		/* locate Tx ring */
892		p = READ_ONCE(vsi->tx_rings[q]);
893		if (!p)
894			continue;
895
896		do {
897			start = u64_stats_fetch_begin_irq(&p->syncp);
898			packets = p->stats.packets;
899			bytes = p->stats.bytes;
900		} while (u64_stats_fetch_retry_irq(&p->syncp, start));
901		tx_b += bytes;
902		tx_p += packets;
903		tx_restart += p->tx_stats.restart_queue;
904		tx_busy += p->tx_stats.tx_busy;
905		tx_linearize += p->tx_stats.tx_linearize;
906		tx_force_wb += p->tx_stats.tx_force_wb;
907
908		/* locate Rx ring */
909		p = READ_ONCE(vsi->rx_rings[q]);
910		if (!p)
911			continue;
912
913		do {
914			start = u64_stats_fetch_begin_irq(&p->syncp);
915			packets = p->stats.packets;
916			bytes = p->stats.bytes;
917		} while (u64_stats_fetch_retry_irq(&p->syncp, start));
918		rx_b += bytes;
919		rx_p += packets;
920		rx_buf += p->rx_stats.alloc_buff_failed;
921		rx_page += p->rx_stats.alloc_page_failed;
922
923		if (i40e_enabled_xdp_vsi(vsi)) {
924			/* locate XDP ring */
925			p = READ_ONCE(vsi->xdp_rings[q]);
926			if (!p)
927				continue;
928
929			do {
930				start = u64_stats_fetch_begin_irq(&p->syncp);
931				packets = p->stats.packets;
932				bytes = p->stats.bytes;
933			} while (u64_stats_fetch_retry_irq(&p->syncp, start));
934			tx_b += bytes;
935			tx_p += packets;
936			tx_restart += p->tx_stats.restart_queue;
937			tx_busy += p->tx_stats.tx_busy;
938			tx_linearize += p->tx_stats.tx_linearize;
939			tx_force_wb += p->tx_stats.tx_force_wb;
940		}
941	}
942	rcu_read_unlock();
943	vsi->tx_restart = tx_restart;
944	vsi->tx_busy = tx_busy;
945	vsi->tx_linearize = tx_linearize;
946	vsi->tx_force_wb = tx_force_wb;
947	vsi->rx_page_failed = rx_page;
948	vsi->rx_buf_failed = rx_buf;
949
950	ns->rx_packets = rx_p;
951	ns->rx_bytes = rx_b;
952	ns->tx_packets = tx_p;
953	ns->tx_bytes = tx_b;
954
955	/* update netdev stats from eth stats */
956	i40e_update_eth_stats(vsi);
957	ons->tx_errors = oes->tx_errors;
958	ns->tx_errors = es->tx_errors;
959	ons->multicast = oes->rx_multicast;
960	ns->multicast = es->rx_multicast;
961	ons->rx_dropped = oes->rx_discards;
962	ns->rx_dropped = es->rx_discards;
963	ons->tx_dropped = oes->tx_discards;
964	ns->tx_dropped = es->tx_discards;
965
966	/* pull in a couple PF stats if this is the main vsi */
967	if (vsi == pf->vsi[pf->lan_vsi]) {
968		ns->rx_crc_errors = pf->stats.crc_errors;
969		ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes;
970		ns->rx_length_errors = pf->stats.rx_length_errors;
971	}
972}
973
974/**
975 * i40e_update_pf_stats - Update the PF statistics counters.
976 * @pf: the PF to be updated
977 **/
978static void i40e_update_pf_stats(struct i40e_pf *pf)
979{
980	struct i40e_hw_port_stats *osd = &pf->stats_offsets;
981	struct i40e_hw_port_stats *nsd = &pf->stats;
982	struct i40e_hw *hw = &pf->hw;
983	u32 val;
984	int i;
985
986	i40e_stat_update48(hw, I40E_GLPRT_GORCH(hw->port),
987			   I40E_GLPRT_GORCL(hw->port),
988			   pf->stat_offsets_loaded,
989			   &osd->eth.rx_bytes, &nsd->eth.rx_bytes);
990	i40e_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port),
991			   I40E_GLPRT_GOTCL(hw->port),
992			   pf->stat_offsets_loaded,
993			   &osd->eth.tx_bytes, &nsd->eth.tx_bytes);
994	i40e_stat_update32(hw, I40E_GLPRT_RDPC(hw->port),
995			   pf->stat_offsets_loaded,
996			   &osd->eth.rx_discards,
997			   &nsd->eth.rx_discards);
998	i40e_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port),
999			   I40E_GLPRT_UPRCL(hw->port),
1000			   pf->stat_offsets_loaded,
1001			   &osd->eth.rx_unicast,
1002			   &nsd->eth.rx_unicast);
1003	i40e_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port),
1004			   I40E_GLPRT_MPRCL(hw->port),
1005			   pf->stat_offsets_loaded,
1006			   &osd->eth.rx_multicast,
1007			   &nsd->eth.rx_multicast);
1008	i40e_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port),
1009			   I40E_GLPRT_BPRCL(hw->port),
1010			   pf->stat_offsets_loaded,
1011			   &osd->eth.rx_broadcast,
1012			   &nsd->eth.rx_broadcast);
1013	i40e_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port),
1014			   I40E_GLPRT_UPTCL(hw->port),
1015			   pf->stat_offsets_loaded,
1016			   &osd->eth.tx_unicast,
1017			   &nsd->eth.tx_unicast);
1018	i40e_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port),
1019			   I40E_GLPRT_MPTCL(hw->port),
1020			   pf->stat_offsets_loaded,
1021			   &osd->eth.tx_multicast,
1022			   &nsd->eth.tx_multicast);
1023	i40e_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port),
1024			   I40E_GLPRT_BPTCL(hw->port),
1025			   pf->stat_offsets_loaded,
1026			   &osd->eth.tx_broadcast,
1027			   &nsd->eth.tx_broadcast);
1028
1029	i40e_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port),
1030			   pf->stat_offsets_loaded,
1031			   &osd->tx_dropped_link_down,
1032			   &nsd->tx_dropped_link_down);
1033
1034	i40e_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port),
1035			   pf->stat_offsets_loaded,
1036			   &osd->crc_errors, &nsd->crc_errors);
1037
1038	i40e_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port),
1039			   pf->stat_offsets_loaded,
1040			   &osd->illegal_bytes, &nsd->illegal_bytes);
1041
1042	i40e_stat_update32(hw, I40E_GLPRT_MLFC(hw->port),
1043			   pf->stat_offsets_loaded,
1044			   &osd->mac_local_faults,
1045			   &nsd->mac_local_faults);
1046	i40e_stat_update32(hw, I40E_GLPRT_MRFC(hw->port),
1047			   pf->stat_offsets_loaded,
1048			   &osd->mac_remote_faults,
1049			   &nsd->mac_remote_faults);
1050
1051	i40e_stat_update32(hw, I40E_GLPRT_RLEC(hw->port),
1052			   pf->stat_offsets_loaded,
1053			   &osd->rx_length_errors,
1054			   &nsd->rx_length_errors);
1055
1056	i40e_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port),
1057			   pf->stat_offsets_loaded,
1058			   &osd->link_xon_rx, &nsd->link_xon_rx);
1059	i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port),
1060			   pf->stat_offsets_loaded,
1061			   &osd->link_xon_tx, &nsd->link_xon_tx);
1062	i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port),
1063			   pf->stat_offsets_loaded,
1064			   &osd->link_xoff_rx, &nsd->link_xoff_rx);
1065	i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port),
1066			   pf->stat_offsets_loaded,
1067			   &osd->link_xoff_tx, &nsd->link_xoff_tx);
1068
1069	for (i = 0; i < 8; i++) {
1070		i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
1071				   pf->stat_offsets_loaded,
1072				   &osd->priority_xoff_rx[i],
1073				   &nsd->priority_xoff_rx[i]);
1074		i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
1075				   pf->stat_offsets_loaded,
1076				   &osd->priority_xon_rx[i],
1077				   &nsd->priority_xon_rx[i]);
1078		i40e_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
1079				   pf->stat_offsets_loaded,
1080				   &osd->priority_xon_tx[i],
1081				   &nsd->priority_xon_tx[i]);
1082		i40e_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
1083				   pf->stat_offsets_loaded,
1084				   &osd->priority_xoff_tx[i],
1085				   &nsd->priority_xoff_tx[i]);
1086		i40e_stat_update32(hw,
1087				   I40E_GLPRT_RXON2OFFCNT(hw->port, i),
1088				   pf->stat_offsets_loaded,
1089				   &osd->priority_xon_2_xoff[i],
1090				   &nsd->priority_xon_2_xoff[i]);
1091	}
1092
1093	i40e_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
1094			   I40E_GLPRT_PRC64L(hw->port),
1095			   pf->stat_offsets_loaded,
1096			   &osd->rx_size_64, &nsd->rx_size_64);
1097	i40e_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port),
1098			   I40E_GLPRT_PRC127L(hw->port),
1099			   pf->stat_offsets_loaded,
1100			   &osd->rx_size_127, &nsd->rx_size_127);
1101	i40e_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port),
1102			   I40E_GLPRT_PRC255L(hw->port),
1103			   pf->stat_offsets_loaded,
1104			   &osd->rx_size_255, &nsd->rx_size_255);
1105	i40e_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port),
1106			   I40E_GLPRT_PRC511L(hw->port),
1107			   pf->stat_offsets_loaded,
1108			   &osd->rx_size_511, &nsd->rx_size_511);
1109	i40e_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port),
1110			   I40E_GLPRT_PRC1023L(hw->port),
1111			   pf->stat_offsets_loaded,
1112			   &osd->rx_size_1023, &nsd->rx_size_1023);
1113	i40e_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port),
1114			   I40E_GLPRT_PRC1522L(hw->port),
1115			   pf->stat_offsets_loaded,
1116			   &osd->rx_size_1522, &nsd->rx_size_1522);
1117	i40e_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port),
1118			   I40E_GLPRT_PRC9522L(hw->port),
1119			   pf->stat_offsets_loaded,
1120			   &osd->rx_size_big, &nsd->rx_size_big);
1121
1122	i40e_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port),
1123			   I40E_GLPRT_PTC64L(hw->port),
1124			   pf->stat_offsets_loaded,
1125			   &osd->tx_size_64, &nsd->tx_size_64);
1126	i40e_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port),
1127			   I40E_GLPRT_PTC127L(hw->port),
1128			   pf->stat_offsets_loaded,
1129			   &osd->tx_size_127, &nsd->tx_size_127);
1130	i40e_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port),
1131			   I40E_GLPRT_PTC255L(hw->port),
1132			   pf->stat_offsets_loaded,
1133			   &osd->tx_size_255, &nsd->tx_size_255);
1134	i40e_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port),
1135			   I40E_GLPRT_PTC511L(hw->port),
1136			   pf->stat_offsets_loaded,
1137			   &osd->tx_size_511, &nsd->tx_size_511);
1138	i40e_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port),
1139			   I40E_GLPRT_PTC1023L(hw->port),
1140			   pf->stat_offsets_loaded,
1141			   &osd->tx_size_1023, &nsd->tx_size_1023);
1142	i40e_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port),
1143			   I40E_GLPRT_PTC1522L(hw->port),
1144			   pf->stat_offsets_loaded,
1145			   &osd->tx_size_1522, &nsd->tx_size_1522);
1146	i40e_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port),
1147			   I40E_GLPRT_PTC9522L(hw->port),
1148			   pf->stat_offsets_loaded,
1149			   &osd->tx_size_big, &nsd->tx_size_big);
1150
1151	i40e_stat_update32(hw, I40E_GLPRT_RUC(hw->port),
1152			   pf->stat_offsets_loaded,
1153			   &osd->rx_undersize, &nsd->rx_undersize);
1154	i40e_stat_update32(hw, I40E_GLPRT_RFC(hw->port),
1155			   pf->stat_offsets_loaded,
1156			   &osd->rx_fragments, &nsd->rx_fragments);
1157	i40e_stat_update32(hw, I40E_GLPRT_ROC(hw->port),
1158			   pf->stat_offsets_loaded,
1159			   &osd->rx_oversize, &nsd->rx_oversize);
1160	i40e_stat_update32(hw, I40E_GLPRT_RJC(hw->port),
1161			   pf->stat_offsets_loaded,
1162			   &osd->rx_jabber, &nsd->rx_jabber);
1163
1164	/* FDIR stats */
1165	i40e_stat_update_and_clear32(hw,
1166			I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
1167			&nsd->fd_atr_match);
1168	i40e_stat_update_and_clear32(hw,
1169			I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
1170			&nsd->fd_sb_match);
1171	i40e_stat_update_and_clear32(hw,
1172			I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
1173			&nsd->fd_atr_tunnel_match);
1174
1175	val = rd32(hw, I40E_PRTPM_EEE_STAT);
1176	nsd->tx_lpi_status =
1177		       (val & I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK) >>
1178			I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT;
1179	nsd->rx_lpi_status =
1180		       (val & I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK) >>
1181			I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT;
1182	i40e_stat_update32(hw, I40E_PRTPM_TLPIC,
1183			   pf->stat_offsets_loaded,
1184			   &osd->tx_lpi_count, &nsd->tx_lpi_count);
1185	i40e_stat_update32(hw, I40E_PRTPM_RLPIC,
1186			   pf->stat_offsets_loaded,
1187			   &osd->rx_lpi_count, &nsd->rx_lpi_count);
1188
1189	if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
1190	    !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
1191		nsd->fd_sb_status = true;
1192	else
1193		nsd->fd_sb_status = false;
1194
1195	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
1196	    !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
1197		nsd->fd_atr_status = true;
1198	else
1199		nsd->fd_atr_status = false;
1200
1201	pf->stat_offsets_loaded = true;
1202}
1203
1204/**
1205 * i40e_update_stats - Update the various statistics counters.
1206 * @vsi: the VSI to be updated
1207 *
1208 * Update the various stats for this VSI and its related entities.
1209 **/
1210void i40e_update_stats(struct i40e_vsi *vsi)
1211{
1212	struct i40e_pf *pf = vsi->back;
1213
1214	if (vsi == pf->vsi[pf->lan_vsi])
1215		i40e_update_pf_stats(pf);
1216
1217	i40e_update_vsi_stats(vsi);
1218}
1219
1220/**
1221 * i40e_count_filters - counts VSI mac filters
1222 * @vsi: the VSI to be searched
1223 *
1224 * Returns count of mac filters
1225 **/
1226int i40e_count_filters(struct i40e_vsi *vsi)
1227{
1228	struct i40e_mac_filter *f;
1229	struct hlist_node *h;
1230	int bkt;
1231	int cnt = 0;
1232
1233	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
1234		++cnt;
1235
1236	return cnt;
1237}
1238
1239/**
1240 * i40e_find_filter - Search VSI filter list for specific mac/vlan filter
1241 * @vsi: the VSI to be searched
1242 * @macaddr: the MAC address
1243 * @vlan: the vlan
1244 *
1245 * Returns ptr to the filter object or NULL
1246 **/
1247static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
1248						const u8 *macaddr, s16 vlan)
1249{
1250	struct i40e_mac_filter *f;
1251	u64 key;
1252
1253	if (!vsi || !macaddr)
1254		return NULL;
1255
1256	key = i40e_addr_to_hkey(macaddr);
1257	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
1258		if ((ether_addr_equal(macaddr, f->macaddr)) &&
1259		    (vlan == f->vlan))
1260			return f;
1261	}
1262	return NULL;
1263}
1264
1265/**
1266 * i40e_find_mac - Find a mac addr in the macvlan filters list
1267 * @vsi: the VSI to be searched
1268 * @macaddr: the MAC address we are searching for
1269 *
1270 * Returns the first filter with the provided MAC address or NULL if
1271 * MAC address was not found
1272 **/
1273struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr)
1274{
1275	struct i40e_mac_filter *f;
1276	u64 key;
1277
1278	if (!vsi || !macaddr)
1279		return NULL;
1280
1281	key = i40e_addr_to_hkey(macaddr);
1282	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
1283		if ((ether_addr_equal(macaddr, f->macaddr)))
1284			return f;
1285	}
1286	return NULL;
1287}
1288
1289/**
1290 * i40e_is_vsi_in_vlan - Check if VSI is in vlan mode
1291 * @vsi: the VSI to be searched
1292 *
1293 * Returns true if VSI is in vlan mode or false otherwise
1294 **/
1295bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi)
1296{
1297	/* If we have a PVID, always operate in VLAN mode */
1298	if (vsi->info.pvid)
1299		return true;
1300
1301	/* We need to operate in VLAN mode whenever we have any filters with
1302	 * a VLAN other than I40E_VLAN_ALL. We could check the table each
1303	 * time, incurring search cost repeatedly. However, we can notice two
1304	 * things:
1305	 *
1306	 * 1) the only place where we can gain a VLAN filter is in
1307	 *    i40e_add_filter.
1308	 *
1309	 * 2) the only place where filters are actually removed is in
1310	 *    i40e_sync_filters_subtask.
1311	 *
1312	 * Thus, we can simply use a boolean value, has_vlan_filters which we
1313	 * will set to true when we add a VLAN filter in i40e_add_filter. Then
1314	 * we have to perform the full search after deleting filters in
1315	 * i40e_sync_filters_subtask, but we already have to search
1316	 * filters here and can perform the check at the same time. This
1317	 * results in avoiding embedding a loop for VLAN mode inside another
1318	 * loop over all the filters, and should maintain correctness as noted
1319	 * above.
1320	 */
1321	return vsi->has_vlan_filter;
1322}
1323
1324/**
1325 * i40e_correct_mac_vlan_filters - Correct non-VLAN filters if necessary
1326 * @vsi: the VSI to configure
1327 * @tmp_add_list: list of filters ready to be added
1328 * @tmp_del_list: list of filters ready to be deleted
1329 * @vlan_filters: the number of active VLAN filters
1330 *
1331 * Update VLAN=0 and VLAN=-1 (I40E_VLAN_ANY) filters properly so that they
1332 * behave as expected. If we have any active VLAN filters remaining or about
1333 * to be added then we need to update non-VLAN filters to be marked as VLAN=0
1334 * so that they only match against untagged traffic. If we no longer have any
1335 * active VLAN filters, we need to make all non-VLAN filters marked as VLAN=-1
1336 * so that they match against both tagged and untagged traffic. In this way,
1337 * we ensure that we correctly receive the desired traffic. This ensures that
1338 * when we have an active VLAN we will receive only untagged traffic and
1339 * traffic matching active VLANs. If we have no active VLANs then we will
1340 * operate in non-VLAN mode and receive all traffic, tagged or untagged.
1341 *
1342 * Finally, in a similar fashion, this function also corrects filters when
1343 * there is an active PVID assigned to this VSI.
1344 *
1345 * In case of memory allocation failure return -ENOMEM. Otherwise, return 0.
1346 *
1347 * This function is only expected to be called from within
1348 * i40e_sync_vsi_filters.
1349 *
1350 * NOTE: This function expects to be called while under the
1351 * mac_filter_hash_lock
1352 */
1353static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi,
1354					 struct hlist_head *tmp_add_list,
1355					 struct hlist_head *tmp_del_list,
1356					 int vlan_filters)
1357{
1358	s16 pvid = le16_to_cpu(vsi->info.pvid);
1359	struct i40e_mac_filter *f, *add_head;
1360	struct i40e_new_mac_filter *new;
1361	struct hlist_node *h;
1362	int bkt, new_vlan;
1363
1364	/* To determine if a particular filter needs to be replaced we
1365	 * have the three following conditions:
1366	 *
1367	 * a) if we have a PVID assigned, then all filters which are
1368	 *    not marked as VLAN=PVID must be replaced with filters that
1369	 *    are.
1370	 * b) otherwise, if we have any active VLANS, all filters
1371	 *    which are marked as VLAN=-1 must be replaced with
1372	 *    filters marked as VLAN=0
1373	 * c) finally, if we do not have any active VLANS, all filters
1374	 *    which are marked as VLAN=0 must be replaced with filters
1375	 *    marked as VLAN=-1
1376	 */
1377
1378	/* Update the filters about to be added in place */
1379	hlist_for_each_entry(new, tmp_add_list, hlist) {
1380		if (pvid && new->f->vlan != pvid)
1381			new->f->vlan = pvid;
1382		else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY)
1383			new->f->vlan = 0;
1384		else if (!vlan_filters && new->f->vlan == 0)
1385			new->f->vlan = I40E_VLAN_ANY;
1386	}
1387
1388	/* Update the remaining active filters */
1389	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
1390		/* Combine the checks for whether a filter needs to be changed
1391		 * and then determine the new VLAN inside the if block, in
1392		 * order to avoid duplicating code for adding the new filter
1393		 * then deleting the old filter.
1394		 */
1395		if ((pvid && f->vlan != pvid) ||
1396		    (vlan_filters && f->vlan == I40E_VLAN_ANY) ||
1397		    (!vlan_filters && f->vlan == 0)) {
1398			/* Determine the new vlan we will be adding */
1399			if (pvid)
1400				new_vlan = pvid;
1401			else if (vlan_filters)
1402				new_vlan = 0;
1403			else
1404				new_vlan = I40E_VLAN_ANY;
1405
1406			/* Create the new filter */
1407			add_head = i40e_add_filter(vsi, f->macaddr, new_vlan);
1408			if (!add_head)
1409				return -ENOMEM;
1410
1411			/* Create a temporary i40e_new_mac_filter */
1412			new = kzalloc(sizeof(*new), GFP_ATOMIC);
1413			if (!new)
1414				return -ENOMEM;
1415
1416			new->f = add_head;
1417			new->state = add_head->state;
1418
1419			/* Add the new filter to the tmp list */
1420			hlist_add_head(&new->hlist, tmp_add_list);
1421
1422			/* Put the original filter into the delete list */
1423			f->state = I40E_FILTER_REMOVE;
1424			hash_del(&f->hlist);
1425			hlist_add_head(&f->hlist, tmp_del_list);
1426		}
1427	}
1428
1429	vsi->has_vlan_filter = !!vlan_filters;
1430
1431	return 0;
1432}
1433
1434/**
1435 * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM
1436 * @vsi: the PF Main VSI - inappropriate for any other VSI
1437 * @macaddr: the MAC address
1438 *
1439 * Remove whatever filter the firmware set up so the driver can manage
1440 * its own filtering intelligently.
1441 **/
1442static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
1443{
1444	struct i40e_aqc_remove_macvlan_element_data element;
1445	struct i40e_pf *pf = vsi->back;
1446
1447	/* Only appropriate for the PF main VSI */
1448	if (vsi->type != I40E_VSI_MAIN)
1449		return;
1450
1451	memset(&element, 0, sizeof(element));
1452	ether_addr_copy(element.mac_addr, macaddr);
1453	element.vlan_tag = 0;
1454	/* Ignore error returns, some firmware does it this way... */
1455	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
1456	i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
1457
1458	memset(&element, 0, sizeof(element));
1459	ether_addr_copy(element.mac_addr, macaddr);
1460	element.vlan_tag = 0;
1461	/* ...and some firmware does it this way. */
1462	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
1463			I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
1464	i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
1465}
1466
1467/**
1468 * i40e_add_filter - Add a mac/vlan filter to the VSI
1469 * @vsi: the VSI to be searched
1470 * @macaddr: the MAC address
1471 * @vlan: the vlan
1472 *
1473 * Returns ptr to the filter object or NULL when no memory available.
1474 *
1475 * NOTE: This function is expected to be called with mac_filter_hash_lock
1476 * being held.
1477 **/
1478struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
1479					const u8 *macaddr, s16 vlan)
1480{
1481	struct i40e_mac_filter *f;
1482	u64 key;
1483
1484	if (!vsi || !macaddr)
1485		return NULL;
1486
1487	f = i40e_find_filter(vsi, macaddr, vlan);
1488	if (!f) {
1489		f = kzalloc(sizeof(*f), GFP_ATOMIC);
1490		if (!f)
1491			return NULL;
1492
1493		/* Update the boolean indicating if we need to function in
1494		 * VLAN mode.
1495		 */
1496		if (vlan >= 0)
1497			vsi->has_vlan_filter = true;
1498
1499		ether_addr_copy(f->macaddr, macaddr);
1500		f->vlan = vlan;
1501		f->state = I40E_FILTER_NEW;
1502		INIT_HLIST_NODE(&f->hlist);
1503
1504		key = i40e_addr_to_hkey(macaddr);
1505		hash_add(vsi->mac_filter_hash, &f->hlist, key);
1506
1507		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
1508		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
1509	}
1510
1511	/* If we're asked to add a filter that has been marked for removal, it
1512	 * is safe to simply restore it to active state. __i40e_del_filter
1513	 * will have simply deleted any filters which were previously marked
1514	 * NEW or FAILED, so if it is currently marked REMOVE it must have
1515	 * previously been ACTIVE. Since we haven't yet run the sync filters
1516	 * task, just restore this filter to the ACTIVE state so that the
1517	 * sync task leaves it in place
1518	 */
1519	if (f->state == I40E_FILTER_REMOVE)
1520		f->state = I40E_FILTER_ACTIVE;
1521
1522	return f;
1523}
1524
1525/**
1526 * __i40e_del_filter - Remove a specific filter from the VSI
1527 * @vsi: VSI to remove from
1528 * @f: the filter to remove from the list
1529 *
1530 * This function should be called instead of i40e_del_filter only if you know
1531 * the exact filter you will remove already, such as via i40e_find_filter or
1532 * i40e_find_mac.
1533 *
1534 * NOTE: This function is expected to be called with mac_filter_hash_lock
1535 * being held.
1536 * ANOTHER NOTE: This function MUST be called from within the context of
1537 * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
1538 * instead of list_for_each_entry().
1539 **/
1540void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
1541{
1542	if (!f)
1543		return;
1544
1545	/* If the filter was never added to firmware then we can just delete it
1546	 * directly and we don't want to set the status to remove or else an
1547	 * admin queue command will unnecessarily fire.
1548	 */
1549	if ((f->state == I40E_FILTER_FAILED) ||
1550	    (f->state == I40E_FILTER_NEW)) {
1551		hash_del(&f->hlist);
1552		kfree(f);
1553	} else {
1554		f->state = I40E_FILTER_REMOVE;
1555	}
1556
1557	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
1558	set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
1559}
1560
1561/**
1562 * i40e_del_filter - Remove a MAC/VLAN filter from the VSI
1563 * @vsi: the VSI to be searched
1564 * @macaddr: the MAC address
1565 * @vlan: the VLAN
1566 *
1567 * NOTE: This function is expected to be called with mac_filter_hash_lock
1568 * being held.
1569 * ANOTHER NOTE: This function MUST be called from within the context of
1570 * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
1571 * instead of list_for_each_entry().
1572 **/
1573void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
1574{
1575	struct i40e_mac_filter *f;
1576
1577	if (!vsi || !macaddr)
1578		return;
1579
1580	f = i40e_find_filter(vsi, macaddr, vlan);
1581	__i40e_del_filter(vsi, f);
1582}
1583
1584/**
1585 * i40e_add_mac_filter - Add a MAC filter for all active VLANs
1586 * @vsi: the VSI to be searched
1587 * @macaddr: the mac address to be filtered
1588 *
1589 * If we're not in VLAN mode, just add the filter to I40E_VLAN_ANY. Otherwise,
1590 * go through all the macvlan filters and add a macvlan filter for each
1591 * unique vlan that already exists. If a PVID has been assigned, instead only
1592 * add the macaddr to that VLAN.
1593 *
1594 * Returns last filter added on success, else NULL
1595 **/
1596struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
1597					    const u8 *macaddr)
1598{
1599	struct i40e_mac_filter *f, *add = NULL;
1600	struct hlist_node *h;
1601	int bkt;
1602
1603	if (vsi->info.pvid)
1604		return i40e_add_filter(vsi, macaddr,
1605				       le16_to_cpu(vsi->info.pvid));
1606
1607	if (!i40e_is_vsi_in_vlan(vsi))
1608		return i40e_add_filter(vsi, macaddr, I40E_VLAN_ANY);
1609
1610	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
1611		if (f->state == I40E_FILTER_REMOVE)
1612			continue;
1613		add = i40e_add_filter(vsi, macaddr, f->vlan);
1614		if (!add)
1615			return NULL;
1616	}
1617
1618	return add;
1619}
1620
1621/**
1622 * i40e_del_mac_filter - Remove a MAC filter from all VLANs
1623 * @vsi: the VSI to be searched
1624 * @macaddr: the mac address to be removed
1625 *
1626 * Removes a given MAC address from a VSI regardless of what VLAN it has been
1627 * associated with.
1628 *
1629 * Returns 0 for success, or error
1630 **/
1631int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr)
1632{
1633	struct i40e_mac_filter *f;
1634	struct hlist_node *h;
1635	bool found = false;
1636	int bkt;
1637
1638	lockdep_assert_held(&vsi->mac_filter_hash_lock);
1639	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
1640		if (ether_addr_equal(macaddr, f->macaddr)) {
1641			__i40e_del_filter(vsi, f);
1642			found = true;
1643		}
1644	}
1645
1646	if (found)
1647		return 0;
1648	else
1649		return -ENOENT;
1650}
1651
1652/**
1653 * i40e_set_mac - NDO callback to set mac address
1654 * @netdev: network interface device structure
1655 * @p: pointer to an address structure
1656 *
1657 * Returns 0 on success, negative on failure
1658 **/
1659static int i40e_set_mac(struct net_device *netdev, void *p)
1660{
1661	struct i40e_netdev_priv *np = netdev_priv(netdev);
1662	struct i40e_vsi *vsi = np->vsi;
1663	struct i40e_pf *pf = vsi->back;
1664	struct i40e_hw *hw = &pf->hw;
1665	struct sockaddr *addr = p;
1666
1667	if (!is_valid_ether_addr(addr->sa_data))
1668		return -EADDRNOTAVAIL;
1669
1670	if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) {
1671		netdev_info(netdev, "already using mac address %pM\n",
1672			    addr->sa_data);
1673		return 0;
1674	}
1675
1676	if (test_bit(__I40E_DOWN, pf->state) ||
1677	    test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
1678		return -EADDRNOTAVAIL;
1679
1680	if (ether_addr_equal(hw->mac.addr, addr->sa_data))
1681		netdev_info(netdev, "returning to hw mac address %pM\n",
1682			    hw->mac.addr);
1683	else
1684		netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
1685
1686	/* Copy the address first, so that we avoid a possible race with
1687	 * .set_rx_mode().
1688	 * - Remove old address from MAC filter
1689	 * - Copy new address
1690	 * - Add new address to MAC filter
1691	 */
1692	spin_lock_bh(&vsi->mac_filter_hash_lock);
1693	i40e_del_mac_filter(vsi, netdev->dev_addr);
1694	ether_addr_copy(netdev->dev_addr, addr->sa_data);
1695	i40e_add_mac_filter(vsi, netdev->dev_addr);
1696	spin_unlock_bh(&vsi->mac_filter_hash_lock);
1697
1698	if (vsi->type == I40E_VSI_MAIN) {
1699		i40e_status ret;
1700
1701		ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL,
1702						addr->sa_data, NULL);
1703		if (ret)
1704			netdev_info(netdev, "Ignoring error from firmware on LAA update, status %s, AQ ret %s\n",
1705				    i40e_stat_str(hw, ret),
1706				    i40e_aq_str(hw, hw->aq.asq_last_status));
1707	}
1708
1709	/* schedule our worker thread which will take care of
1710	 * applying the new filter changes
1711	 */
1712	i40e_service_event_schedule(pf);
1713	return 0;
1714}
1715
1716/**
1717 * i40e_config_rss_aq - Prepare for RSS using AQ commands
1718 * @vsi: vsi structure
1719 * @seed: RSS hash seed
1720 * @lut: pointer to lookup table of lut_size
1721 * @lut_size: size of the lookup table
1722 **/
1723static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
1724			      u8 *lut, u16 lut_size)
1725{
1726	struct i40e_pf *pf = vsi->back;
1727	struct i40e_hw *hw = &pf->hw;
1728	int ret = 0;
1729
1730	if (seed) {
1731		struct i40e_aqc_get_set_rss_key_data *seed_dw =
1732			(struct i40e_aqc_get_set_rss_key_data *)seed;
1733		ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
1734		if (ret) {
1735			dev_info(&pf->pdev->dev,
1736				 "Cannot set RSS key, err %s aq_err %s\n",
1737				 i40e_stat_str(hw, ret),
1738				 i40e_aq_str(hw, hw->aq.asq_last_status));
1739			return ret;
1740		}
1741	}
1742	if (lut) {
1743		bool pf_lut = vsi->type == I40E_VSI_MAIN;
1744
1745		ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
1746		if (ret) {
1747			dev_info(&pf->pdev->dev,
1748				 "Cannot set RSS lut, err %s aq_err %s\n",
1749				 i40e_stat_str(hw, ret),
1750				 i40e_aq_str(hw, hw->aq.asq_last_status));
1751			return ret;
1752		}
1753	}
1754	return ret;
1755}
1756
1757/**
1758 * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
1759 * @vsi: VSI structure
1760 **/
1761static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
1762{
1763	struct i40e_pf *pf = vsi->back;
1764	u8 seed[I40E_HKEY_ARRAY_SIZE];
1765	u8 *lut;
1766	int ret;
1767
1768	if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
1769		return 0;
1770	if (!vsi->rss_size)
1771		vsi->rss_size = min_t(int, pf->alloc_rss_size,
1772				      vsi->num_queue_pairs);
1773	if (!vsi->rss_size)
1774		return -EINVAL;
1775	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
1776	if (!lut)
1777		return -ENOMEM;
1778
1779	/* Use the user configured hash keys and lookup table if there is one,
1780	 * otherwise use default
1781	 */
1782	if (vsi->rss_lut_user)
1783		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
1784	else
1785		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
1786	if (vsi->rss_hkey_user)
1787		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
1788	else
1789		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
1790	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
1791	kfree(lut);
1792	return ret;
1793}
1794
1795/**
1796 * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
1797 * @vsi: the VSI being configured,
1798 * @ctxt: VSI context structure
1799 * @enabled_tc: number of traffic classes to enable
1800 *
1801 * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
1802 **/
1803static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
1804					   struct i40e_vsi_context *ctxt,
1805					   u8 enabled_tc)
1806{
1807	u16 qcount = 0, max_qcount, qmap, sections = 0;
1808	int i, override_q, pow, num_qps, ret;
1809	u8 netdev_tc = 0, offset = 0;
1810
1811	if (vsi->type != I40E_VSI_MAIN)
1812		return -EINVAL;
1813	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
1814	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
1815	vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
1816	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
1817	num_qps = vsi->mqprio_qopt.qopt.count[0];
1818
1819	/* find the next higher power-of-2 of num queue pairs */
1820	pow = ilog2(num_qps);
1821	if (!is_power_of_2(num_qps))
1822		pow++;
1823	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
1824		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
1825
1826	/* Setup queue offset/count for all TCs for given VSI */
1827	max_qcount = vsi->mqprio_qopt.qopt.count[0];
1828	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
1829		/* See if the given TC is enabled for the given VSI */
1830		if (vsi->tc_config.enabled_tc & BIT(i)) {
1831			offset = vsi->mqprio_qopt.qopt.offset[i];
1832			qcount = vsi->mqprio_qopt.qopt.count[i];
1833			if (qcount > max_qcount)
1834				max_qcount = qcount;
1835			vsi->tc_config.tc_info[i].qoffset = offset;
1836			vsi->tc_config.tc_info[i].qcount = qcount;
1837			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
1838		} else {
1839			/* TC is not enabled so set the offset to
1840			 * default queue and allocate one queue
1841			 * for the given TC.
1842			 */
1843			vsi->tc_config.tc_info[i].qoffset = 0;
1844			vsi->tc_config.tc_info[i].qcount = 1;
1845			vsi->tc_config.tc_info[i].netdev_tc = 0;
1846		}
1847	}
1848
1849	/* Set actual Tx/Rx queue pairs */
1850	vsi->num_queue_pairs = offset + qcount;
1851
1852	/* Setup queue TC[0].qmap for given VSI context */
1853	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
1854	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
1855	ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
1856	ctxt->info.valid_sections |= cpu_to_le16(sections);
1857
1858	/* Reconfigure RSS for main VSI with max queue count */
1859	vsi->rss_size = max_qcount;
1860	ret = i40e_vsi_config_rss(vsi);
1861	if (ret) {
1862		dev_info(&vsi->back->pdev->dev,
1863			 "Failed to reconfig rss for num_queues (%u)\n",
1864			 max_qcount);
1865		return ret;
1866	}
1867	vsi->reconfig_rss = true;
1868	dev_dbg(&vsi->back->pdev->dev,
1869		"Reconfigured rss with num_queues (%u)\n", max_qcount);
1870
1871	/* Find queue count available for channel VSIs and starting offset
1872	 * for channel VSIs
1873	 */
1874	override_q = vsi->mqprio_qopt.qopt.count[0];
1875	if (override_q && override_q < vsi->num_queue_pairs) {
1876		vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
1877		vsi->next_base_queue = override_q;
1878	}
1879	return 0;
1880}
1881
1882/**
1883 * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
1884 * @vsi: the VSI being setup
1885 * @ctxt: VSI context structure
1886 * @enabled_tc: Enabled TCs bitmap
1887 * @is_add: True if called before Add VSI
1888 *
1889 * Setup VSI queue mapping for enabled traffic classes.
1890 **/
1891static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
1892				     struct i40e_vsi_context *ctxt,
1893				     u8 enabled_tc,
1894				     bool is_add)
1895{
1896	struct i40e_pf *pf = vsi->back;
1897	u16 num_tc_qps = 0;
1898	u16 sections = 0;
1899	u8 netdev_tc = 0;
1900	u16 numtc = 1;
1901	u16 qcount;
1902	u8 offset;
1903	u16 qmap;
1904	int i;
1905
1906	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
1907	offset = 0;
1908	/* zero out queue mapping, it will get updated on the end of the function */
1909	memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping));
1910
1911	if (vsi->type == I40E_VSI_MAIN) {
1912		/* This code helps add more queue to the VSI if we have
1913		 * more cores than RSS can support, the higher cores will
1914		 * be served by ATR or other filters. Furthermore, the
1915		 * non-zero req_queue_pairs says that user requested a new
1916		 * queue count via ethtool's set_channels, so use this
1917		 * value for queues distribution across traffic classes
1918		 * We need at least one queue pair for the interface
1919		 * to be usable as we see in else statement.
1920		 */
1921		if (vsi->req_queue_pairs > 0)
1922			vsi->num_queue_pairs = vsi->req_queue_pairs;
1923		else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
1924			vsi->num_queue_pairs = pf->num_lan_msix;
1925		else
1926			vsi->num_queue_pairs = 1;
1927	}
1928
1929	/* Number of queues per enabled TC */
1930	if (vsi->type == I40E_VSI_MAIN ||
1931	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0))
1932		num_tc_qps = vsi->num_queue_pairs;
1933	else
1934		num_tc_qps = vsi->alloc_queue_pairs;
1935
1936	if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
1937		/* Find numtc from enabled TC bitmap */
1938		for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
1939			if (enabled_tc & BIT(i)) /* TC is enabled */
1940				numtc++;
1941		}
1942		if (!numtc) {
1943			dev_warn(&pf->pdev->dev, "DCB is enabled but no TC enabled, forcing TC0\n");
1944			numtc = 1;
1945		}
1946		num_tc_qps = num_tc_qps / numtc;
1947		num_tc_qps = min_t(int, num_tc_qps,
1948				   i40e_pf_get_max_q_per_tc(pf));
1949	}
1950
1951	vsi->tc_config.numtc = numtc;
1952	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
1953
1954	/* Do not allow use more TC queue pairs than MSI-X vectors exist */
1955	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
1956		num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix);
1957
1958	/* Setup queue offset/count for all TCs for given VSI */
1959	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
1960		/* See if the given TC is enabled for the given VSI */
1961		if (vsi->tc_config.enabled_tc & BIT(i)) {
1962			/* TC is enabled */
1963			int pow, num_qps;
1964
1965			switch (vsi->type) {
1966			case I40E_VSI_MAIN:
1967				if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED |
1968				    I40E_FLAG_FD_ATR_ENABLED)) ||
1969				    vsi->tc_config.enabled_tc != 1) {
1970					qcount = min_t(int, pf->alloc_rss_size,
1971						       num_tc_qps);
1972					break;
1973				}
1974				fallthrough;
1975			case I40E_VSI_FDIR:
1976			case I40E_VSI_SRIOV:
1977			case I40E_VSI_VMDQ2:
1978			default:
1979				qcount = num_tc_qps;
1980				WARN_ON(i != 0);
1981				break;
1982			}
1983			vsi->tc_config.tc_info[i].qoffset = offset;
1984			vsi->tc_config.tc_info[i].qcount = qcount;
1985
1986			/* find the next higher power-of-2 of num queue pairs */
1987			num_qps = qcount;
1988			pow = 0;
1989			while (num_qps && (BIT_ULL(pow) < qcount)) {
1990				pow++;
1991				num_qps >>= 1;
1992			}
1993
1994			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
1995			qmap =
1996			    (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
1997			    (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
1998
1999			offset += qcount;
2000		} else {
2001			/* TC is not enabled so set the offset to
2002			 * default queue and allocate one queue
2003			 * for the given TC.
2004			 */
2005			vsi->tc_config.tc_info[i].qoffset = 0;
2006			vsi->tc_config.tc_info[i].qcount = 1;
2007			vsi->tc_config.tc_info[i].netdev_tc = 0;
2008
2009			qmap = 0;
2010		}
2011		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
2012	}
2013	/* Do not change previously set num_queue_pairs for PFs and VFs*/
2014	if ((vsi->type == I40E_VSI_MAIN && numtc != 1) ||
2015	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) ||
2016	    (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV))
2017		vsi->num_queue_pairs = offset;
2018
2019	/* Scheduler section valid can only be set for ADD VSI */
2020	if (is_add) {
2021		sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
2022
2023		ctxt->info.up_enable_bits = enabled_tc;
2024	}
2025	if (vsi->type == I40E_VSI_SRIOV) {
2026		ctxt->info.mapping_flags |=
2027				     cpu_to_le16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
2028		for (i = 0; i < vsi->num_queue_pairs; i++)
2029			ctxt->info.queue_mapping[i] =
2030					       cpu_to_le16(vsi->base_queue + i);
2031	} else {
2032		ctxt->info.mapping_flags |=
2033					cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
2034		ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
2035	}
2036	ctxt->info.valid_sections |= cpu_to_le16(sections);
2037}
2038
2039/**
2040 * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address
2041 * @netdev: the netdevice
2042 * @addr: address to add
2043 *
2044 * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
2045 * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
2046 */
2047static int i40e_addr_sync(struct net_device *netdev, const u8 *addr)
2048{
2049	struct i40e_netdev_priv *np = netdev_priv(netdev);
2050	struct i40e_vsi *vsi = np->vsi;
2051
2052	if (i40e_add_mac_filter(vsi, addr))
2053		return 0;
2054	else
2055		return -ENOMEM;
2056}
2057
2058/**
2059 * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
2060 * @netdev: the netdevice
2061 * @addr: address to add
2062 *
2063 * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
2064 * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
2065 */
2066static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
2067{
2068	struct i40e_netdev_priv *np = netdev_priv(netdev);
2069	struct i40e_vsi *vsi = np->vsi;
2070
2071	/* Under some circumstances, we might receive a request to delete
2072	 * our own device address from our uc list. Because we store the
2073	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
2074	 * such requests and not delete our device address from this list.
2075	 */
2076	if (ether_addr_equal(addr, netdev->dev_addr))
2077		return 0;
2078
2079	i40e_del_mac_filter(vsi, addr);
2080
2081	return 0;
2082}
2083
2084/**
2085 * i40e_set_rx_mode - NDO callback to set the netdev filters
2086 * @netdev: network interface device structure
2087 **/
2088static void i40e_set_rx_mode(struct net_device *netdev)
2089{
2090	struct i40e_netdev_priv *np = netdev_priv(netdev);
2091	struct i40e_vsi *vsi = np->vsi;
2092
2093	spin_lock_bh(&vsi->mac_filter_hash_lock);
2094
2095	__dev_uc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
2096	__dev_mc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
2097
2098	spin_unlock_bh(&vsi->mac_filter_hash_lock);
2099
2100	/* check for other flag changes */
2101	if (vsi->current_netdev_flags != vsi->netdev->flags) {
2102		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
2103		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
2104	}
2105}
2106
2107/**
2108 * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries
2109 * @vsi: Pointer to VSI struct
2110 * @from: Pointer to list which contains MAC filter entries - changes to
2111 *        those entries needs to be undone.
2112 *
2113 * MAC filter entries from this list were slated for deletion.
2114 **/
2115static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
2116					 struct hlist_head *from)
2117{
2118	struct i40e_mac_filter *f;
2119	struct hlist_node *h;
2120
2121	hlist_for_each_entry_safe(f, h, from, hlist) {
2122		u64 key = i40e_addr_to_hkey(f->macaddr);
2123
2124		/* Move the element back into MAC filter list*/
2125		hlist_del(&f->hlist);
2126		hash_add(vsi->mac_filter_hash, &f->hlist, key);
2127	}
2128}
2129
2130/**
2131 * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries
2132 * @vsi: Pointer to vsi struct
2133 * @from: Pointer to list which contains MAC filter entries - changes to
2134 *        those entries needs to be undone.
2135 *
2136 * MAC filter entries from this list were slated for addition.
2137 **/
2138static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
2139					 struct hlist_head *from)
2140{
2141	struct i40e_new_mac_filter *new;
2142	struct hlist_node *h;
2143
2144	hlist_for_each_entry_safe(new, h, from, hlist) {
2145		/* We can simply free the wrapper structure */
2146		hlist_del(&new->hlist);
2147		netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
2148		kfree(new);
2149	}
2150}
2151
2152/**
2153 * i40e_next_entry - Get the next non-broadcast filter from a list
2154 * @next: pointer to filter in list
2155 *
2156 * Returns the next non-broadcast filter in the list. Required so that we
2157 * ignore broadcast filters within the list, since these are not handled via
2158 * the normal firmware update path.
2159 */
2160static
2161struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
2162{
2163	hlist_for_each_entry_continue(next, hlist) {
2164		if (!is_broadcast_ether_addr(next->f->macaddr))
2165			return next;
2166	}
2167
2168	return NULL;
2169}
2170
2171/**
2172 * i40e_update_filter_state - Update filter state based on return data
2173 * from firmware
2174 * @count: Number of filters added
2175 * @add_list: return data from fw
2176 * @add_head: pointer to first filter in current batch
2177 *
2178 * MAC filter entries from list were slated to be added to device. Returns
2179 * number of successful filters. Note that 0 does NOT mean success!
2180 **/
2181static int
2182i40e_update_filter_state(int count,
2183			 struct i40e_aqc_add_macvlan_element_data *add_list,
2184			 struct i40e_new_mac_filter *add_head)
2185{
2186	int retval = 0;
2187	int i;
2188
2189	for (i = 0; i < count; i++) {
2190		/* Always check status of each filter. We don't need to check
2191		 * the firmware return status because we pre-set the filter
2192		 * status to I40E_AQC_MM_ERR_NO_RES when sending the filter
2193		 * request to the adminq. Thus, if it no longer matches then
2194		 * we know the filter is active.
2195		 */
2196		if (add_list[i].match_method == I40E_AQC_MM_ERR_NO_RES) {
2197			add_head->state = I40E_FILTER_FAILED;
2198		} else {
2199			add_head->state = I40E_FILTER_ACTIVE;
2200			retval++;
2201		}
2202
2203		add_head = i40e_next_filter(add_head);
2204		if (!add_head)
2205			break;
2206	}
2207
2208	return retval;
2209}
2210
2211/**
2212 * i40e_aqc_del_filters - Request firmware to delete a set of filters
2213 * @vsi: ptr to the VSI
2214 * @vsi_name: name to display in messages
2215 * @list: the list of filters to send to firmware
2216 * @num_del: the number of filters to delete
2217 * @retval: Set to -EIO on failure to delete
2218 *
2219 * Send a request to firmware via AdminQ to delete a set of filters. Uses
2220 * *retval instead of a return value so that success does not force ret_val to
2221 * be set to 0. This ensures that a sequence of calls to this function
2222 * preserve the previous value of *retval on successful delete.
2223 */
2224static
2225void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
2226			  struct i40e_aqc_remove_macvlan_element_data *list,
2227			  int num_del, int *retval)
2228{
2229	struct i40e_hw *hw = &vsi->back->hw;
2230	i40e_status aq_ret;
2231	int aq_err;
2232
2233	aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL);
2234	aq_err = hw->aq.asq_last_status;
2235
2236	/* Explicitly ignore and do not report when firmware returns ENOENT */
2237	if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
2238		*retval = -EIO;
2239		dev_info(&vsi->back->pdev->dev,
2240			 "ignoring delete macvlan error on %s, err %s, aq_err %s\n",
2241			 vsi_name, i40e_stat_str(hw, aq_ret),
2242			 i40e_aq_str(hw, aq_err));
2243	}
2244}
2245
2246/**
2247 * i40e_aqc_add_filters - Request firmware to add a set of filters
2248 * @vsi: ptr to the VSI
2249 * @vsi_name: name to display in messages
2250 * @list: the list of filters to send to firmware
2251 * @add_head: Position in the add hlist
2252 * @num_add: the number of filters to add
2253 *
2254 * Send a request to firmware via AdminQ to add a chunk of filters. Will set
2255 * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
2256 * space for more filters.
2257 */
2258static
2259void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
2260			  struct i40e_aqc_add_macvlan_element_data *list,
2261			  struct i40e_new_mac_filter *add_head,
2262			  int num_add)
2263{
2264	struct i40e_hw *hw = &vsi->back->hw;
2265	int aq_err, fcnt;
2266
2267	i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL);
2268	aq_err = hw->aq.asq_last_status;
2269	fcnt = i40e_update_filter_state(num_add, list, add_head);
2270
2271	if (fcnt != num_add) {
2272		if (vsi->type == I40E_VSI_MAIN) {
2273			set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2274			dev_warn(&vsi->back->pdev->dev,
2275				 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
2276				 i40e_aq_str(hw, aq_err), vsi_name);
2277		} else if (vsi->type == I40E_VSI_SRIOV ||
2278			   vsi->type == I40E_VSI_VMDQ1 ||
2279			   vsi->type == I40E_VSI_VMDQ2) {
2280			dev_warn(&vsi->back->pdev->dev,
2281				 "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n",
2282				 i40e_aq_str(hw, aq_err), vsi_name, vsi_name);
2283		} else {
2284			dev_warn(&vsi->back->pdev->dev,
2285				 "Error %s adding RX filters on %s, incorrect VSI type: %i.\n",
2286				 i40e_aq_str(hw, aq_err), vsi_name, vsi->type);
2287		}
2288	}
2289}
2290
2291/**
2292 * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags
2293 * @vsi: pointer to the VSI
2294 * @vsi_name: the VSI name
2295 * @f: filter data
2296 *
2297 * This function sets or clears the promiscuous broadcast flags for VLAN
2298 * filters in order to properly receive broadcast frames. Assumes that only
2299 * broadcast filters are passed.
2300 *
2301 * Returns status indicating success or failure;
2302 **/
2303static i40e_status
2304i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
2305			  struct i40e_mac_filter *f)
2306{
2307	bool enable = f->state == I40E_FILTER_NEW;
2308	struct i40e_hw *hw = &vsi->back->hw;
2309	i40e_status aq_ret;
2310
2311	if (f->vlan == I40E_VLAN_ANY) {
2312		aq_ret = i40e_aq_set_vsi_broadcast(hw,
2313						   vsi->seid,
2314						   enable,
2315						   NULL);
2316	} else {
2317		aq_ret = i40e_aq_set_vsi_bc_promisc_on_vlan(hw,
2318							    vsi->seid,
2319							    enable,
2320							    f->vlan,
2321							    NULL);
2322	}
2323
2324	if (aq_ret) {
2325		set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2326		dev_warn(&vsi->back->pdev->dev,
2327			 "Error %s, forcing overflow promiscuous on %s\n",
2328			 i40e_aq_str(hw, hw->aq.asq_last_status),
2329			 vsi_name);
2330	}
2331
2332	return aq_ret;
2333}
2334
2335/**
2336 * i40e_set_promiscuous - set promiscuous mode
2337 * @pf: board private structure
2338 * @promisc: promisc on or off
2339 *
2340 * There are different ways of setting promiscuous mode on a PF depending on
2341 * what state/environment we're in.  This identifies and sets it appropriately.
2342 * Returns 0 on success.
2343 **/
2344static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
2345{
2346	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
2347	struct i40e_hw *hw = &pf->hw;
2348	i40e_status aq_ret;
2349
2350	if (vsi->type == I40E_VSI_MAIN &&
2351	    pf->lan_veb != I40E_NO_VEB &&
2352	    !(pf->flags & I40E_FLAG_MFP_ENABLED)) {
2353		/* set defport ON for Main VSI instead of true promisc
2354		 * this way we will get all unicast/multicast and VLAN
2355		 * promisc behavior but will not get VF or VMDq traffic
2356		 * replicated on the Main VSI.
2357		 */
2358		if (promisc)
2359			aq_ret = i40e_aq_set_default_vsi(hw,
2360							 vsi->seid,
2361							 NULL);
2362		else
2363			aq_ret = i40e_aq_clear_default_vsi(hw,
2364							   vsi->seid,
2365							   NULL);
2366		if (aq_ret) {
2367			dev_info(&pf->pdev->dev,
2368				 "Set default VSI failed, err %s, aq_err %s\n",
2369				 i40e_stat_str(hw, aq_ret),
2370				 i40e_aq_str(hw, hw->aq.asq_last_status));
2371		}
2372	} else {
2373		aq_ret = i40e_aq_set_vsi_unicast_promiscuous(
2374						  hw,
2375						  vsi->seid,
2376						  promisc, NULL,
2377						  true);
2378		if (aq_ret) {
2379			dev_info(&pf->pdev->dev,
2380				 "set unicast promisc failed, err %s, aq_err %s\n",
2381				 i40e_stat_str(hw, aq_ret),
2382				 i40e_aq_str(hw, hw->aq.asq_last_status));
2383		}
2384		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(
2385						  hw,
2386						  vsi->seid,
2387						  promisc, NULL);
2388		if (aq_ret) {
2389			dev_info(&pf->pdev->dev,
2390				 "set multicast promisc failed, err %s, aq_err %s\n",
2391				 i40e_stat_str(hw, aq_ret),
2392				 i40e_aq_str(hw, hw->aq.asq_last_status));
2393		}
2394	}
2395
2396	if (!aq_ret)
2397		pf->cur_promisc = promisc;
2398
2399	return aq_ret;
2400}
2401
2402/**
2403 * i40e_sync_vsi_filters - Update the VSI filter list to the HW
2404 * @vsi: ptr to the VSI
2405 *
2406 * Push any outstanding VSI filter changes through the AdminQ.
2407 *
2408 * Returns 0 or error value
2409 **/
2410int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
2411{
2412	struct hlist_head tmp_add_list, tmp_del_list;
2413	struct i40e_mac_filter *f;
2414	struct i40e_new_mac_filter *new, *add_head = NULL;
2415	struct i40e_hw *hw = &vsi->back->hw;
2416	bool old_overflow, new_overflow;
2417	unsigned int failed_filters = 0;
2418	unsigned int vlan_filters = 0;
2419	char vsi_name[16] = "PF";
2420	int filter_list_len = 0;
2421	i40e_status aq_ret = 0;
2422	u32 changed_flags = 0;
2423	struct hlist_node *h;
2424	struct i40e_pf *pf;
2425	int num_add = 0;
2426	int num_del = 0;
2427	int retval = 0;
2428	u16 cmd_flags;
2429	int list_size;
2430	int bkt;
2431
2432	/* empty array typed pointers, kcalloc later */
2433	struct i40e_aqc_add_macvlan_element_data *add_list;
2434	struct i40e_aqc_remove_macvlan_element_data *del_list;
2435
2436	while (test_and_set_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state))
2437		usleep_range(1000, 2000);
2438	pf = vsi->back;
2439
2440	old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2441
2442	if (vsi->netdev) {
2443		changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
2444		vsi->current_netdev_flags = vsi->netdev->flags;
2445	}
2446
2447	INIT_HLIST_HEAD(&tmp_add_list);
2448	INIT_HLIST_HEAD(&tmp_del_list);
2449
2450	if (vsi->type == I40E_VSI_SRIOV)
2451		snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id);
2452	else if (vsi->type != I40E_VSI_MAIN)
2453		snprintf(vsi_name, sizeof(vsi_name) - 1, "vsi %d", vsi->seid);
2454
2455	if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
2456		vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
2457
2458		spin_lock_bh(&vsi->mac_filter_hash_lock);
2459		/* Create a list of filters to delete. */
2460		hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
2461			if (f->state == I40E_FILTER_REMOVE) {
2462				/* Move the element into temporary del_list */
2463				hash_del(&f->hlist);
2464				hlist_add_head(&f->hlist, &tmp_del_list);
2465
2466				/* Avoid counting removed filters */
2467				continue;
2468			}
2469			if (f->state == I40E_FILTER_NEW) {
2470				/* Create a temporary i40e_new_mac_filter */
2471				new = kzalloc(sizeof(*new), GFP_ATOMIC);
2472				if (!new)
2473					goto err_no_memory_locked;
2474
2475				/* Store pointer to the real filter */
2476				new->f = f;
2477				new->state = f->state;
2478
2479				/* Add it to the hash list */
2480				hlist_add_head(&new->hlist, &tmp_add_list);
2481			}
2482
2483			/* Count the number of active (current and new) VLAN
2484			 * filters we have now. Does not count filters which
2485			 * are marked for deletion.
2486			 */
2487			if (f->vlan > 0)
2488				vlan_filters++;
2489		}
2490
2491		retval = i40e_correct_mac_vlan_filters(vsi,
2492						       &tmp_add_list,
2493						       &tmp_del_list,
2494						       vlan_filters);
2495
2496		hlist_for_each_entry(new, &tmp_add_list, hlist)
2497			netdev_hw_addr_refcnt(new->f, vsi->netdev, 1);
2498
2499		if (retval)
2500			goto err_no_memory_locked;
2501
2502		spin_unlock_bh(&vsi->mac_filter_hash_lock);
2503	}
2504
2505	/* Now process 'del_list' outside the lock */
2506	if (!hlist_empty(&tmp_del_list)) {
2507		filter_list_len = hw->aq.asq_buf_size /
2508			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
2509		list_size = filter_list_len *
2510			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
2511		del_list = kzalloc(list_size, GFP_ATOMIC);
2512		if (!del_list)
2513			goto err_no_memory;
2514
2515		hlist_for_each_entry_safe(f, h, &tmp_del_list, hlist) {
2516			cmd_flags = 0;
2517
2518			/* handle broadcast filters by updating the broadcast
2519			 * promiscuous flag and release filter list.
2520			 */
2521			if (is_broadcast_ether_addr(f->macaddr)) {
2522				i40e_aqc_broadcast_filter(vsi, vsi_name, f);
2523
2524				hlist_del(&f->hlist);
2525				kfree(f);
2526				continue;
2527			}
2528
2529			/* add to delete list */
2530			ether_addr_copy(del_list[num_del].mac_addr, f->macaddr);
2531			if (f->vlan == I40E_VLAN_ANY) {
2532				del_list[num_del].vlan_tag = 0;
2533				cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
2534			} else {
2535				del_list[num_del].vlan_tag =
2536					cpu_to_le16((u16)(f->vlan));
2537			}
2538
2539			cmd_flags |= I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
2540			del_list[num_del].flags = cmd_flags;
2541			num_del++;
2542
2543			/* flush a full buffer */
2544			if (num_del == filter_list_len) {
2545				i40e_aqc_del_filters(vsi, vsi_name, del_list,
2546						     num_del, &retval);
2547				memset(del_list, 0, list_size);
2548				num_del = 0;
2549			}
2550			/* Release memory for MAC filter entries which were
2551			 * synced up with HW.
2552			 */
2553			hlist_del(&f->hlist);
2554			kfree(f);
2555		}
2556
2557		if (num_del) {
2558			i40e_aqc_del_filters(vsi, vsi_name, del_list,
2559					     num_del, &retval);
2560		}
2561
2562		kfree(del_list);
2563		del_list = NULL;
2564	}
2565
2566	if (!hlist_empty(&tmp_add_list)) {
2567		/* Do all the adds now. */
2568		filter_list_len = hw->aq.asq_buf_size /
2569			       sizeof(struct i40e_aqc_add_macvlan_element_data);
2570		list_size = filter_list_len *
2571			       sizeof(struct i40e_aqc_add_macvlan_element_data);
2572		add_list = kzalloc(list_size, GFP_ATOMIC);
2573		if (!add_list)
2574			goto err_no_memory;
2575
2576		num_add = 0;
2577		hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
2578			/* handle broadcast filters by updating the broadcast
2579			 * promiscuous flag instead of adding a MAC filter.
2580			 */
2581			if (is_broadcast_ether_addr(new->f->macaddr)) {
2582				if (i40e_aqc_broadcast_filter(vsi, vsi_name,
2583							      new->f))
2584					new->state = I40E_FILTER_FAILED;
2585				else
2586					new->state = I40E_FILTER_ACTIVE;
2587				continue;
2588			}
2589
2590			/* add to add array */
2591			if (num_add == 0)
2592				add_head = new;
2593			cmd_flags = 0;
2594			ether_addr_copy(add_list[num_add].mac_addr,
2595					new->f->macaddr);
2596			if (new->f->vlan == I40E_VLAN_ANY) {
2597				add_list[num_add].vlan_tag = 0;
2598				cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
2599			} else {
2600				add_list[num_add].vlan_tag =
2601					cpu_to_le16((u16)(new->f->vlan));
2602			}
2603			add_list[num_add].queue_number = 0;
2604			/* set invalid match method for later detection */
2605			add_list[num_add].match_method = I40E_AQC_MM_ERR_NO_RES;
2606			cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
2607			add_list[num_add].flags = cpu_to_le16(cmd_flags);
2608			num_add++;
2609
2610			/* flush a full buffer */
2611			if (num_add == filter_list_len) {
2612				i40e_aqc_add_filters(vsi, vsi_name, add_list,
2613						     add_head, num_add);
2614				memset(add_list, 0, list_size);
2615				num_add = 0;
2616			}
2617		}
2618		if (num_add) {
2619			i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
2620					     num_add);
2621		}
2622		/* Now move all of the filters from the temp add list back to
2623		 * the VSI's list.
2624		 */
2625		spin_lock_bh(&vsi->mac_filter_hash_lock);
2626		hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
2627			/* Only update the state if we're still NEW */
2628			if (new->f->state == I40E_FILTER_NEW)
2629				new->f->state = new->state;
2630			hlist_del(&new->hlist);
2631			netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
2632			kfree(new);
2633		}
2634		spin_unlock_bh(&vsi->mac_filter_hash_lock);
2635		kfree(add_list);
2636		add_list = NULL;
2637	}
2638
2639	/* Determine the number of active and failed filters. */
2640	spin_lock_bh(&vsi->mac_filter_hash_lock);
2641	vsi->active_filters = 0;
2642	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
2643		if (f->state == I40E_FILTER_ACTIVE)
2644			vsi->active_filters++;
2645		else if (f->state == I40E_FILTER_FAILED)
2646			failed_filters++;
2647	}
2648	spin_unlock_bh(&vsi->mac_filter_hash_lock);
2649
2650	/* Check if we are able to exit overflow promiscuous mode. We can
2651	 * safely exit if we didn't just enter, we no longer have any failed
2652	 * filters, and we have reduced filters below the threshold value.
2653	 */
2654	if (old_overflow && !failed_filters &&
2655	    vsi->active_filters < vsi->promisc_threshold) {
2656		dev_info(&pf->pdev->dev,
2657			 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
2658			 vsi_name);
2659		clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2660		vsi->promisc_threshold = 0;
2661	}
2662
2663	/* if the VF is not trusted do not do promisc */
2664	if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) {
2665		clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2666		goto out;
2667	}
2668
2669	new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
2670
2671	/* If we are entering overflow promiscuous, we need to calculate a new
2672	 * threshold for when we are safe to exit
2673	 */
2674	if (!old_overflow && new_overflow)
2675		vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
2676
2677	/* check for changes in promiscuous modes */
2678	if (changed_flags & IFF_ALLMULTI) {
2679		bool cur_multipromisc;
2680
2681		cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
2682		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
2683							       vsi->seid,
2684							       cur_multipromisc,
2685							       NULL);
2686		if (aq_ret) {
2687			retval = i40e_aq_rc_to_posix(aq_ret,
2688						     hw->aq.asq_last_status);
2689			dev_info(&pf->pdev->dev,
2690				 "set multi promisc failed on %s, err %s aq_err %s\n",
2691				 vsi_name,
2692				 i40e_stat_str(hw, aq_ret),
2693				 i40e_aq_str(hw, hw->aq.asq_last_status));
2694		} else {
2695			dev_info(&pf->pdev->dev, "%s allmulti mode.\n",
2696				 cur_multipromisc ? "entering" : "leaving");
2697		}
2698	}
2699
2700	if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
2701		bool cur_promisc;
2702
2703		cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
2704			       new_overflow);
2705		aq_ret = i40e_set_promiscuous(pf, cur_promisc);
2706		if (aq_ret) {
2707			retval = i40e_aq_rc_to_posix(aq_ret,
2708						     hw->aq.asq_last_status);
2709			dev_info(&pf->pdev->dev,
2710				 "Setting promiscuous %s failed on %s, err %s aq_err %s\n",
2711				 cur_promisc ? "on" : "off",
2712				 vsi_name,
2713				 i40e_stat_str(hw, aq_ret),
2714				 i40e_aq_str(hw, hw->aq.asq_last_status));
2715		}
2716	}
2717out:
2718	/* if something went wrong then set the changed flag so we try again */
2719	if (retval)
2720		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
2721
2722	clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state);
2723	return retval;
2724
2725err_no_memory:
2726	/* Restore elements on the temporary add and delete lists */
2727	spin_lock_bh(&vsi->mac_filter_hash_lock);
2728err_no_memory_locked:
2729	i40e_undo_del_filter_entries(vsi, &tmp_del_list);
2730	i40e_undo_add_filter_entries(vsi, &tmp_add_list);
2731	spin_unlock_bh(&vsi->mac_filter_hash_lock);
2732
2733	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
2734	clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state);
2735	return -ENOMEM;
2736}
2737
2738/**
2739 * i40e_sync_filters_subtask - Sync the VSI filter list with HW
2740 * @pf: board private structure
2741 **/
2742static void i40e_sync_filters_subtask(struct i40e_pf *pf)
2743{
2744	int v;
2745
2746	if (!pf)
2747		return;
2748	if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
2749		return;
2750	if (test_bit(__I40E_VF_DISABLE, pf->state)) {
2751		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
2752		return;
2753	}
2754
2755	for (v = 0; v < pf->num_alloc_vsi; v++) {
2756		if (pf->vsi[v] &&
2757		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
2758		    !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) {
2759			int ret = i40e_sync_vsi_filters(pf->vsi[v]);
2760
2761			if (ret) {
2762				/* come back and try again later */
2763				set_bit(__I40E_MACVLAN_SYNC_PENDING,
2764					pf->state);
2765				break;
2766			}
2767		}
2768	}
2769}
2770
2771/**
2772 * i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP
2773 * @vsi: the vsi
2774 **/
2775static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
2776{
2777	if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
2778		return I40E_RXBUFFER_2048;
2779	else
2780		return I40E_RXBUFFER_3072;
2781}
2782
2783/**
2784 * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
2785 * @netdev: network interface device structure
2786 * @new_mtu: new value for maximum frame size
2787 *
2788 * Returns 0 on success, negative on failure
2789 **/
2790static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
2791{
2792	struct i40e_netdev_priv *np = netdev_priv(netdev);
2793	struct i40e_vsi *vsi = np->vsi;
2794	struct i40e_pf *pf = vsi->back;
2795
2796	if (i40e_enabled_xdp_vsi(vsi)) {
2797		int frame_size = new_mtu + I40E_PACKET_HDR_PAD;
2798
2799		if (frame_size > i40e_max_xdp_frame_size(vsi))
2800			return -EINVAL;
2801	}
2802
2803	netdev_dbg(netdev, "changing MTU from %d to %d\n",
2804		   netdev->mtu, new_mtu);
2805	netdev->mtu = new_mtu;
2806	if (netif_running(netdev))
2807		i40e_vsi_reinit_locked(vsi);
2808	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
2809	set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
2810	return 0;
2811}
2812
2813/**
2814 * i40e_ioctl - Access the hwtstamp interface
2815 * @netdev: network interface device structure
2816 * @ifr: interface request data
2817 * @cmd: ioctl command
2818 **/
2819int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
2820{
2821	struct i40e_netdev_priv *np = netdev_priv(netdev);
2822	struct i40e_pf *pf = np->vsi->back;
2823
2824	switch (cmd) {
2825	case SIOCGHWTSTAMP:
2826		return i40e_ptp_get_ts_config(pf, ifr);
2827	case SIOCSHWTSTAMP:
2828		return i40e_ptp_set_ts_config(pf, ifr);
2829	default:
2830		return -EOPNOTSUPP;
2831	}
2832}
2833
2834/**
2835 * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI
2836 * @vsi: the vsi being adjusted
2837 **/
2838void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
2839{
2840	struct i40e_vsi_context ctxt;
2841	i40e_status ret;
2842
2843	/* Don't modify stripping options if a port VLAN is active */
2844	if (vsi->info.pvid)
2845		return;
2846
2847	if ((vsi->info.valid_sections &
2848	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
2849	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0))
2850		return;  /* already enabled */
2851
2852	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
2853	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
2854				    I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH;
2855
2856	ctxt.seid = vsi->seid;
2857	ctxt.info = vsi->info;
2858	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
2859	if (ret) {
2860		dev_info(&vsi->back->pdev->dev,
2861			 "update vlan stripping failed, err %s aq_err %s\n",
2862			 i40e_stat_str(&vsi->back->hw, ret),
2863			 i40e_aq_str(&vsi->back->hw,
2864				     vsi->back->hw.aq.asq_last_status));
2865	}
2866}
2867
2868/**
2869 * i40e_vlan_stripping_disable - Turn off vlan stripping for the VSI
2870 * @vsi: the vsi being adjusted
2871 **/
2872void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
2873{
2874	struct i40e_vsi_context ctxt;
2875	i40e_status ret;
2876
2877	/* Don't modify stripping options if a port VLAN is active */
2878	if (vsi->info.pvid)
2879		return;
2880
2881	if ((vsi->info.valid_sections &
2882	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
2883	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) ==
2884	     I40E_AQ_VSI_PVLAN_EMOD_MASK))
2885		return;  /* already disabled */
2886
2887	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
2888	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
2889				    I40E_AQ_VSI_PVLAN_EMOD_NOTHING;
2890
2891	ctxt.seid = vsi->seid;
2892	ctxt.info = vsi->info;
2893	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
2894	if (ret) {
2895		dev_info(&vsi->back->pdev->dev,
2896			 "update vlan stripping failed, err %s aq_err %s\n",
2897			 i40e_stat_str(&vsi->back->hw, ret),
2898			 i40e_aq_str(&vsi->back->hw,
2899				     vsi->back->hw.aq.asq_last_status));
2900	}
2901}
2902
2903/**
2904 * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
2905 * @vsi: the vsi being configured
2906 * @vid: vlan id to be added (0 = untagged only , -1 = any)
2907 *
2908 * This is a helper function for adding a new MAC/VLAN filter with the
2909 * specified VLAN for each existing MAC address already in the hash table.
2910 * This function does *not* perform any accounting to update filters based on
2911 * VLAN mode.
2912 *
2913 * NOTE: this function expects to be called while under the
2914 * mac_filter_hash_lock
2915 **/
2916int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
2917{
2918	struct i40e_mac_filter *f, *add_f;
2919	struct hlist_node *h;
2920	int bkt;
2921
2922	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
2923		if (f->state == I40E_FILTER_REMOVE)
2924			continue;
2925		add_f = i40e_add_filter(vsi, f->macaddr, vid);
2926		if (!add_f) {
2927			dev_info(&vsi->back->pdev->dev,
2928				 "Could not add vlan filter %d for %pM\n",
2929				 vid, f->macaddr);
2930			return -ENOMEM;
2931		}
2932	}
2933
2934	return 0;
2935}
2936
2937/**
2938 * i40e_vsi_add_vlan - Add VSI membership for given VLAN
2939 * @vsi: the VSI being configured
2940 * @vid: VLAN id to be added
2941 **/
2942int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid)
2943{
2944	int err;
2945
2946	if (vsi->info.pvid)
2947		return -EINVAL;
2948
2949	/* The network stack will attempt to add VID=0, with the intention to
2950	 * receive priority tagged packets with a VLAN of 0. Our HW receives
2951	 * these packets by default when configured to receive untagged
2952	 * packets, so we don't need to add a filter for this case.
2953	 * Additionally, HW interprets adding a VID=0 filter as meaning to
2954	 * receive *only* tagged traffic and stops receiving untagged traffic.
2955	 * Thus, we do not want to actually add a filter for VID=0
2956	 */
2957	if (!vid)
2958		return 0;
2959
2960	/* Locked once because all functions invoked below iterates list*/
2961	spin_lock_bh(&vsi->mac_filter_hash_lock);
2962	err = i40e_add_vlan_all_mac(vsi, vid);
2963	spin_unlock_bh(&vsi->mac_filter_hash_lock);
2964	if (err)
2965		return err;
2966
2967	/* schedule our worker thread which will take care of
2968	 * applying the new filter changes
2969	 */
2970	i40e_service_event_schedule(vsi->back);
2971	return 0;
2972}
2973
2974/**
2975 * i40e_rm_vlan_all_mac - Remove MAC/VLAN pair for all MAC with the given VLAN
2976 * @vsi: the vsi being configured
2977 * @vid: vlan id to be removed (0 = untagged only , -1 = any)
2978 *
2979 * This function should be used to remove all VLAN filters which match the
2980 * given VID. It does not schedule the service event and does not take the
2981 * mac_filter_hash_lock so it may be combined with other operations under
2982 * a single invocation of the mac_filter_hash_lock.
2983 *
2984 * NOTE: this function expects to be called while under the
2985 * mac_filter_hash_lock
2986 */
2987void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
2988{
2989	struct i40e_mac_filter *f;
2990	struct hlist_node *h;
2991	int bkt;
2992
2993	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
2994		if (f->vlan == vid)
2995			__i40e_del_filter(vsi, f);
2996	}
2997}
2998
2999/**
3000 * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN
3001 * @vsi: the VSI being configured
3002 * @vid: VLAN id to be removed
3003 **/
3004void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
3005{
3006	if (!vid || vsi->info.pvid)
3007		return;
3008
3009	spin_lock_bh(&vsi->mac_filter_hash_lock);
3010	i40e_rm_vlan_all_mac(vsi, vid);
3011	spin_unlock_bh(&vsi->mac_filter_hash_lock);
3012
3013	/* schedule our worker thread which will take care of
3014	 * applying the new filter changes
3015	 */
3016	i40e_service_event_schedule(vsi->back);
3017}
3018
3019/**
3020 * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
3021 * @netdev: network interface to be adjusted
3022 * @proto: unused protocol value
3023 * @vid: vlan id to be added
3024 *
3025 * net_device_ops implementation for adding vlan ids
3026 **/
3027static int i40e_vlan_rx_add_vid(struct net_device *netdev,
3028				__always_unused __be16 proto, u16 vid)
3029{
3030	struct i40e_netdev_priv *np = netdev_priv(netdev);
3031	struct i40e_vsi *vsi = np->vsi;
3032	int ret = 0;
3033
3034	if (vid >= VLAN_N_VID)
3035		return -EINVAL;
3036
3037	ret = i40e_vsi_add_vlan(vsi, vid);
3038	if (!ret)
3039		set_bit(vid, vsi->active_vlans);
3040
3041	return ret;
3042}
3043
3044/**
3045 * i40e_vlan_rx_add_vid_up - Add a vlan id filter to HW offload in UP path
3046 * @netdev: network interface to be adjusted
3047 * @proto: unused protocol value
3048 * @vid: vlan id to be added
3049 **/
3050static void i40e_vlan_rx_add_vid_up(struct net_device *netdev,
3051				    __always_unused __be16 proto, u16 vid)
3052{
3053	struct i40e_netdev_priv *np = netdev_priv(netdev);
3054	struct i40e_vsi *vsi = np->vsi;
3055
3056	if (vid >= VLAN_N_VID)
3057		return;
3058	set_bit(vid, vsi->active_vlans);
3059}
3060
3061/**
3062 * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
3063 * @netdev: network interface to be adjusted
3064 * @proto: unused protocol value
3065 * @vid: vlan id to be removed
3066 *
3067 * net_device_ops implementation for removing vlan ids
3068 **/
3069static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
3070				 __always_unused __be16 proto, u16 vid)
3071{
3072	struct i40e_netdev_priv *np = netdev_priv(netdev);
3073	struct i40e_vsi *vsi = np->vsi;
3074
3075	/* return code is ignored as there is nothing a user
3076	 * can do about failure to remove and a log message was
3077	 * already printed from the other function
3078	 */
3079	i40e_vsi_kill_vlan(vsi, vid);
3080
3081	clear_bit(vid, vsi->active_vlans);
3082
3083	return 0;
3084}
3085
3086/**
3087 * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up
3088 * @vsi: the vsi being brought back up
3089 **/
3090static void i40e_restore_vlan(struct i40e_vsi *vsi)
3091{
3092	u16 vid;
3093
3094	if (!vsi->netdev)
3095		return;
3096
3097	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
3098		i40e_vlan_stripping_enable(vsi);
3099	else
3100		i40e_vlan_stripping_disable(vsi);
3101
3102	for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
3103		i40e_vlan_rx_add_vid_up(vsi->netdev, htons(ETH_P_8021Q),
3104					vid);
3105}
3106
3107/**
3108 * i40e_vsi_add_pvid - Add pvid for the VSI
3109 * @vsi: the vsi being adjusted
3110 * @vid: the vlan id to set as a PVID
3111 **/
3112int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
3113{
3114	struct i40e_vsi_context ctxt;
3115	i40e_status ret;
3116
3117	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
3118	vsi->info.pvid = cpu_to_le16(vid);
3119	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_TAGGED |
3120				    I40E_AQ_VSI_PVLAN_INSERT_PVID |
3121				    I40E_AQ_VSI_PVLAN_EMOD_STR;
3122
3123	ctxt.seid = vsi->seid;
3124	ctxt.info = vsi->info;
3125	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
3126	if (ret) {
3127		dev_info(&vsi->back->pdev->dev,
3128			 "add pvid failed, err %s aq_err %s\n",
3129			 i40e_stat_str(&vsi->back->hw, ret),
3130			 i40e_aq_str(&vsi->back->hw,
3131				     vsi->back->hw.aq.asq_last_status));
3132		return -ENOENT;
3133	}
3134
3135	return 0;
3136}
3137
3138/**
3139 * i40e_vsi_remove_pvid - Remove the pvid from the VSI
3140 * @vsi: the vsi being adjusted
3141 *
3142 * Just use the vlan_rx_register() service to put it back to normal
3143 **/
3144void i40e_vsi_remove_pvid(struct i40e_vsi *vsi)
3145{
3146	vsi->info.pvid = 0;
3147
3148	i40e_vlan_stripping_disable(vsi);
3149}
3150
3151/**
3152 * i40e_vsi_setup_tx_resources - Allocate VSI Tx queue resources
3153 * @vsi: ptr to the VSI
3154 *
3155 * If this function returns with an error, then it's possible one or
3156 * more of the rings is populated (while the rest are not).  It is the
3157 * callers duty to clean those orphaned rings.
3158 *
3159 * Return 0 on success, negative on failure
3160 **/
3161static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi)
3162{
3163	int i, err = 0;
3164
3165	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3166		err = i40e_setup_tx_descriptors(vsi->tx_rings[i]);
3167
3168	if (!i40e_enabled_xdp_vsi(vsi))
3169		return err;
3170
3171	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3172		err = i40e_setup_tx_descriptors(vsi->xdp_rings[i]);
3173
3174	return err;
3175}
3176
3177/**
3178 * i40e_vsi_free_tx_resources - Free Tx resources for VSI queues
3179 * @vsi: ptr to the VSI
3180 *
3181 * Free VSI's transmit software resources
3182 **/
3183static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi)
3184{
3185	int i;
3186
3187	if (vsi->tx_rings) {
3188		for (i = 0; i < vsi->num_queue_pairs; i++)
3189			if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
3190				i40e_free_tx_resources(vsi->tx_rings[i]);
3191	}
3192
3193	if (vsi->xdp_rings) {
3194		for (i = 0; i < vsi->num_queue_pairs; i++)
3195			if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
3196				i40e_free_tx_resources(vsi->xdp_rings[i]);
3197	}
3198}
3199
3200/**
3201 * i40e_vsi_setup_rx_resources - Allocate VSI queues Rx resources
3202 * @vsi: ptr to the VSI
3203 *
3204 * If this function returns with an error, then it's possible one or
3205 * more of the rings is populated (while the rest are not).  It is the
3206 * callers duty to clean those orphaned rings.
3207 *
3208 * Return 0 on success, negative on failure
3209 **/
3210static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi)
3211{
3212	int i, err = 0;
3213
3214	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3215		err = i40e_setup_rx_descriptors(vsi->rx_rings[i]);
3216	return err;
3217}
3218
3219/**
3220 * i40e_vsi_free_rx_resources - Free Rx Resources for VSI queues
3221 * @vsi: ptr to the VSI
3222 *
3223 * Free all receive software resources
3224 **/
3225static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
3226{
3227	int i;
3228
3229	if (!vsi->rx_rings)
3230		return;
3231
3232	for (i = 0; i < vsi->num_queue_pairs; i++)
3233		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
3234			i40e_free_rx_resources(vsi->rx_rings[i]);
3235}
3236
3237/**
3238 * i40e_config_xps_tx_ring - Configure XPS for a Tx ring
3239 * @ring: The Tx ring to configure
3240 *
3241 * This enables/disables XPS for a given Tx descriptor ring
3242 * based on the TCs enabled for the VSI that ring belongs to.
3243 **/
3244static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
3245{
3246	int cpu;
3247
3248	if (!ring->q_vector || !ring->netdev || ring->ch)
3249		return;
3250
3251	/* We only initialize XPS once, so as not to overwrite user settings */
3252	if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
3253		return;
3254
3255	cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
3256	netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
3257			    ring->queue_index);
3258}
3259
3260/**
3261 * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled
3262 * @ring: The Tx or Rx ring
3263 *
3264 * Returns the AF_XDP buffer pool or NULL.
3265 **/
3266static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring)
3267{
3268	bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
3269	int qid = ring->queue_index;
3270
3271	if (ring_is_xdp(ring))
3272		qid -= ring->vsi->alloc_queue_pairs;
3273
3274	if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps))
3275		return NULL;
3276
3277	return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
3278}
3279
3280/**
3281 * i40e_configure_tx_ring - Configure a transmit ring context and rest
3282 * @ring: The Tx ring to configure
3283 *
3284 * Configure the Tx descriptor ring in the HMC context.
3285 **/
3286static int i40e_configure_tx_ring(struct i40e_ring *ring)
3287{
3288	struct i40e_vsi *vsi = ring->vsi;
3289	u16 pf_q = vsi->base_queue + ring->queue_index;
3290	struct i40e_hw *hw = &vsi->back->hw;
3291	struct i40e_hmc_obj_txq tx_ctx;
3292	i40e_status err = 0;
3293	u32 qtx_ctl = 0;
3294
3295	if (ring_is_xdp(ring))
3296		ring->xsk_pool = i40e_xsk_pool(ring);
3297
3298	/* some ATR related tx ring init */
3299	if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
3300		ring->atr_sample_rate = vsi->back->atr_sample_rate;
3301		ring->atr_count = 0;
3302	} else {
3303		ring->atr_sample_rate = 0;
3304	}
3305
3306	/* configure XPS */
3307	i40e_config_xps_tx_ring(ring);
3308
3309	/* clear the context structure first */
3310	memset(&tx_ctx, 0, sizeof(tx_ctx));
3311
3312	tx_ctx.new_context = 1;
3313	tx_ctx.base = (ring->dma / 128);
3314	tx_ctx.qlen = ring->count;
3315	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
3316					       I40E_FLAG_FD_ATR_ENABLED));
3317	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
3318	/* FDIR VSI tx ring can still use RS bit and writebacks */
3319	if (vsi->type != I40E_VSI_FDIR)
3320		tx_ctx.head_wb_ena = 1;
3321	tx_ctx.head_wb_addr = ring->dma +
3322			      (ring->count * sizeof(struct i40e_tx_desc));
3323
3324	/* As part of VSI creation/update, FW allocates certain
3325	 * Tx arbitration queue sets for each TC enabled for
3326	 * the VSI. The FW returns the handles to these queue
3327	 * sets as part of the response buffer to Add VSI,
3328	 * Update VSI, etc. AQ commands. It is expected that
3329	 * these queue set handles be associated with the Tx
3330	 * queues by the driver as part of the TX queue context
3331	 * initialization. This has to be done regardless of
3332	 * DCB as by default everything is mapped to TC0.
3333	 */
3334
3335	if (ring->ch)
3336		tx_ctx.rdylist =
3337			le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
3338
3339	else
3340		tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
3341
3342	tx_ctx.rdylist_act = 0;
3343
3344	/* clear the context in the HMC */
3345	err = i40e_clear_lan_tx_queue_context(hw, pf_q);
3346	if (err) {
3347		dev_info(&vsi->back->pdev->dev,
3348			 "Failed to clear LAN Tx queue context on Tx ring %d (pf_q %d), error: %d\n",
3349			 ring->queue_index, pf_q, err);
3350		return -ENOMEM;
3351	}
3352
3353	/* set the context in the HMC */
3354	err = i40e_set_lan_tx_queue_context(hw, pf_q, &tx_ctx);
3355	if (err) {
3356		dev_info(&vsi->back->pdev->dev,
3357			 "Failed to set LAN Tx queue context on Tx ring %d (pf_q %d, error: %d\n",
3358			 ring->queue_index, pf_q, err);
3359		return -ENOMEM;
3360	}
3361
3362	/* Now associate this queue with this PCI function */
3363	if (ring->ch) {
3364		if (ring->ch->type == I40E_VSI_VMDQ2)
3365			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
3366		else
3367			return -EINVAL;
3368
3369		qtx_ctl |= (ring->ch->vsi_number <<
3370			    I40E_QTX_CTL_VFVM_INDX_SHIFT) &
3371			    I40E_QTX_CTL_VFVM_INDX_MASK;
3372	} else {
3373		if (vsi->type == I40E_VSI_VMDQ2) {
3374			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
3375			qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
3376				    I40E_QTX_CTL_VFVM_INDX_MASK;
3377		} else {
3378			qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
3379		}
3380	}
3381
3382	qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
3383		    I40E_QTX_CTL_PF_INDX_MASK);
3384	wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
3385	i40e_flush(hw);
3386
3387	/* cache tail off for easier writes later */
3388	ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
3389
3390	return 0;
3391}
3392
3393/**
3394 * i40e_configure_rx_ring - Configure a receive ring context
3395 * @ring: The Rx ring to configure
3396 *
3397 * Configure the Rx descriptor ring in the HMC context.
3398 **/
3399static int i40e_configure_rx_ring(struct i40e_ring *ring)
3400{
3401	struct i40e_vsi *vsi = ring->vsi;
3402	u32 chain_len = vsi->back->hw.func_caps.rx_buf_chain_len;
3403	u16 pf_q = vsi->base_queue + ring->queue_index;
3404	struct i40e_hw *hw = &vsi->back->hw;
3405	struct i40e_hmc_obj_rxq rx_ctx;
3406	i40e_status err = 0;
3407	bool ok;
3408	int ret;
3409
3410	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
3411
3412	/* clear the context structure first */
3413	memset(&rx_ctx, 0, sizeof(rx_ctx));
3414
3415	if (ring->vsi->type == I40E_VSI_MAIN)
3416		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
3417
3418	ring->xsk_pool = i40e_xsk_pool(ring);
3419	if (ring->xsk_pool) {
3420		ring->rx_buf_len =
3421		  xsk_pool_get_rx_frame_size(ring->xsk_pool);
3422		/* For AF_XDP ZC, we disallow packets to span on
3423		 * multiple buffers, thus letting us skip that
3424		 * handling in the fast-path.
3425		 */
3426		chain_len = 1;
3427		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
3428						 MEM_TYPE_XSK_BUFF_POOL,
3429						 NULL);
3430		if (ret)
3431			return ret;
3432		dev_info(&vsi->back->pdev->dev,
3433			 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
3434			 ring->queue_index);
3435
3436	} else {
3437		ring->rx_buf_len = vsi->rx_buf_len;
3438		if (ring->vsi->type == I40E_VSI_MAIN) {
3439			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
3440							 MEM_TYPE_PAGE_SHARED,
3441							 NULL);
3442			if (ret)
3443				return ret;
3444		}
3445	}
3446
3447	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
3448				    BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
3449
3450	rx_ctx.base = (ring->dma / 128);
3451	rx_ctx.qlen = ring->count;
3452
3453	/* use 16 byte descriptors */
3454	rx_ctx.dsize = 0;
3455
3456	/* descriptor type is always zero
3457	 * rx_ctx.dtype = 0;
3458	 */
3459	rx_ctx.hsplit_0 = 0;
3460
3461	rx_ctx.rxmax = min_t(u16, vsi->max_frame, chain_len * ring->rx_buf_len);
3462	if (hw->revision_id == 0)
3463		rx_ctx.lrxqthresh = 0;
3464	else
3465		rx_ctx.lrxqthresh = 1;
3466	rx_ctx.crcstrip = 1;
3467	rx_ctx.l2tsel = 1;
3468	/* this controls whether VLAN is stripped from inner headers */
3469	rx_ctx.showiv = 0;
3470	/* set the prefena field to 1 because the manual says to */
3471	rx_ctx.prefena = 1;
3472
3473	/* clear the context in the HMC */
3474	err = i40e_clear_lan_rx_queue_context(hw, pf_q);
3475	if (err) {
3476		dev_info(&vsi->back->pdev->dev,
3477			 "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
3478			 ring->queue_index, pf_q, err);
3479		return -ENOMEM;
3480	}
3481
3482	/* set the context in the HMC */
3483	err = i40e_set_lan_rx_queue_context(hw, pf_q, &rx_ctx);
3484	if (err) {
3485		dev_info(&vsi->back->pdev->dev,
3486			 "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
3487			 ring->queue_index, pf_q, err);
3488		return -ENOMEM;
3489	}
3490
3491	/* configure Rx buffer alignment */
3492	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
3493		clear_ring_build_skb_enabled(ring);
3494	else
3495		set_ring_build_skb_enabled(ring);
3496
3497	/* cache tail for quicker writes, and clear the reg before use */
3498	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
3499	writel(0, ring->tail);
3500
3501	if (ring->xsk_pool) {
3502		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
3503		ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring));
3504	} else {
3505		ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
3506	}
3507	if (!ok) {
3508		/* Log this in case the user has forgotten to give the kernel
3509		 * any buffers, even later in the application.
3510		 */
3511		dev_info(&vsi->back->pdev->dev,
3512			 "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
3513			 ring->xsk_pool ? "AF_XDP ZC enabled " : "",
3514			 ring->queue_index, pf_q);
3515	}
3516
3517	return 0;
3518}
3519
3520/**
3521 * i40e_vsi_configure_tx - Configure the VSI for Tx
3522 * @vsi: VSI structure describing this set of rings and resources
3523 *
3524 * Configure the Tx VSI for operation.
3525 **/
3526static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
3527{
3528	int err = 0;
3529	u16 i;
3530
3531	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
3532		err = i40e_configure_tx_ring(vsi->tx_rings[i]);
3533
3534	if (err || !i40e_enabled_xdp_vsi(vsi))
3535		return err;
3536
3537	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
3538		err = i40e_configure_tx_ring(vsi->xdp_rings[i]);
3539
3540	return err;
3541}
3542
3543/**
3544 * i40e_vsi_configure_rx - Configure the VSI for Rx
3545 * @vsi: the VSI being configured
3546 *
3547 * Configure the Rx VSI for operation.
3548 **/
3549static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
3550{
3551	int err = 0;
3552	u16 i;
3553
3554	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
3555		vsi->max_frame = I40E_MAX_RXBUFFER;
3556		vsi->rx_buf_len = I40E_RXBUFFER_2048;
3557#if (PAGE_SIZE < 8192)
3558	} else if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
3559		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
3560		vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
3561		vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
3562#endif
3563	} else {
3564		vsi->max_frame = I40E_MAX_RXBUFFER;
3565		vsi->rx_buf_len = (PAGE_SIZE < 8192) ? I40E_RXBUFFER_3072 :
3566						       I40E_RXBUFFER_2048;
3567	}
3568
3569	/* set up individual rings */
3570	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
3571		err = i40e_configure_rx_ring(vsi->rx_rings[i]);
3572
3573	return err;
3574}
3575
3576/**
3577 * i40e_vsi_config_dcb_rings - Update rings to reflect DCB TC
3578 * @vsi: ptr to the VSI
3579 **/
3580static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
3581{
3582	struct i40e_ring *tx_ring, *rx_ring;
3583	u16 qoffset, qcount;
3584	int i, n;
3585
3586	if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
3587		/* Reset the TC information */
3588		for (i = 0; i < vsi->num_queue_pairs; i++) {
3589			rx_ring = vsi->rx_rings[i];
3590			tx_ring = vsi->tx_rings[i];
3591			rx_ring->dcb_tc = 0;
3592			tx_ring->dcb_tc = 0;
3593		}
3594		return;
3595	}
3596
3597	for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
3598		if (!(vsi->tc_config.enabled_tc & BIT_ULL(n)))
3599			continue;
3600
3601		qoffset = vsi->tc_config.tc_info[n].qoffset;
3602		qcount = vsi->tc_config.tc_info[n].qcount;
3603		for (i = qoffset; i < (qoffset + qcount); i++) {
3604			rx_ring = vsi->rx_rings[i];
3605			tx_ring = vsi->tx_rings[i];
3606			rx_ring->dcb_tc = n;
3607			tx_ring->dcb_tc = n;
3608		}
3609	}
3610}
3611
3612/**
3613 * i40e_set_vsi_rx_mode - Call set_rx_mode on a VSI
3614 * @vsi: ptr to the VSI
3615 **/
3616static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
3617{
3618	if (vsi->netdev)
3619		i40e_set_rx_mode(vsi->netdev);
3620}
3621
3622/**
3623 * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters
3624 * @vsi: Pointer to the targeted VSI
3625 *
3626 * This function replays the hlist on the hw where all the SB Flow Director
3627 * filters were saved.
3628 **/
3629static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
3630{
3631	struct i40e_fdir_filter *filter;
3632	struct i40e_pf *pf = vsi->back;
3633	struct hlist_node *node;
3634
3635	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
3636		return;
3637
3638	/* Reset FDir counters as we're replaying all existing filters */
3639	pf->fd_tcp4_filter_cnt = 0;
3640	pf->fd_udp4_filter_cnt = 0;
3641	pf->fd_sctp4_filter_cnt = 0;
3642	pf->fd_ip4_filter_cnt = 0;
3643
3644	hlist_for_each_entry_safe(filter, node,
3645				  &pf->fdir_filter_list, fdir_node) {
3646		i40e_add_del_fdir(vsi, filter, true);
3647	}
3648}
3649
3650/**
3651 * i40e_vsi_configure - Set up the VSI for action
3652 * @vsi: the VSI being configured
3653 **/
3654static int i40e_vsi_configure(struct i40e_vsi *vsi)
3655{
3656	int err;
3657
3658	i40e_set_vsi_rx_mode(vsi);
3659	i40e_restore_vlan(vsi);
3660	i40e_vsi_config_dcb_rings(vsi);
3661	err = i40e_vsi_configure_tx(vsi);
3662	if (!err)
3663		err = i40e_vsi_configure_rx(vsi);
3664
3665	return err;
3666}
3667
3668/**
3669 * i40e_vsi_configure_msix - MSIX mode Interrupt Config in the HW
3670 * @vsi: the VSI being configured
3671 **/
3672static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
3673{
3674	bool has_xdp = i40e_enabled_xdp_vsi(vsi);
3675	struct i40e_pf *pf = vsi->back;
3676	struct i40e_hw *hw = &pf->hw;
3677	u16 vector;
3678	int i, q;
3679	u32 qp;
3680
3681	/* The interrupt indexing is offset by 1 in the PFINT_ITRn
3682	 * and PFINT_LNKLSTn registers, e.g.:
3683	 *   PFINT_ITRn[0..n-1] gets msix-1..msix-n  (qpair interrupts)
3684	 */
3685	qp = vsi->base_queue;
3686	vector = vsi->base_vector;
3687	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
3688		struct i40e_q_vector *q_vector = vsi->q_vectors[i];
3689
3690		q_vector->rx.next_update = jiffies + 1;
3691		q_vector->rx.target_itr =
3692			ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
3693		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
3694		     q_vector->rx.target_itr >> 1);
3695		q_vector->rx.current_itr = q_vector->rx.target_itr;
3696
3697		q_vector->tx.next_update = jiffies + 1;
3698		q_vector->tx.target_itr =
3699			ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
3700		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
3701		     q_vector->tx.target_itr >> 1);
3702		q_vector->tx.current_itr = q_vector->tx.target_itr;
3703
3704		wr32(hw, I40E_PFINT_RATEN(vector - 1),
3705		     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
3706
3707		/* Linked list for the queuepairs assigned to this vector */
3708		wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp);
3709		for (q = 0; q < q_vector->num_ringpairs; q++) {
3710			u32 nextqp = has_xdp ? qp + vsi->alloc_queue_pairs : qp;
3711			u32 val;
3712
3713			val = I40E_QINT_RQCTL_CAUSE_ENA_MASK |
3714			      (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
3715			      (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
3716			      (nextqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
3717			      (I40E_QUEUE_TYPE_TX <<
3718			       I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);
3719
3720			wr32(hw, I40E_QINT_RQCTL(qp), val);
3721
3722			if (has_xdp) {
3723				val = I40E_QINT_TQCTL_CAUSE_ENA_MASK |
3724				      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
3725				      (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
3726				      (qp << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
3727				      (I40E_QUEUE_TYPE_TX <<
3728				       I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
3729
3730				wr32(hw, I40E_QINT_TQCTL(nextqp), val);
3731			}
3732
3733			val = I40E_QINT_TQCTL_CAUSE_ENA_MASK |
3734			      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
3735			      (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
3736			      ((qp + 1) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
3737			      (I40E_QUEUE_TYPE_RX <<
3738			       I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
3739
3740			/* Terminate the linked list */
3741			if (q == (q_vector->num_ringpairs - 1))
3742				val |= (I40E_QUEUE_END_OF_LIST <<
3743					I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT);
3744
3745			wr32(hw, I40E_QINT_TQCTL(qp), val);
3746			qp++;
3747		}
3748	}
3749
3750	i40e_flush(hw);
3751}
3752
3753/**
3754 * i40e_enable_misc_int_causes - enable the non-queue interrupts
3755 * @pf: pointer to private device data structure
3756 **/
3757static void i40e_enable_misc_int_causes(struct i40e_pf *pf)
3758{
3759	struct i40e_hw *hw = &pf->hw;
3760	u32 val;
3761
3762	/* clear things first */
3763	wr32(hw, I40E_PFINT_ICR0_ENA, 0);  /* disable all */
3764	rd32(hw, I40E_PFINT_ICR0);         /* read to clear */
3765
3766	val = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK       |
3767	      I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK    |
3768	      I40E_PFINT_ICR0_ENA_GRST_MASK          |
3769	      I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK |
3770	      I40E_PFINT_ICR0_ENA_GPIO_MASK          |
3771	      I40E_PFINT_ICR0_ENA_HMC_ERR_MASK       |
3772	      I40E_PFINT_ICR0_ENA_VFLR_MASK          |
3773	      I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
3774
3775	if (pf->flags & I40E_FLAG_IWARP_ENABLED)
3776		val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
3777
3778	if (pf->flags & I40E_FLAG_PTP)
3779		val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
3780
3781	wr32(hw, I40E_PFINT_ICR0_ENA, val);
3782
3783	/* SW_ITR_IDX = 0, but don't change INTENA */
3784	wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK |
3785					I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK);
3786
3787	/* OTHER_ITR_IDX = 0 */
3788	wr32(hw, I40E_PFINT_STAT_CTL0, 0);
3789}
3790
3791/**
3792 * i40e_configure_msi_and_legacy - Legacy mode interrupt config in the HW
3793 * @vsi: the VSI being configured
3794 **/
3795static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
3796{
3797	u32 nextqp = i40e_enabled_xdp_vsi(vsi) ? vsi->alloc_queue_pairs : 0;
3798	struct i40e_q_vector *q_vector = vsi->q_vectors[0];
3799	struct i40e_pf *pf = vsi->back;
3800	struct i40e_hw *hw = &pf->hw;
3801	u32 val;
3802
3803	/* set the ITR configuration */
3804	q_vector->rx.next_update = jiffies + 1;
3805	q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
3806	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1);
3807	q_vector->rx.current_itr = q_vector->rx.target_itr;
3808	q_vector->tx.next_update = jiffies + 1;
3809	q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
3810	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1);
3811	q_vector->tx.current_itr = q_vector->tx.target_itr;
3812
3813	i40e_enable_misc_int_causes(pf);
3814
3815	/* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */
3816	wr32(hw, I40E_PFINT_LNKLST0, 0);
3817
3818	/* Associate the queue pair to the vector and enable the queue int */
3819	val = I40E_QINT_RQCTL_CAUSE_ENA_MASK		       |
3820	      (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT)  |
3821	      (nextqp	   << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)|
3822	      (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
3823
3824	wr32(hw, I40E_QINT_RQCTL(0), val);
3825
3826	if (i40e_enabled_xdp_vsi(vsi)) {
3827		val = I40E_QINT_TQCTL_CAUSE_ENA_MASK		     |
3828		      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT)|
3829		      (I40E_QUEUE_TYPE_TX
3830		       << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
3831
3832		wr32(hw, I40E_QINT_TQCTL(nextqp), val);
3833	}
3834
3835	val = I40E_QINT_TQCTL_CAUSE_ENA_MASK		      |
3836	      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
3837	      (I40E_QUEUE_END_OF_LIST << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT);
3838
3839	wr32(hw, I40E_QINT_TQCTL(0), val);
3840	i40e_flush(hw);
3841}
3842
3843/**
3844 * i40e_irq_dynamic_disable_icr0 - Disable default interrupt generation for icr0
3845 * @pf: board private structure
3846 **/
3847void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf)
3848{
3849	struct i40e_hw *hw = &pf->hw;
3850
3851	wr32(hw, I40E_PFINT_DYN_CTL0,
3852	     I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
3853	i40e_flush(hw);
3854}
3855
3856/**
3857 * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
3858 * @pf: board private structure
3859 **/
3860void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
3861{
3862	struct i40e_hw *hw = &pf->hw;
3863	u32 val;
3864
3865	val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
3866	      I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
3867	      (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
3868
3869	wr32(hw, I40E_PFINT_DYN_CTL0, val);
3870	i40e_flush(hw);
3871}
3872
3873/**
3874 * i40e_msix_clean_rings - MSIX mode Interrupt Handler
3875 * @irq: interrupt number
3876 * @data: pointer to a q_vector
3877 **/
3878static irqreturn_t i40e_msix_clean_rings(int irq, void *data)
3879{
3880	struct i40e_q_vector *q_vector = data;
3881
3882	if (!q_vector->tx.ring && !q_vector->rx.ring)
3883		return IRQ_HANDLED;
3884
3885	napi_schedule_irqoff(&q_vector->napi);
3886
3887	return IRQ_HANDLED;
3888}
3889
3890/**
3891 * i40e_irq_affinity_notify - Callback for affinity changes
3892 * @notify: context as to what irq was changed
3893 * @mask: the new affinity mask
3894 *
3895 * This is a callback function used by the irq_set_affinity_notifier function
3896 * so that we may register to receive changes to the irq affinity masks.
3897 **/
3898static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify,
3899				     const cpumask_t *mask)
3900{
3901	struct i40e_q_vector *q_vector =
3902		container_of(notify, struct i40e_q_vector, affinity_notify);
3903
3904	cpumask_copy(&q_vector->affinity_mask, mask);
3905}
3906
3907/**
3908 * i40e_irq_affinity_release - Callback for affinity notifier release
3909 * @ref: internal core kernel usage
3910 *
3911 * This is a callback function used by the irq_set_affinity_notifier function
3912 * to inform the current notification subscriber that they will no longer
3913 * receive notifications.
3914 **/
3915static void i40e_irq_affinity_release(struct kref *ref) {}
3916
3917/**
3918 * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts
3919 * @vsi: the VSI being configured
3920 * @basename: name for the vector
3921 *
3922 * Allocates MSI-X vectors and requests interrupts from the kernel.
3923 **/
3924static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
3925{
3926	int q_vectors = vsi->num_q_vectors;
3927	struct i40e_pf *pf = vsi->back;
3928	int base = vsi->base_vector;
3929	int rx_int_idx = 0;
3930	int tx_int_idx = 0;
3931	int vector, err;
3932	int irq_num;
3933	int cpu;
3934
3935	for (vector = 0; vector < q_vectors; vector++) {
3936		struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
3937
3938		irq_num = pf->msix_entries[base + vector].vector;
3939
3940		if (q_vector->tx.ring && q_vector->rx.ring) {
3941			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
3942				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
3943			tx_int_idx++;
3944		} else if (q_vector->rx.ring) {
3945			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
3946				 "%s-%s-%d", basename, "rx", rx_int_idx++);
3947		} else if (q_vector->tx.ring) {
3948			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
3949				 "%s-%s-%d", basename, "tx", tx_int_idx++);
3950		} else {
3951			/* skip this unused q_vector */
3952			continue;
3953		}
3954		err = request_irq(irq_num,
3955				  vsi->irq_handler,
3956				  0,
3957				  q_vector->name,
3958				  q_vector);
3959		if (err) {
3960			dev_info(&pf->pdev->dev,
3961				 "MSIX request_irq failed, error: %d\n", err);
3962			goto free_queue_irqs;
3963		}
3964
3965		/* register for affinity change notifications */
3966		q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
3967		q_vector->affinity_notify.release = i40e_irq_affinity_release;
3968		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
3969		/* Spread affinity hints out across online CPUs.
3970		 *
3971		 * get_cpu_mask returns a static constant mask with
3972		 * a permanent lifetime so it's ok to pass to
3973		 * irq_set_affinity_hint without making a copy.
3974		 */
3975		cpu = cpumask_local_spread(q_vector->v_idx, -1);
3976		irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
3977	}
3978
3979	vsi->irqs_ready = true;
3980	return 0;
3981
3982free_queue_irqs:
3983	while (vector) {
3984		vector--;
3985		irq_num = pf->msix_entries[base + vector].vector;
3986		irq_set_affinity_notifier(irq_num, NULL);
3987		irq_set_affinity_hint(irq_num, NULL);
3988		free_irq(irq_num, &vsi->q_vectors[vector]);
3989	}
3990	return err;
3991}
3992
3993/**
3994 * i40e_vsi_disable_irq - Mask off queue interrupt generation on the VSI
3995 * @vsi: the VSI being un-configured
3996 **/
3997static void i40e_vsi_disable_irq(struct i40e_vsi *vsi)
3998{
3999	struct i40e_pf *pf = vsi->back;
4000	struct i40e_hw *hw = &pf->hw;
4001	int base = vsi->base_vector;
4002	int i;
4003
4004	/* disable interrupt causation from each queue */
4005	for (i = 0; i < vsi->num_queue_pairs; i++) {
4006		u32 val;
4007
4008		val = rd32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx));
4009		val &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
4010		wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val);
4011
4012		val = rd32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx));
4013		val &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
4014		wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), val);
4015
4016		if (!i40e_enabled_xdp_vsi(vsi))
4017			continue;
4018		wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[i]->reg_idx), 0);
4019	}
4020
4021	/* disable each interrupt */
4022	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4023		for (i = vsi->base_vector;
4024		     i < (vsi->num_q_vectors + vsi->base_vector); i++)
4025			wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0);
4026
4027		i40e_flush(hw);
4028		for (i = 0; i < vsi->num_q_vectors; i++)
4029			synchronize_irq(pf->msix_entries[i + base].vector);
4030	} else {
4031		/* Legacy and MSI mode - this stops all interrupt handling */
4032		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
4033		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
4034		i40e_flush(hw);
4035		synchronize_irq(pf->pdev->irq);
4036	}
4037}
4038
4039/**
4040 * i40e_vsi_enable_irq - Enable IRQ for the given VSI
4041 * @vsi: the VSI being configured
4042 **/
4043static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
4044{
4045	struct i40e_pf *pf = vsi->back;
4046	int i;
4047
4048	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4049		for (i = 0; i < vsi->num_q_vectors; i++)
4050			i40e_irq_dynamic_enable(vsi, i);
4051	} else {
4052		i40e_irq_dynamic_enable_icr0(pf);
4053	}
4054
4055	i40e_flush(&pf->hw);
4056	return 0;
4057}
4058
4059/**
4060 * i40e_free_misc_vector - Free the vector that handles non-queue events
4061 * @pf: board private structure
4062 **/
4063static void i40e_free_misc_vector(struct i40e_pf *pf)
4064{
4065	/* Disable ICR 0 */
4066	wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
4067	i40e_flush(&pf->hw);
4068
4069	if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
4070		synchronize_irq(pf->msix_entries[0].vector);
4071		free_irq(pf->msix_entries[0].vector, pf);
4072		clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
4073	}
4074}
4075
4076/**
4077 * i40e_intr - MSI/Legacy and non-queue interrupt handler
4078 * @irq: interrupt number
4079 * @data: pointer to a q_vector
4080 *
4081 * This is the handler used for all MSI/Legacy interrupts, and deals
4082 * with both queue and non-queue interrupts.  This is also used in
4083 * MSIX mode to handle the non-queue interrupts.
4084 **/
4085static irqreturn_t i40e_intr(int irq, void *data)
4086{
4087	struct i40e_pf *pf = (struct i40e_pf *)data;
4088	struct i40e_hw *hw = &pf->hw;
4089	irqreturn_t ret = IRQ_NONE;
4090	u32 icr0, icr0_remaining;
4091	u32 val, ena_mask;
4092
4093	icr0 = rd32(hw, I40E_PFINT_ICR0);
4094	ena_mask = rd32(hw, I40E_PFINT_ICR0_ENA);
4095
4096	/* if sharing a legacy IRQ, we might get called w/o an intr pending */
4097	if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0)
4098		goto enable_intr;
4099
4100	/* if interrupt but no bits showing, must be SWINT */
4101	if (((icr0 & ~I40E_PFINT_ICR0_INTEVENT_MASK) == 0) ||
4102	    (icr0 & I40E_PFINT_ICR0_SWINT_MASK))
4103		pf->sw_int_count++;
4104
4105	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
4106	    (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) {
4107		ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
4108		dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n");
4109		set_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
4110	}
4111
4112	/* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
4113	if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) {
4114		struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
4115		struct i40e_q_vector *q_vector = vsi->q_vectors[0];
4116
4117		/* We do not have a way to disarm Queue causes while leaving
4118		 * interrupt enabled for all other causes, ideally
4119		 * interrupt should be disabled while we are in NAPI but
4120		 * this is not a performance path and napi_schedule()
4121		 * can deal with rescheduling.
4122		 */
4123		if (!test_bit(__I40E_DOWN, pf->state))
4124			napi_schedule_irqoff(&q_vector->napi);
4125	}
4126
4127	if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
4128		ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
4129		set_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state);
4130		i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n");
4131	}
4132
4133	if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) {
4134		ena_mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
4135		set_bit(__I40E_MDD_EVENT_PENDING, pf->state);
4136	}
4137
4138	if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
4139		/* disable any further VFLR event notifications */
4140		if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) {
4141			u32 reg = rd32(hw, I40E_PFINT_ICR0_ENA);
4142
4143			reg &= ~I40E_PFINT_ICR0_VFLR_MASK;
4144			wr32(hw, I40E_PFINT_ICR0_ENA, reg);
4145		} else {
4146			ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK;
4147			set_bit(__I40E_VFLR_EVENT_PENDING, pf->state);
4148		}
4149	}
4150
4151	if (icr0 & I40E_PFINT_ICR0_GRST_MASK) {
4152		if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
4153			set_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
4154		ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK;
4155		val = rd32(hw, I40E_GLGEN_RSTAT);
4156		val = (val & I40E_GLGEN_RSTAT_RESET_TYPE_MASK)
4157		       >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT;
4158		if (val == I40E_RESET_CORER) {
4159			pf->corer_count++;
4160		} else if (val == I40E_RESET_GLOBR) {
4161			pf->globr_count++;
4162		} else if (val == I40E_RESET_EMPR) {
4163			pf->empr_count++;
4164			set_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state);
4165		}
4166	}
4167
4168	if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) {
4169		icr0 &= ~I40E_PFINT_ICR0_HMC_ERR_MASK;
4170		dev_info(&pf->pdev->dev, "HMC error interrupt\n");
4171		dev_info(&pf->pdev->dev, "HMC error info 0x%x, HMC error data 0x%x\n",
4172			 rd32(hw, I40E_PFHMC_ERRORINFO),
4173			 rd32(hw, I40E_PFHMC_ERRORDATA));
4174	}
4175
4176	if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) {
4177		u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0);
4178
4179		if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK) {
4180			icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
4181			i40e_ptp_tx_hwtstamp(pf);
4182		}
4183	}
4184
4185	/* If a critical error is pending we have no choice but to reset the
4186	 * device.
4187	 * Report and mask out any remaining unexpected interrupts.
4188	 */
4189	icr0_remaining = icr0 & ena_mask;
4190	if (icr0_remaining) {
4191		dev_info(&pf->pdev->dev, "unhandled interrupt icr0=0x%08x\n",
4192			 icr0_remaining);
4193		if ((icr0_remaining & I40E_PFINT_ICR0_PE_CRITERR_MASK) ||
4194		    (icr0_remaining & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) ||
4195		    (icr0_remaining & I40E_PFINT_ICR0_ECC_ERR_MASK)) {
4196			dev_info(&pf->pdev->dev, "device will be reset\n");
4197			set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
4198			i40e_service_event_schedule(pf);
4199		}
4200		ena_mask &= ~icr0_remaining;
4201	}
4202	ret = IRQ_HANDLED;
4203
4204enable_intr:
4205	/* re-enable interrupt causes */
4206	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
4207	if (!test_bit(__I40E_DOWN, pf->state) ||
4208	    test_bit(__I40E_RECOVERY_MODE, pf->state)) {
4209		i40e_service_event_schedule(pf);
4210		i40e_irq_dynamic_enable_icr0(pf);
4211	}
4212
4213	return ret;
4214}
4215
4216/**
4217 * i40e_clean_fdir_tx_irq - Reclaim resources after transmit completes
4218 * @tx_ring:  tx ring to clean
4219 * @budget:   how many cleans we're allowed
4220 *
4221 * Returns true if there's any budget left (e.g. the clean is finished)
4222 **/
4223static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget)
4224{
4225	struct i40e_vsi *vsi = tx_ring->vsi;
4226	u16 i = tx_ring->next_to_clean;
4227	struct i40e_tx_buffer *tx_buf;
4228	struct i40e_tx_desc *tx_desc;
4229
4230	tx_buf = &tx_ring->tx_bi[i];
4231	tx_desc = I40E_TX_DESC(tx_ring, i);
4232	i -= tx_ring->count;
4233
4234	do {
4235		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
4236
4237		/* if next_to_watch is not set then there is no work pending */
4238		if (!eop_desc)
4239			break;
4240
4241		/* prevent any other reads prior to eop_desc */
4242		smp_rmb();
4243
4244		/* if the descriptor isn't done, no work yet to do */
4245		if (!(eop_desc->cmd_type_offset_bsz &
4246		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
4247			break;
4248
4249		/* clear next_to_watch to prevent false hangs */
4250		tx_buf->next_to_watch = NULL;
4251
4252		tx_desc->buffer_addr = 0;
4253		tx_desc->cmd_type_offset_bsz = 0;
4254		/* move past filter desc */
4255		tx_buf++;
4256		tx_desc++;
4257		i++;
4258		if (unlikely(!i)) {
4259			i -= tx_ring->count;
4260			tx_buf = tx_ring->tx_bi;
4261			tx_desc = I40E_TX_DESC(tx_ring, 0);
4262		}
4263		/* unmap skb header data */
4264		dma_unmap_single(tx_ring->dev,
4265				 dma_unmap_addr(tx_buf, dma),
4266				 dma_unmap_len(tx_buf, len),
4267				 DMA_TO_DEVICE);
4268		if (tx_buf->tx_flags & I40E_TX_FLAGS_FD_SB)
4269			kfree(tx_buf->raw_buf);
4270
4271		tx_buf->raw_buf = NULL;
4272		tx_buf->tx_flags = 0;
4273		tx_buf->next_to_watch = NULL;
4274		dma_unmap_len_set(tx_buf, len, 0);
4275		tx_desc->buffer_addr = 0;
4276		tx_desc->cmd_type_offset_bsz = 0;
4277
4278		/* move us past the eop_desc for start of next FD desc */
4279		tx_buf++;
4280		tx_desc++;
4281		i++;
4282		if (unlikely(!i)) {
4283			i -= tx_ring->count;
4284			tx_buf = tx_ring->tx_bi;
4285			tx_desc = I40E_TX_DESC(tx_ring, 0);
4286		}
4287
4288		/* update budget accounting */
4289		budget--;
4290	} while (likely(budget));
4291
4292	i += tx_ring->count;
4293	tx_ring->next_to_clean = i;
4294
4295	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED)
4296		i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx);
4297
4298	return budget > 0;
4299}
4300
4301/**
4302 * i40e_fdir_clean_ring - Interrupt Handler for FDIR SB ring
4303 * @irq: interrupt number
4304 * @data: pointer to a q_vector
4305 **/
4306static irqreturn_t i40e_fdir_clean_ring(int irq, void *data)
4307{
4308	struct i40e_q_vector *q_vector = data;
4309	struct i40e_vsi *vsi;
4310
4311	if (!q_vector->tx.ring)
4312		return IRQ_HANDLED;
4313
4314	vsi = q_vector->tx.ring->vsi;
4315	i40e_clean_fdir_tx_irq(q_vector->tx.ring, vsi->work_limit);
4316
4317	return IRQ_HANDLED;
4318}
4319
4320/**
4321 * i40e_map_vector_to_qp - Assigns the queue pair to the vector
4322 * @vsi: the VSI being configured
4323 * @v_idx: vector index
4324 * @qp_idx: queue pair index
4325 **/
4326static void i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx)
4327{
4328	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
4329	struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx];
4330	struct i40e_ring *rx_ring = vsi->rx_rings[qp_idx];
4331
4332	tx_ring->q_vector = q_vector;
4333	tx_ring->next = q_vector->tx.ring;
4334	q_vector->tx.ring = tx_ring;
4335	q_vector->tx.count++;
4336
4337	/* Place XDP Tx ring in the same q_vector ring list as regular Tx */
4338	if (i40e_enabled_xdp_vsi(vsi)) {
4339		struct i40e_ring *xdp_ring = vsi->xdp_rings[qp_idx];
4340
4341		xdp_ring->q_vector = q_vector;
4342		xdp_ring->next = q_vector->tx.ring;
4343		q_vector->tx.ring = xdp_ring;
4344		q_vector->tx.count++;
4345	}
4346
4347	rx_ring->q_vector = q_vector;
4348	rx_ring->next = q_vector->rx.ring;
4349	q_vector->rx.ring = rx_ring;
4350	q_vector->rx.count++;
4351}
4352
4353/**
4354 * i40e_vsi_map_rings_to_vectors - Maps descriptor rings to vectors
4355 * @vsi: the VSI being configured
4356 *
4357 * This function maps descriptor rings to the queue-specific vectors
4358 * we were allotted through the MSI-X enabling code.  Ideally, we'd have
4359 * one vector per queue pair, but on a constrained vector budget, we
4360 * group the queue pairs as "efficiently" as possible.
4361 **/
4362static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
4363{
4364	int qp_remaining = vsi->num_queue_pairs;
4365	int q_vectors = vsi->num_q_vectors;
4366	int num_ringpairs;
4367	int v_start = 0;
4368	int qp_idx = 0;
4369
4370	/* If we don't have enough vectors for a 1-to-1 mapping, we'll have to
4371	 * group them so there are multiple queues per vector.
4372	 * It is also important to go through all the vectors available to be
4373	 * sure that if we don't use all the vectors, that the remaining vectors
4374	 * are cleared. This is especially important when decreasing the
4375	 * number of queues in use.
4376	 */
4377	for (; v_start < q_vectors; v_start++) {
4378		struct i40e_q_vector *q_vector = vsi->q_vectors[v_start];
4379
4380		num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);
4381
4382		q_vector->num_ringpairs = num_ringpairs;
4383		q_vector->reg_idx = q_vector->v_idx + vsi->base_vector - 1;
4384
4385		q_vector->rx.count = 0;
4386		q_vector->tx.count = 0;
4387		q_vector->rx.ring = NULL;
4388		q_vector->tx.ring = NULL;
4389
4390		while (num_ringpairs--) {
4391			i40e_map_vector_to_qp(vsi, v_start, qp_idx);
4392			qp_idx++;
4393			qp_remaining--;
4394		}
4395	}
4396}
4397
4398/**
4399 * i40e_vsi_request_irq - Request IRQ from the OS
4400 * @vsi: the VSI being configured
4401 * @basename: name for the vector
4402 **/
4403static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename)
4404{
4405	struct i40e_pf *pf = vsi->back;
4406	int err;
4407
4408	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
4409		err = i40e_vsi_request_irq_msix(vsi, basename);
4410	else if (pf->flags & I40E_FLAG_MSI_ENABLED)
4411		err = request_irq(pf->pdev->irq, i40e_intr, 0,
4412				  pf->int_name, pf);
4413	else
4414		err = request_irq(pf->pdev->irq, i40e_intr, IRQF_SHARED,
4415				  pf->int_name, pf);
4416
4417	if (err)
4418		dev_info(&pf->pdev->dev, "request_irq failed, Error %d\n", err);
4419
4420	return err;
4421}
4422
4423#ifdef CONFIG_NET_POLL_CONTROLLER
4424/**
4425 * i40e_netpoll - A Polling 'interrupt' handler
4426 * @netdev: network interface device structure
4427 *
4428 * This is used by netconsole to send skbs without having to re-enable
4429 * interrupts.  It's not called while the normal interrupt routine is executing.
4430 **/
4431static void i40e_netpoll(struct net_device *netdev)
4432{
4433	struct i40e_netdev_priv *np = netdev_priv(netdev);
4434	struct i40e_vsi *vsi = np->vsi;
4435	struct i40e_pf *pf = vsi->back;
4436	int i;
4437
4438	/* if interface is down do nothing */
4439	if (test_bit(__I40E_VSI_DOWN, vsi->state))
4440		return;
4441
4442	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4443		for (i = 0; i < vsi->num_q_vectors; i++)
4444			i40e_msix_clean_rings(0, vsi->q_vectors[i]);
4445	} else {
4446		i40e_intr(pf->pdev->irq, netdev);
4447	}
4448}
4449#endif
4450
4451#define I40E_QTX_ENA_WAIT_COUNT 50
4452
4453/**
4454 * i40e_pf_txq_wait - Wait for a PF's Tx queue to be enabled or disabled
4455 * @pf: the PF being configured
4456 * @pf_q: the PF queue
4457 * @enable: enable or disable state of the queue
4458 *
4459 * This routine will wait for the given Tx queue of the PF to reach the
4460 * enabled or disabled state.
4461 * Returns -ETIMEDOUT in case of failing to reach the requested state after
4462 * multiple retries; else will return 0 in case of success.
4463 **/
4464static int i40e_pf_txq_wait(struct i40e_pf *pf, int pf_q, bool enable)
4465{
4466	int i;
4467	u32 tx_reg;
4468
4469	for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) {
4470		tx_reg = rd32(&pf->hw, I40E_QTX_ENA(pf_q));
4471		if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
4472			break;
4473
4474		usleep_range(10, 20);
4475	}
4476	if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT)
4477		return -ETIMEDOUT;
4478
4479	return 0;
4480}
4481
4482/**
4483 * i40e_control_tx_q - Start or stop a particular Tx queue
4484 * @pf: the PF structure
4485 * @pf_q: the PF queue to configure
4486 * @enable: start or stop the queue
4487 *
4488 * This function enables or disables a single queue. Note that any delay
4489 * required after the operation is expected to be handled by the caller of
4490 * this function.
4491 **/
4492static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable)
4493{
4494	struct i40e_hw *hw = &pf->hw;
4495	u32 tx_reg;
4496	int i;
4497
4498	/* warn the TX unit of coming changes */
4499	i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable);
4500	if (!enable)
4501		usleep_range(10, 20);
4502
4503	for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) {
4504		tx_reg = rd32(hw, I40E_QTX_ENA(pf_q));
4505		if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) ==
4506		    ((tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT) & 1))
4507			break;
4508		usleep_range(1000, 2000);
4509	}
4510
4511	/* Skip if the queue is already in the requested state */
4512	if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
4513		return;
4514
4515	/* turn on/off the queue */
4516	if (enable) {
4517		wr32(hw, I40E_QTX_HEAD(pf_q), 0);
4518		tx_reg |= I40E_QTX_ENA_QENA_REQ_MASK;
4519	} else {
4520		tx_reg &= ~I40E_QTX_ENA_QENA_REQ_MASK;
4521	}
4522
4523	wr32(hw, I40E_QTX_ENA(pf_q), tx_reg);
4524}
4525
4526/**
4527 * i40e_control_wait_tx_q - Start/stop Tx queue and wait for completion
4528 * @seid: VSI SEID
4529 * @pf: the PF structure
4530 * @pf_q: the PF queue to configure
4531 * @is_xdp: true if the queue is used for XDP
4532 * @enable: start or stop the queue
4533 **/
4534int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
4535			   bool is_xdp, bool enable)
4536{
4537	int ret;
4538
4539	i40e_control_tx_q(pf, pf_q, enable);
4540
4541	/* wait for the change to finish */
4542	ret = i40e_pf_txq_wait(pf, pf_q, enable);
4543	if (ret) {
4544		dev_info(&pf->pdev->dev,
4545			 "VSI seid %d %sTx ring %d %sable timeout\n",
4546			 seid, (is_xdp ? "XDP " : ""), pf_q,
4547			 (enable ? "en" : "dis"));
4548	}
4549
4550	return ret;
4551}
4552
4553/**
4554 * i40e_vsi_enable_tx - Start a VSI's rings
4555 * @vsi: the VSI being configured
4556 **/
4557static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
4558{
4559	struct i40e_pf *pf = vsi->back;
4560	int i, pf_q, ret = 0;
4561
4562	pf_q = vsi->base_queue;
4563	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
4564		ret = i40e_control_wait_tx_q(vsi->seid, pf,
4565					     pf_q,
4566					     false /*is xdp*/, true);
4567		if (ret)
4568			break;
4569
4570		if (!i40e_enabled_xdp_vsi(vsi))
4571			continue;
4572
4573		ret = i40e_control_wait_tx_q(vsi->seid, pf,
4574					     pf_q + vsi->alloc_queue_pairs,
4575					     true /*is xdp*/, true);
4576		if (ret)
4577			break;
4578	}
4579	return ret;
4580}
4581
4582/**
4583 * i40e_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
4584 * @pf: the PF being configured
4585 * @pf_q: the PF queue
4586 * @enable: enable or disable state of the queue
4587 *
4588 * This routine will wait for the given Rx queue of the PF to reach the
4589 * enabled or disabled state.
4590 * Returns -ETIMEDOUT in case of failing to reach the requested state after
4591 * multiple retries; else will return 0 in case of success.
4592 **/
4593static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable)
4594{
4595	int i;
4596	u32 rx_reg;
4597
4598	for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) {
4599		rx_reg = rd32(&pf->hw, I40E_QRX_ENA(pf_q));
4600		if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
4601			break;
4602
4603		usleep_range(10, 20);
4604	}
4605	if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT)
4606		return -ETIMEDOUT;
4607
4608	return 0;
4609}
4610
4611/**
4612 * i40e_control_rx_q - Start or stop a particular Rx queue
4613 * @pf: the PF structure
4614 * @pf_q: the PF queue to configure
4615 * @enable: start or stop the queue
4616 *
4617 * This function enables or disables a single queue. Note that
4618 * any delay required after the operation is expected to be
4619 * handled by the caller of this function.
4620 **/
4621static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
4622{
4623	struct i40e_hw *hw = &pf->hw;
4624	u32 rx_reg;
4625	int i;
4626
4627	for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) {
4628		rx_reg = rd32(hw, I40E_QRX_ENA(pf_q));
4629		if (((rx_reg >> I40E_QRX_ENA_QENA_REQ_SHIFT) & 1) ==
4630		    ((rx_reg >> I40E_QRX_ENA_QENA_STAT_SHIFT) & 1))
4631			break;
4632		usleep_range(1000, 2000);
4633	}
4634
4635	/* Skip if the queue is already in the requested state */
4636	if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
4637		return;
4638
4639	/* turn on/off the queue */
4640	if (enable)
4641		rx_reg |= I40E_QRX_ENA_QENA_REQ_MASK;
4642	else
4643		rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
4644
4645	wr32(hw, I40E_QRX_ENA(pf_q), rx_reg);
4646}
4647
4648/**
4649 * i40e_control_wait_rx_q
4650 * @pf: the PF structure
4651 * @pf_q: queue being configured
4652 * @enable: start or stop the rings
4653 *
4654 * This function enables or disables a single queue along with waiting
4655 * for the change to finish. The caller of this function should handle
4656 * the delays needed in the case of disabling queues.
4657 **/
4658int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
4659{
4660	int ret = 0;
4661
4662	i40e_control_rx_q(pf, pf_q, enable);
4663
4664	/* wait for the change to finish */
4665	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
4666	if (ret)
4667		return ret;
4668
4669	return ret;
4670}
4671
4672/**
4673 * i40e_vsi_enable_rx - Start a VSI's rings
4674 * @vsi: the VSI being configured
4675 **/
4676static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
4677{
4678	struct i40e_pf *pf = vsi->back;
4679	int i, pf_q, ret = 0;
4680
4681	pf_q = vsi->base_queue;
4682	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
4683		ret = i40e_control_wait_rx_q(pf, pf_q, true);
4684		if (ret) {
4685			dev_info(&pf->pdev->dev,
4686				 "VSI seid %d Rx ring %d enable timeout\n",
4687				 vsi->seid, pf_q);
4688			break;
4689		}
4690	}
4691
4692	return ret;
4693}
4694
4695/**
4696 * i40e_vsi_start_rings - Start a VSI's rings
4697 * @vsi: the VSI being configured
4698 **/
4699int i40e_vsi_start_rings(struct i40e_vsi *vsi)
4700{
4701	int ret = 0;
4702
4703	/* do rx first for enable and last for disable */
4704	ret = i40e_vsi_enable_rx(vsi);
4705	if (ret)
4706		return ret;
4707	ret = i40e_vsi_enable_tx(vsi);
4708
4709	return ret;
4710}
4711
4712#define I40E_DISABLE_TX_GAP_MSEC	50
4713
4714/**
4715 * i40e_vsi_stop_rings - Stop a VSI's rings
4716 * @vsi: the VSI being configured
4717 **/
4718void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
4719{
4720	struct i40e_pf *pf = vsi->back;
4721	int pf_q, err, q_end;
4722
4723	/* When port TX is suspended, don't wait */
4724	if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
4725		return i40e_vsi_stop_rings_no_wait(vsi);
4726
4727	q_end = vsi->base_queue + vsi->num_queue_pairs;
4728	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
4729		i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
4730
4731	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
4732		err = i40e_control_wait_rx_q(pf, pf_q, false);
4733		if (err)
4734			dev_info(&pf->pdev->dev,
4735				 "VSI seid %d Rx ring %d dissable timeout\n",
4736				 vsi->seid, pf_q);
4737	}
4738
4739	msleep(I40E_DISABLE_TX_GAP_MSEC);
4740	pf_q = vsi->base_queue;
4741	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
4742		wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
4743
4744	i40e_vsi_wait_queues_disabled(vsi);
4745}
4746
4747/**
4748 * i40e_vsi_stop_rings_no_wait - Stop a VSI's rings and do not delay
4749 * @vsi: the VSI being shutdown
4750 *
4751 * This function stops all the rings for a VSI but does not delay to verify
4752 * that rings have been disabled. It is expected that the caller is shutting
4753 * down multiple VSIs at once and will delay together for all the VSIs after
4754 * initiating the shutdown. This is particularly useful for shutting down lots
4755 * of VFs together. Otherwise, a large delay can be incurred while configuring
4756 * each VSI in serial.
4757 **/
4758void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi)
4759{
4760	struct i40e_pf *pf = vsi->back;
4761	int i, pf_q;
4762
4763	pf_q = vsi->base_queue;
4764	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
4765		i40e_control_tx_q(pf, pf_q, false);
4766		i40e_control_rx_q(pf, pf_q, false);
4767	}
4768}
4769
4770/**
4771 * i40e_vsi_free_irq - Free the irq association with the OS
4772 * @vsi: the VSI being configured
4773 **/
4774static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
4775{
4776	struct i40e_pf *pf = vsi->back;
4777	struct i40e_hw *hw = &pf->hw;
4778	int base = vsi->base_vector;
4779	u32 val, qp;
4780	int i;
4781
4782	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4783		if (!vsi->q_vectors)
4784			return;
4785
4786		if (!vsi->irqs_ready)
4787			return;
4788
4789		vsi->irqs_ready = false;
4790		for (i = 0; i < vsi->num_q_vectors; i++) {
4791			int irq_num;
4792			u16 vector;
4793
4794			vector = i + base;
4795			irq_num = pf->msix_entries[vector].vector;
4796
4797			/* free only the irqs that were actually requested */
4798			if (!vsi->q_vectors[i] ||
4799			    !vsi->q_vectors[i]->num_ringpairs)
4800				continue;
4801
4802			/* clear the affinity notifier in the IRQ descriptor */
4803			irq_set_affinity_notifier(irq_num, NULL);
4804			/* remove our suggested affinity mask for this IRQ */
4805			irq_set_affinity_hint(irq_num, NULL);
4806			synchronize_irq(irq_num);
4807			free_irq(irq_num, vsi->q_vectors[i]);
4808
4809			/* Tear down the interrupt queue link list
4810			 *
4811			 * We know that they come in pairs and always
4812			 * the Rx first, then the Tx.  To clear the
4813			 * link list, stick the EOL value into the
4814			 * next_q field of the registers.
4815			 */
4816			val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1));
4817			qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
4818				>> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
4819			val |= I40E_QUEUE_END_OF_LIST
4820				<< I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
4821			wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val);
4822
4823			while (qp != I40E_QUEUE_END_OF_LIST) {
4824				u32 next;
4825
4826				val = rd32(hw, I40E_QINT_RQCTL(qp));
4827
4828				val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
4829					 I40E_QINT_RQCTL_MSIX0_INDX_MASK |
4830					 I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
4831					 I40E_QINT_RQCTL_INTEVENT_MASK);
4832
4833				val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
4834					 I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
4835
4836				wr32(hw, I40E_QINT_RQCTL(qp), val);
4837
4838				val = rd32(hw, I40E_QINT_TQCTL(qp));
4839
4840				next = (val & I40E_QINT_TQCTL_NEXTQ_INDX_MASK)
4841					>> I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT;
4842
4843				val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
4844					 I40E_QINT_TQCTL_MSIX0_INDX_MASK |
4845					 I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
4846					 I40E_QINT_TQCTL_INTEVENT_MASK);
4847
4848				val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
4849					 I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
4850
4851				wr32(hw, I40E_QINT_TQCTL(qp), val);
4852				qp = next;
4853			}
4854		}
4855	} else {
4856		free_irq(pf->pdev->irq, pf);
4857
4858		val = rd32(hw, I40E_PFINT_LNKLST0);
4859		qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
4860			>> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
4861		val |= I40E_QUEUE_END_OF_LIST
4862			<< I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
4863		wr32(hw, I40E_PFINT_LNKLST0, val);
4864
4865		val = rd32(hw, I40E_QINT_RQCTL(qp));
4866		val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
4867			 I40E_QINT_RQCTL_MSIX0_INDX_MASK |
4868			 I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
4869			 I40E_QINT_RQCTL_INTEVENT_MASK);
4870
4871		val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
4872			I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
4873
4874		wr32(hw, I40E_QINT_RQCTL(qp), val);
4875
4876		val = rd32(hw, I40E_QINT_TQCTL(qp));
4877
4878		val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
4879			 I40E_QINT_TQCTL_MSIX0_INDX_MASK |
4880			 I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
4881			 I40E_QINT_TQCTL_INTEVENT_MASK);
4882
4883		val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
4884			I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
4885
4886		wr32(hw, I40E_QINT_TQCTL(qp), val);
4887	}
4888}
4889
4890/**
4891 * i40e_free_q_vector - Free memory allocated for specific interrupt vector
4892 * @vsi: the VSI being configured
4893 * @v_idx: Index of vector to be freed
4894 *
4895 * This function frees the memory allocated to the q_vector.  In addition if
4896 * NAPI is enabled it will delete any references to the NAPI struct prior
4897 * to freeing the q_vector.
4898 **/
4899static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx)
4900{
4901	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
4902	struct i40e_ring *ring;
4903
4904	if (!q_vector)
4905		return;
4906
4907	/* disassociate q_vector from rings */
4908	i40e_for_each_ring(ring, q_vector->tx)
4909		ring->q_vector = NULL;
4910
4911	i40e_for_each_ring(ring, q_vector->rx)
4912		ring->q_vector = NULL;
4913
4914	/* only VSI w/ an associated netdev is set up w/ NAPI */
4915	if (vsi->netdev)
4916		netif_napi_del(&q_vector->napi);
4917
4918	vsi->q_vectors[v_idx] = NULL;
4919
4920	kfree_rcu(q_vector, rcu);
4921}
4922
4923/**
4924 * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors
4925 * @vsi: the VSI being un-configured
4926 *
4927 * This frees the memory allocated to the q_vectors and
4928 * deletes references to the NAPI struct.
4929 **/
4930static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi)
4931{
4932	int v_idx;
4933
4934	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
4935		i40e_free_q_vector(vsi, v_idx);
4936}
4937
4938/**
4939 * i40e_reset_interrupt_capability - Disable interrupt setup in OS
4940 * @pf: board private structure
4941 **/
4942static void i40e_reset_interrupt_capability(struct i40e_pf *pf)
4943{
4944	/* If we're in Legacy mode, the interrupt was cleaned in vsi_close */
4945	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
4946		pci_disable_msix(pf->pdev);
4947		kfree(pf->msix_entries);
4948		pf->msix_entries = NULL;
4949		kfree(pf->irq_pile);
4950		pf->irq_pile = NULL;
4951	} else if (pf->flags & I40E_FLAG_MSI_ENABLED) {
4952		pci_disable_msi(pf->pdev);
4953	}
4954	pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
4955}
4956
4957/**
4958 * i40e_clear_interrupt_scheme - Clear the current interrupt scheme settings
4959 * @pf: board private structure
4960 *
4961 * We go through and clear interrupt specific resources and reset the structure
4962 * to pre-load conditions
4963 **/
4964static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
4965{
4966	int i;
4967
4968	if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state))
4969		i40e_free_misc_vector(pf);
4970
4971	i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
4972		      I40E_IWARP_IRQ_PILE_ID);
4973
4974	i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
4975	for (i = 0; i < pf->num_alloc_vsi; i++)
4976		if (pf->vsi[i])
4977			i40e_vsi_free_q_vectors(pf->vsi[i]);
4978	i40e_reset_interrupt_capability(pf);
4979}
4980
4981/**
4982 * i40e_napi_enable_all - Enable NAPI for all q_vectors in the VSI
4983 * @vsi: the VSI being configured
4984 **/
4985static void i40e_napi_enable_all(struct i40e_vsi *vsi)
4986{
4987	int q_idx;
4988
4989	if (!vsi->netdev)
4990		return;
4991
4992	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
4993		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
4994
4995		if (q_vector->rx.ring || q_vector->tx.ring)
4996			napi_enable(&q_vector->napi);
4997	}
4998}
4999
5000/**
5001 * i40e_napi_disable_all - Disable NAPI for all q_vectors in the VSI
5002 * @vsi: the VSI being configured
5003 **/
5004static void i40e_napi_disable_all(struct i40e_vsi *vsi)
5005{
5006	int q_idx;
5007
5008	if (!vsi->netdev)
5009		return;
5010
5011	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
5012		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
5013
5014		if (q_vector->rx.ring || q_vector->tx.ring)
5015			napi_disable(&q_vector->napi);
5016	}
5017}
5018
5019/**
5020 * i40e_vsi_close - Shut down a VSI
5021 * @vsi: the vsi to be quelled
5022 **/
5023static void i40e_vsi_close(struct i40e_vsi *vsi)
5024{
5025	struct i40e_pf *pf = vsi->back;
5026	if (!test_and_set_bit(__I40E_VSI_DOWN, vsi->state))
5027		i40e_down(vsi);
5028	i40e_vsi_free_irq(vsi);
5029	i40e_vsi_free_tx_resources(vsi);
5030	i40e_vsi_free_rx_resources(vsi);
5031	vsi->current_netdev_flags = 0;
5032	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
5033	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
5034		set_bit(__I40E_CLIENT_RESET, pf->state);
5035}
5036
5037/**
5038 * i40e_quiesce_vsi - Pause a given VSI
5039 * @vsi: the VSI being paused
5040 **/
5041static void i40e_quiesce_vsi(struct i40e_vsi *vsi)
5042{
5043	if (test_bit(__I40E_VSI_DOWN, vsi->state))
5044		return;
5045
5046	set_bit(__I40E_VSI_NEEDS_RESTART, vsi->state);
5047	if (vsi->netdev && netif_running(vsi->netdev))
5048		vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
5049	else
5050		i40e_vsi_close(vsi);
5051}
5052
5053/**
5054 * i40e_unquiesce_vsi - Resume a given VSI
5055 * @vsi: the VSI being resumed
5056 **/
5057static void i40e_unquiesce_vsi(struct i40e_vsi *vsi)
5058{
5059	if (!test_and_clear_bit(__I40E_VSI_NEEDS_RESTART, vsi->state))
5060		return;
5061
5062	if (vsi->netdev && netif_running(vsi->netdev))
5063		vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
5064	else
5065		i40e_vsi_open(vsi);   /* this clears the DOWN bit */
5066}
5067
5068/**
5069 * i40e_pf_quiesce_all_vsi - Pause all VSIs on a PF
5070 * @pf: the PF
5071 **/
5072static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf)
5073{
5074	int v;
5075
5076	for (v = 0; v < pf->num_alloc_vsi; v++) {
5077		if (pf->vsi[v])
5078			i40e_quiesce_vsi(pf->vsi[v]);
5079	}
5080}
5081
5082/**
5083 * i40e_pf_unquiesce_all_vsi - Resume all VSIs on a PF
5084 * @pf: the PF
5085 **/
5086static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
5087{
5088	int v;
5089
5090	for (v = 0; v < pf->num_alloc_vsi; v++) {
5091		if (pf->vsi[v])
5092			i40e_unquiesce_vsi(pf->vsi[v]);
5093	}
5094}
5095
5096/**
5097 * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled
5098 * @vsi: the VSI being configured
5099 *
5100 * Wait until all queues on a given VSI have been disabled.
5101 **/
5102int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
5103{
5104	struct i40e_pf *pf = vsi->back;
5105	int i, pf_q, ret;
5106
5107	pf_q = vsi->base_queue;
5108	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
5109		/* Check and wait for the Tx queue */
5110		ret = i40e_pf_txq_wait(pf, pf_q, false);
5111		if (ret) {
5112			dev_info(&pf->pdev->dev,
5113				 "VSI seid %d Tx ring %d disable timeout\n",
5114				 vsi->seid, pf_q);
5115			return ret;
5116		}
5117
5118		if (!i40e_enabled_xdp_vsi(vsi))
5119			goto wait_rx;
5120
5121		/* Check and wait for the XDP Tx queue */
5122		ret = i40e_pf_txq_wait(pf, pf_q + vsi->alloc_queue_pairs,
5123				       false);
5124		if (ret) {
5125			dev_info(&pf->pdev->dev,
5126				 "VSI seid %d XDP Tx ring %d disable timeout\n",
5127				 vsi->seid, pf_q);
5128			return ret;
5129		}
5130wait_rx:
5131		/* Check and wait for the Rx queue */
5132		ret = i40e_pf_rxq_wait(pf, pf_q, false);
5133		if (ret) {
5134			dev_info(&pf->pdev->dev,
5135				 "VSI seid %d Rx ring %d disable timeout\n",
5136				 vsi->seid, pf_q);
5137			return ret;
5138		}
5139	}
5140
5141	return 0;
5142}
5143
5144#ifdef CONFIG_I40E_DCB
5145/**
5146 * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled
5147 * @pf: the PF
5148 *
5149 * This function waits for the queues to be in disabled state for all the
5150 * VSIs that are managed by this PF.
5151 **/
5152static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
5153{
5154	int v, ret = 0;
5155
5156	for (v = 0; v < pf->num_alloc_vsi; v++) {
5157		if (pf->vsi[v]) {
5158			ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
5159			if (ret)
5160				break;
5161		}
5162	}
5163
5164	return ret;
5165}
5166
5167#endif
5168
5169/**
5170 * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP
5171 * @pf: pointer to PF
5172 *
5173 * Get TC map for ISCSI PF type that will include iSCSI TC
5174 * and LAN TC.
5175 **/
5176static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
5177{
5178	struct i40e_dcb_app_priority_table app;
5179	struct i40e_hw *hw = &pf->hw;
5180	u8 enabled_tc = 1; /* TC0 is always enabled */
5181	u8 tc, i;
5182	/* Get the iSCSI APP TLV */
5183	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
5184
5185	for (i = 0; i < dcbcfg->numapps; i++) {
5186		app = dcbcfg->app[i];
5187		if (app.selector == I40E_APP_SEL_TCPIP &&
5188		    app.protocolid == I40E_APP_PROTOID_ISCSI) {
5189			tc = dcbcfg->etscfg.prioritytable[app.priority];
5190			enabled_tc |= BIT(tc);
5191			break;
5192		}
5193	}
5194
5195	return enabled_tc;
5196}
5197
5198/**
5199 * i40e_dcb_get_num_tc -  Get the number of TCs from DCBx config
5200 * @dcbcfg: the corresponding DCBx configuration structure
5201 *
5202 * Return the number of TCs from given DCBx configuration
5203 **/
5204static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
5205{
5206	int i, tc_unused = 0;
5207	u8 num_tc = 0;
5208	u8 ret = 0;
5209
5210	/* Scan the ETS Config Priority Table to find
5211	 * traffic class enabled for a given priority
5212	 * and create a bitmask of enabled TCs
5213	 */
5214	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
5215		num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
5216
5217	/* Now scan the bitmask to check for
5218	 * contiguous TCs starting with TC0
5219	 */
5220	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5221		if (num_tc & BIT(i)) {
5222			if (!tc_unused) {
5223				ret++;
5224			} else {
5225				pr_err("Non-contiguous TC - Disabling DCB\n");
5226				return 1;
5227			}
5228		} else {
5229			tc_unused = 1;
5230		}
5231	}
5232
5233	/* There is always at least TC0 */
5234	if (!ret)
5235		ret = 1;
5236
5237	return ret;
5238}
5239
5240/**
5241 * i40e_dcb_get_enabled_tc - Get enabled traffic classes
5242 * @dcbcfg: the corresponding DCBx configuration structure
5243 *
5244 * Query the current DCB configuration and return the number of
5245 * traffic classes enabled from the given DCBX config
5246 **/
5247static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
5248{
5249	u8 num_tc = i40e_dcb_get_num_tc(dcbcfg);
5250	u8 enabled_tc = 1;
5251	u8 i;
5252
5253	for (i = 0; i < num_tc; i++)
5254		enabled_tc |= BIT(i);
5255
5256	return enabled_tc;
5257}
5258
5259/**
5260 * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
5261 * @pf: PF being queried
5262 *
5263 * Query the current MQPRIO configuration and return the number of
5264 * traffic classes enabled.
5265 **/
5266static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
5267{
5268	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
5269	u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
5270	u8 enabled_tc = 1, i;
5271
5272	for (i = 1; i < num_tc; i++)
5273		enabled_tc |= BIT(i);
5274	return enabled_tc;
5275}
5276
5277/**
5278 * i40e_pf_get_num_tc - Get enabled traffic classes for PF
5279 * @pf: PF being queried
5280 *
5281 * Return number of traffic classes enabled for the given PF
5282 **/
5283static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
5284{
5285	struct i40e_hw *hw = &pf->hw;
5286	u8 i, enabled_tc = 1;
5287	u8 num_tc = 0;
5288	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
5289
5290	if (pf->flags & I40E_FLAG_TC_MQPRIO)
5291		return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
5292
5293	/* If neither MQPRIO nor DCB is enabled, then always use single TC */
5294	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
5295		return 1;
5296
5297	/* SFP mode will be enabled for all TCs on port */
5298	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
5299		return i40e_dcb_get_num_tc(dcbcfg);
5300
5301	/* MFP mode return count of enabled TCs for this PF */
5302	if (pf->hw.func_caps.iscsi)
5303		enabled_tc =  i40e_get_iscsi_tc_map(pf);
5304	else
5305		return 1; /* Only TC0 */
5306
5307	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5308		if (enabled_tc & BIT(i))
5309			num_tc++;
5310	}
5311	return num_tc;
5312}
5313
5314/**
5315 * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes
5316 * @pf: PF being queried
5317 *
5318 * Return a bitmap for enabled traffic classes for this PF.
5319 **/
5320static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
5321{
5322	if (pf->flags & I40E_FLAG_TC_MQPRIO)
5323		return i40e_mqprio_get_enabled_tc(pf);
5324
5325	/* If neither MQPRIO nor DCB is enabled for this PF then just return
5326	 * default TC
5327	 */
5328	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
5329		return I40E_DEFAULT_TRAFFIC_CLASS;
5330
5331	/* SFP mode we want PF to be enabled for all TCs */
5332	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
5333		return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config);
5334
5335	/* MFP enabled and iSCSI PF type */
5336	if (pf->hw.func_caps.iscsi)
5337		return i40e_get_iscsi_tc_map(pf);
5338	else
5339		return I40E_DEFAULT_TRAFFIC_CLASS;
5340}
5341
5342/**
5343 * i40e_vsi_get_bw_info - Query VSI BW Information
5344 * @vsi: the VSI being queried
5345 *
5346 * Returns 0 on success, negative value on failure
5347 **/
5348static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
5349{
5350	struct i40e_aqc_query_vsi_ets_sla_config_resp bw_ets_config = {0};
5351	struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
5352	struct i40e_pf *pf = vsi->back;
5353	struct i40e_hw *hw = &pf->hw;
5354	i40e_status ret;
5355	u32 tc_bw_max;
5356	int i;
5357
5358	/* Get the VSI level BW configuration */
5359	ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL);
5360	if (ret) {
5361		dev_info(&pf->pdev->dev,
5362			 "couldn't get PF vsi bw config, err %s aq_err %s\n",
5363			 i40e_stat_str(&pf->hw, ret),
5364			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
5365		return -EINVAL;
5366	}
5367
5368	/* Get the VSI level BW configuration per TC */
5369	ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &bw_ets_config,
5370					       NULL);
5371	if (ret) {
5372		dev_info(&pf->pdev->dev,
5373			 "couldn't get PF vsi ets bw config, err %s aq_err %s\n",
5374			 i40e_stat_str(&pf->hw, ret),
5375			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
5376		return -EINVAL;
5377	}
5378
5379	if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) {
5380		dev_info(&pf->pdev->dev,
5381			 "Enabled TCs mismatch from querying VSI BW info 0x%08x 0x%08x\n",
5382			 bw_config.tc_valid_bits,
5383			 bw_ets_config.tc_valid_bits);
5384		/* Still continuing */
5385	}
5386
5387	vsi->bw_limit = le16_to_cpu(bw_config.port_bw_limit);
5388	vsi->bw_max_quanta = bw_config.max_bw;
5389	tc_bw_max = le16_to_cpu(bw_ets_config.tc_bw_max[0]) |
5390		    (le16_to_cpu(bw_ets_config.tc_bw_max[1]) << 16);
5391	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5392		vsi->bw_ets_share_credits[i] = bw_ets_config.share_credits[i];
5393		vsi->bw_ets_limit_credits[i] =
5394					le16_to_cpu(bw_ets_config.credits[i]);
5395		/* 3 bits out of 4 for each TC */
5396		vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7);
5397	}
5398
5399	return 0;
5400}
5401
5402/**
5403 * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC
5404 * @vsi: the VSI being configured
5405 * @enabled_tc: TC bitmap
5406 * @bw_share: BW shared credits per TC
5407 *
5408 * Returns 0 on success, negative value on failure
5409 **/
5410static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
5411				       u8 *bw_share)
5412{
5413	struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
5414	struct i40e_pf *pf = vsi->back;
5415	i40e_status ret;
5416	int i;
5417
5418	/* There is no need to reset BW when mqprio mode is on.  */
5419	if (pf->flags & I40E_FLAG_TC_MQPRIO)
5420		return 0;
5421	if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
5422		ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
5423		if (ret)
5424			dev_info(&pf->pdev->dev,
5425				 "Failed to reset tx rate for vsi->seid %u\n",
5426				 vsi->seid);
5427		return ret;
5428	}
5429	bw_data.tc_valid_bits = enabled_tc;
5430	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
5431		bw_data.tc_bw_credits[i] = bw_share[i];
5432
5433	ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL);
5434	if (ret) {
5435		dev_info(&pf->pdev->dev,
5436			 "AQ command Config VSI BW allocation per TC failed = %d\n",
5437			 pf->hw.aq.asq_last_status);
5438		return -EINVAL;
5439	}
5440
5441	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
5442		vsi->info.qs_handle[i] = bw_data.qs_handles[i];
5443
5444	return 0;
5445}
5446
5447/**
5448 * i40e_vsi_config_netdev_tc - Setup the netdev TC configuration
5449 * @vsi: the VSI being configured
5450 * @enabled_tc: TC map to be enabled
5451 *
5452 **/
5453static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
5454{
5455	struct net_device *netdev = vsi->netdev;
5456	struct i40e_pf *pf = vsi->back;
5457	struct i40e_hw *hw = &pf->hw;
5458	u8 netdev_tc = 0;
5459	int i;
5460	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
5461
5462	if (!netdev)
5463		return;
5464
5465	if (!enabled_tc) {
5466		netdev_reset_tc(netdev);
5467		return;
5468	}
5469
5470	/* Set up actual enabled TCs on the VSI */
5471	if (netdev_set_num_tc(netdev, vsi->tc_config.numtc))
5472		return;
5473
5474	/* set per TC queues for the VSI */
5475	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5476		/* Only set TC queues for enabled tcs
5477		 *
5478		 * e.g. For a VSI that has TC0 and TC3 enabled the
5479		 * enabled_tc bitmap would be 0x00001001; the driver
5480		 * will set the numtc for netdev as 2 that will be
5481		 * referenced by the netdev layer as TC 0 and 1.
5482		 */
5483		if (vsi->tc_config.enabled_tc & BIT(i))
5484			netdev_set_tc_queue(netdev,
5485					vsi->tc_config.tc_info[i].netdev_tc,
5486					vsi->tc_config.tc_info[i].qcount,
5487					vsi->tc_config.tc_info[i].qoffset);
5488	}
5489
5490	if (pf->flags & I40E_FLAG_TC_MQPRIO)
5491		return;
5492
5493	/* Assign UP2TC map for the VSI */
5494	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
5495		/* Get the actual TC# for the UP */
5496		u8 ets_tc = dcbcfg->etscfg.prioritytable[i];
5497		/* Get the mapped netdev TC# for the UP */
5498		netdev_tc =  vsi->tc_config.tc_info[ets_tc].netdev_tc;
5499		netdev_set_prio_tc_map(netdev, i, netdev_tc);
5500	}
5501}
5502
5503/**
5504 * i40e_vsi_update_queue_map - Update our copy of VSi info with new queue map
5505 * @vsi: the VSI being configured
5506 * @ctxt: the ctxt buffer returned from AQ VSI update param command
5507 **/
5508static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
5509				      struct i40e_vsi_context *ctxt)
5510{
5511	/* copy just the sections touched not the entire info
5512	 * since not all sections are valid as returned by
5513	 * update vsi params
5514	 */
5515	vsi->info.mapping_flags = ctxt->info.mapping_flags;
5516	memcpy(&vsi->info.queue_mapping,
5517	       &ctxt->info.queue_mapping, sizeof(vsi->info.queue_mapping));
5518	memcpy(&vsi->info.tc_mapping, ctxt->info.tc_mapping,
5519	       sizeof(vsi->info.tc_mapping));
5520}
5521
5522/**
5523 * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI
5524 * @vsi: the VSI being reconfigured
5525 * @vsi_offset: offset from main VF VSI
5526 */
5527int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset)
5528{
5529	struct i40e_vsi_context ctxt = {};
5530	struct i40e_pf *pf;
5531	struct i40e_hw *hw;
5532	int ret;
5533
5534	if (!vsi)
5535		return I40E_ERR_PARAM;
5536	pf = vsi->back;
5537	hw = &pf->hw;
5538
5539	ctxt.seid = vsi->seid;
5540	ctxt.pf_num = hw->pf_id;
5541	ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset;
5542	ctxt.uplink_seid = vsi->uplink_seid;
5543	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
5544	ctxt.flags = I40E_AQ_VSI_TYPE_VF;
5545	ctxt.info = vsi->info;
5546
5547	i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc,
5548				 false);
5549	if (vsi->reconfig_rss) {
5550		vsi->rss_size = min_t(int, pf->alloc_rss_size,
5551				      vsi->num_queue_pairs);
5552		ret = i40e_vsi_config_rss(vsi);
5553		if (ret) {
5554			dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n");
5555			return ret;
5556		}
5557		vsi->reconfig_rss = false;
5558	}
5559
5560	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
5561	if (ret) {
5562		dev_info(&pf->pdev->dev, "Update vsi config failed, err %s aq_err %s\n",
5563			 i40e_stat_str(hw, ret),
5564			 i40e_aq_str(hw, hw->aq.asq_last_status));
5565		return ret;
5566	}
5567	/* update the local VSI info with updated queue map */
5568	i40e_vsi_update_queue_map(vsi, &ctxt);
5569	vsi->info.valid_sections = 0;
5570
5571	return ret;
5572}
5573
5574/**
5575 * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
5576 * @vsi: VSI to be configured
5577 * @enabled_tc: TC bitmap
5578 *
5579 * This configures a particular VSI for TCs that are mapped to the
5580 * given TC bitmap. It uses default bandwidth share for TCs across
5581 * VSIs to configure TC for a particular VSI.
5582 *
5583 * NOTE:
5584 * It is expected that the VSI queues have been quisced before calling
5585 * this function.
5586 **/
5587static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
5588{
5589	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
5590	struct i40e_pf *pf = vsi->back;
5591	struct i40e_hw *hw = &pf->hw;
5592	struct i40e_vsi_context ctxt;
5593	int ret = 0;
5594	int i;
5595
5596	/* Check if enabled_tc is same as existing or new TCs */
5597	if (vsi->tc_config.enabled_tc == enabled_tc &&
5598	    vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
5599		return ret;
5600
5601	/* Enable ETS TCs with equal BW Share for now across all VSIs */
5602	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
5603		if (enabled_tc & BIT(i))
5604			bw_share[i] = 1;
5605	}
5606
5607	ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
5608	if (ret) {
5609		struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
5610
5611		dev_info(&pf->pdev->dev,
5612			 "Failed configuring TC map %d for VSI %d\n",
5613			 enabled_tc, vsi->seid);
5614		ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid,
5615						  &bw_config, NULL);
5616		if (ret) {
5617			dev_info(&pf->pdev->dev,
5618				 "Failed querying vsi bw info, err %s aq_err %s\n",
5619				 i40e_stat_str(hw, ret),
5620				 i40e_aq_str(hw, hw->aq.asq_last_status));
5621			goto out;
5622		}
5623		if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) {
5624			u8 valid_tc = bw_config.tc_valid_bits & enabled_tc;
5625
5626			if (!valid_tc)
5627				valid_tc = bw_config.tc_valid_bits;
5628			/* Always enable TC0, no matter what */
5629			valid_tc |= 1;
5630			dev_info(&pf->pdev->dev,
5631				 "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n",
5632				 enabled_tc, bw_config.tc_valid_bits, valid_tc);
5633			enabled_tc = valid_tc;
5634		}
5635
5636		ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
5637		if (ret) {
5638			dev_err(&pf->pdev->dev,
5639				"Unable to  configure TC map %d for VSI %d\n",
5640				enabled_tc, vsi->seid);
5641			goto out;
5642		}
5643	}
5644
5645	/* Update Queue Pairs Mapping for currently enabled UPs */
5646	ctxt.seid = vsi->seid;
5647	ctxt.pf_num = vsi->back->hw.pf_id;
5648	ctxt.vf_num = 0;
5649	ctxt.uplink_seid = vsi->uplink_seid;
5650	ctxt.info = vsi->info;
5651	if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) {
5652		ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
5653		if (ret)
5654			goto out;
5655	} else {
5656		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
5657	}
5658
5659	/* On destroying the qdisc, reset vsi->rss_size, as number of enabled
5660	 * queues changed.
5661	 */
5662	if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
5663		vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
5664				      vsi->num_queue_pairs);
5665		ret = i40e_vsi_config_rss(vsi);
5666		if (ret) {
5667			dev_info(&vsi->back->pdev->dev,
5668				 "Failed to reconfig rss for num_queues\n");
5669			return ret;
5670		}
5671		vsi->reconfig_rss = false;
5672	}
5673	if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
5674		ctxt.info.valid_sections |=
5675				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
5676		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
5677	}
5678
5679	/* Update the VSI after updating the VSI queue-mapping
5680	 * information
5681	 */
5682	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
5683	if (ret) {
5684		dev_info(&pf->pdev->dev,
5685			 "Update vsi tc config failed, err %s aq_err %s\n",
5686			 i40e_stat_str(hw, ret),
5687			 i40e_aq_str(hw, hw->aq.asq_last_status));
5688		goto out;
5689	}
5690	/* update the local VSI info with updated queue map */
5691	i40e_vsi_update_queue_map(vsi, &ctxt);
5692	vsi->info.valid_sections = 0;
5693
5694	/* Update current VSI BW information */
5695	ret = i40e_vsi_get_bw_info(vsi);
5696	if (ret) {
5697		dev_info(&pf->pdev->dev,
5698			 "Failed updating vsi bw info, err %s aq_err %s\n",
5699			 i40e_stat_str(hw, ret),
5700			 i40e_aq_str(hw, hw->aq.asq_last_status));
5701		goto out;
5702	}
5703
5704	/* Update the netdev TC setup */
5705	i40e_vsi_config_netdev_tc(vsi, enabled_tc);
5706out:
5707	return ret;
5708}
5709
5710/**
5711 * i40e_get_link_speed - Returns link speed for the interface
5712 * @vsi: VSI to be configured
5713 *
5714 **/
5715static int i40e_get_link_speed(struct i40e_vsi *vsi)
5716{
5717	struct i40e_pf *pf = vsi->back;
5718
5719	switch (pf->hw.phy.link_info.link_speed) {
5720	case I40E_LINK_SPEED_40GB:
5721		return 40000;
5722	case I40E_LINK_SPEED_25GB:
5723		return 25000;
5724	case I40E_LINK_SPEED_20GB:
5725		return 20000;
5726	case I40E_LINK_SPEED_10GB:
5727		return 10000;
5728	case I40E_LINK_SPEED_1GB:
5729		return 1000;
5730	default:
5731		return -EINVAL;
5732	}
5733}
5734
5735/**
5736 * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits
5737 * @vsi: Pointer to vsi structure
5738 * @max_tx_rate: max TX rate in bytes to be converted into Mbits
5739 *
5740 * Helper function to convert units before send to set BW limit
5741 **/
5742static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate)
5743{
5744	if (max_tx_rate < I40E_BW_MBPS_DIVISOR) {
5745		dev_warn(&vsi->back->pdev->dev,
5746			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
5747		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
5748	} else {
5749		do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
5750	}
5751
5752	return max_tx_rate;
5753}
5754
5755/**
5756 * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
5757 * @vsi: VSI to be configured
5758 * @seid: seid of the channel/VSI
5759 * @max_tx_rate: max TX rate to be configured as BW limit
5760 *
5761 * Helper function to set BW limit for a given VSI
5762 **/
5763int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
5764{
5765	struct i40e_pf *pf = vsi->back;
5766	u64 credits = 0;
5767	int speed = 0;
5768	int ret = 0;
5769
5770	speed = i40e_get_link_speed(vsi);
5771	if (max_tx_rate > speed) {
5772		dev_err(&pf->pdev->dev,
5773			"Invalid max tx rate %llu specified for VSI seid %d.",
5774			max_tx_rate, seid);
5775		return -EINVAL;
5776	}
5777	if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) {
5778		dev_warn(&pf->pdev->dev,
5779			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
5780		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
5781	}
5782
5783	/* Tx rate credits are in values of 50Mbps, 0 is disabled */
5784	credits = max_tx_rate;
5785	do_div(credits, I40E_BW_CREDIT_DIVISOR);
5786	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
5787					  I40E_MAX_BW_INACTIVE_ACCUM, NULL);
5788	if (ret)
5789		dev_err(&pf->pdev->dev,
5790			"Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n",
5791			max_tx_rate, seid, i40e_stat_str(&pf->hw, ret),
5792			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
5793	return ret;
5794}
5795
5796/**
5797 * i40e_remove_queue_channels - Remove queue channels for the TCs
5798 * @vsi: VSI to be configured
5799 *
5800 * Remove queue channels for the TCs
5801 **/
5802static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
5803{
5804	enum i40e_admin_queue_err last_aq_status;
5805	struct i40e_cloud_filter *cfilter;
5806	struct i40e_channel *ch, *ch_tmp;
5807	struct i40e_pf *pf = vsi->back;
5808	struct hlist_node *node;
5809	int ret, i;
5810
5811	/* Reset rss size that was stored when reconfiguring rss for
5812	 * channel VSIs with non-power-of-2 queue count.
5813	 */
5814	vsi->current_rss_size = 0;
5815
5816	/* perform cleanup for channels if they exist */
5817	if (list_empty(&vsi->ch_list))
5818		return;
5819
5820	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
5821		struct i40e_vsi *p_vsi;
5822
5823		list_del(&ch->list);
5824		p_vsi = ch->parent_vsi;
5825		if (!p_vsi || !ch->initialized) {
5826			kfree(ch);
5827			continue;
5828		}
5829		/* Reset queue contexts */
5830		for (i = 0; i < ch->num_queue_pairs; i++) {
5831			struct i40e_ring *tx_ring, *rx_ring;
5832			u16 pf_q;
5833
5834			pf_q = ch->base_queue + i;
5835			tx_ring = vsi->tx_rings[pf_q];
5836			tx_ring->ch = NULL;
5837
5838			rx_ring = vsi->rx_rings[pf_q];
5839			rx_ring->ch = NULL;
5840		}
5841
5842		/* Reset BW configured for this VSI via mqprio */
5843		ret = i40e_set_bw_limit(vsi, ch->seid, 0);
5844		if (ret)
5845			dev_info(&vsi->back->pdev->dev,
5846				 "Failed to reset tx rate for ch->seid %u\n",
5847				 ch->seid);
5848
5849		/* delete cloud filters associated with this channel */
5850		hlist_for_each_entry_safe(cfilter, node,
5851					  &pf->cloud_filter_list, cloud_node) {
5852			if (cfilter->seid != ch->seid)
5853				continue;
5854
5855			hash_del(&cfilter->cloud_node);
5856			if (cfilter->dst_port)
5857				ret = i40e_add_del_cloud_filter_big_buf(vsi,
5858									cfilter,
5859									false);
5860			else
5861				ret = i40e_add_del_cloud_filter(vsi, cfilter,
5862								false);
5863			last_aq_status = pf->hw.aq.asq_last_status;
5864			if (ret)
5865				dev_info(&pf->pdev->dev,
5866					 "Failed to delete cloud filter, err %s aq_err %s\n",
5867					 i40e_stat_str(&pf->hw, ret),
5868					 i40e_aq_str(&pf->hw, last_aq_status));
5869			kfree(cfilter);
5870		}
5871
5872		/* delete VSI from FW */
5873		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
5874					     NULL);
5875		if (ret)
5876			dev_err(&vsi->back->pdev->dev,
5877				"unable to remove channel (%d) for parent VSI(%d)\n",
5878				ch->seid, p_vsi->seid);
5879		kfree(ch);
5880	}
5881	INIT_LIST_HEAD(&vsi->ch_list);
5882}
5883
5884/**
5885 * i40e_get_max_queues_for_channel
5886 * @vsi: ptr to VSI to which channels are associated with
5887 *
5888 * Helper function which returns max value among the queue counts set on the
5889 * channels/TCs created.
5890 **/
5891static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
5892{
5893	struct i40e_channel *ch, *ch_tmp;
5894	int max = 0;
5895
5896	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
5897		if (!ch->initialized)
5898			continue;
5899		if (ch->num_queue_pairs > max)
5900			max = ch->num_queue_pairs;
5901	}
5902
5903	return max;
5904}
5905
5906/**
5907 * i40e_validate_num_queues - validate num_queues w.r.t channel
5908 * @pf: ptr to PF device
5909 * @num_queues: number of queues
5910 * @vsi: the parent VSI
5911 * @reconfig_rss: indicates should the RSS be reconfigured or not
5912 *
5913 * This function validates number of queues in the context of new channel
5914 * which is being established and determines if RSS should be reconfigured
5915 * or not for parent VSI.
5916 **/
5917static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
5918				    struct i40e_vsi *vsi, bool *reconfig_rss)
5919{
5920	int max_ch_queues;
5921
5922	if (!reconfig_rss)
5923		return -EINVAL;
5924
5925	*reconfig_rss = false;
5926	if (vsi->current_rss_size) {
5927		if (num_queues > vsi->current_rss_size) {
5928			dev_dbg(&pf->pdev->dev,
5929				"Error: num_queues (%d) > vsi's current_size(%d)\n",
5930				num_queues, vsi->current_rss_size);
5931			return -EINVAL;
5932		} else if ((num_queues < vsi->current_rss_size) &&
5933			   (!is_power_of_2(num_queues))) {
5934			dev_dbg(&pf->pdev->dev,
5935				"Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
5936				num_queues, vsi->current_rss_size);
5937			return -EINVAL;
5938		}
5939	}
5940
5941	if (!is_power_of_2(num_queues)) {
5942		/* Find the max num_queues configured for channel if channel
5943		 * exist.
5944		 * if channel exist, then enforce 'num_queues' to be more than
5945		 * max ever queues configured for channel.
5946		 */
5947		max_ch_queues = i40e_get_max_queues_for_channel(vsi);
5948		if (num_queues < max_ch_queues) {
5949			dev_dbg(&pf->pdev->dev,
5950				"Error: num_queues (%d) < max queues configured for channel(%d)\n",
5951				num_queues, max_ch_queues);
5952			return -EINVAL;
5953		}
5954		*reconfig_rss = true;
5955	}
5956
5957	return 0;
5958}
5959
5960/**
5961 * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
5962 * @vsi: the VSI being setup
5963 * @rss_size: size of RSS, accordingly LUT gets reprogrammed
5964 *
5965 * This function reconfigures RSS by reprogramming LUTs using 'rss_size'
5966 **/
5967static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
5968{
5969	struct i40e_pf *pf = vsi->back;
5970	u8 seed[I40E_HKEY_ARRAY_SIZE];
5971	struct i40e_hw *hw = &pf->hw;
5972	int local_rss_size;
5973	u8 *lut;
5974	int ret;
5975
5976	if (!vsi->rss_size)
5977		return -EINVAL;
5978
5979	if (rss_size > vsi->rss_size)
5980		return -EINVAL;
5981
5982	local_rss_size = min_t(int, vsi->rss_size, rss_size);
5983	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
5984	if (!lut)
5985		return -ENOMEM;
5986
5987	/* Ignoring user configured lut if there is one */
5988	i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);
5989
5990	/* Use user configured hash key if there is one, otherwise
5991	 * use default.
5992	 */
5993	if (vsi->rss_hkey_user)
5994		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
5995	else
5996		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
5997
5998	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
5999	if (ret) {
6000		dev_info(&pf->pdev->dev,
6001			 "Cannot set RSS lut, err %s aq_err %s\n",
6002			 i40e_stat_str(hw, ret),
6003			 i40e_aq_str(hw, hw->aq.asq_last_status));
6004		kfree(lut);
6005		return ret;
6006	}
6007	kfree(lut);
6008
6009	/* Do the update w.r.t. storing rss_size */
6010	if (!vsi->orig_rss_size)
6011		vsi->orig_rss_size = vsi->rss_size;
6012	vsi->current_rss_size = local_rss_size;
6013
6014	return ret;
6015}
6016
6017/**
6018 * i40e_channel_setup_queue_map - Setup a channel queue map
6019 * @pf: ptr to PF device
6020 * @ctxt: VSI context structure
6021 * @ch: ptr to channel structure
6022 *
6023 * Setup queue map for a specific channel
6024 **/
6025static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
6026					 struct i40e_vsi_context *ctxt,
6027					 struct i40e_channel *ch)
6028{
6029	u16 qcount, qmap, sections = 0;
6030	u8 offset = 0;
6031	int pow;
6032
6033	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
6034	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
6035
6036	qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
6037	ch->num_queue_pairs = qcount;
6038
6039	/* find the next higher power-of-2 of num queue pairs */
6040	pow = ilog2(qcount);
6041	if (!is_power_of_2(qcount))
6042		pow++;
6043
6044	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
6045		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
6046
6047	/* Setup queue TC[0].qmap for given VSI context */
6048	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
6049
6050	ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
6051	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
6052	ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
6053	ctxt->info.valid_sections |= cpu_to_le16(sections);
6054}
6055
6056/**
6057 * i40e_add_channel - add a channel by adding VSI
6058 * @pf: ptr to PF device
6059 * @uplink_seid: underlying HW switching element (VEB) ID
6060 * @ch: ptr to channel structure
6061 *
6062 * Add a channel (VSI) using add_vsi and queue_map
6063 **/
6064static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
6065			    struct i40e_channel *ch)
6066{
6067	struct i40e_hw *hw = &pf->hw;
6068	struct i40e_vsi_context ctxt;
6069	u8 enabled_tc = 0x1; /* TC0 enabled */
6070	int ret;
6071
6072	if (ch->type != I40E_VSI_VMDQ2) {
6073		dev_info(&pf->pdev->dev,
6074			 "add new vsi failed, ch->type %d\n", ch->type);
6075		return -EINVAL;
6076	}
6077
6078	memset(&ctxt, 0, sizeof(ctxt));
6079	ctxt.pf_num = hw->pf_id;
6080	ctxt.vf_num = 0;
6081	ctxt.uplink_seid = uplink_seid;
6082	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
6083	if (ch->type == I40E_VSI_VMDQ2)
6084		ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
6085
6086	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
6087		ctxt.info.valid_sections |=
6088		     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
6089		ctxt.info.switch_id =
6090		   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
6091	}
6092
6093	/* Set queue map for a given VSI context */
6094	i40e_channel_setup_queue_map(pf, &ctxt, ch);
6095
6096	/* Now time to create VSI */
6097	ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
6098	if (ret) {
6099		dev_info(&pf->pdev->dev,
6100			 "add new vsi failed, err %s aq_err %s\n",
6101			 i40e_stat_str(&pf->hw, ret),
6102			 i40e_aq_str(&pf->hw,
6103				     pf->hw.aq.asq_last_status));
6104		return -ENOENT;
6105	}
6106
6107	/* Success, update channel, set enabled_tc only if the channel
6108	 * is not a macvlan
6109	 */
6110	ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
6111	ch->seid = ctxt.seid;
6112	ch->vsi_number = ctxt.vsi_number;
6113	ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx);
6114
6115	/* copy just the sections touched not the entire info
6116	 * since not all sections are valid as returned by
6117	 * update vsi params
6118	 */
6119	ch->info.mapping_flags = ctxt.info.mapping_flags;
6120	memcpy(&ch->info.queue_mapping,
6121	       &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
6122	memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
6123	       sizeof(ctxt.info.tc_mapping));
6124
6125	return 0;
6126}
6127
6128static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
6129				  u8 *bw_share)
6130{
6131	struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
6132	i40e_status ret;
6133	int i;
6134
6135	bw_data.tc_valid_bits = ch->enabled_tc;
6136	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
6137		bw_data.tc_bw_credits[i] = bw_share[i];
6138
6139	ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
6140				       &bw_data, NULL);
6141	if (ret) {
6142		dev_info(&vsi->back->pdev->dev,
6143			 "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
6144			 vsi->back->hw.aq.asq_last_status, ch->seid);
6145		return -EINVAL;
6146	}
6147
6148	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
6149		ch->info.qs_handle[i] = bw_data.qs_handles[i];
6150
6151	return 0;
6152}
6153
6154/**
6155 * i40e_channel_config_tx_ring - config TX ring associated with new channel
6156 * @pf: ptr to PF device
6157 * @vsi: the VSI being setup
6158 * @ch: ptr to channel structure
6159 *
6160 * Configure TX rings associated with channel (VSI) since queues are being
6161 * from parent VSI.
6162 **/
6163static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
6164				       struct i40e_vsi *vsi,
6165				       struct i40e_channel *ch)
6166{
6167	i40e_status ret;
6168	int i;
6169	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
6170
6171	/* Enable ETS TCs with equal BW Share for now across all VSIs */
6172	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
6173		if (ch->enabled_tc & BIT(i))
6174			bw_share[i] = 1;
6175	}
6176
6177	/* configure BW for new VSI */
6178	ret = i40e_channel_config_bw(vsi, ch, bw_share);
6179	if (ret) {
6180		dev_info(&vsi->back->pdev->dev,
6181			 "Failed configuring TC map %d for channel (seid %u)\n",
6182			 ch->enabled_tc, ch->seid);
6183		return ret;
6184	}
6185
6186	for (i = 0; i < ch->num_queue_pairs; i++) {
6187		struct i40e_ring *tx_ring, *rx_ring;
6188		u16 pf_q;
6189
6190		pf_q = ch->base_queue + i;
6191
6192		/* Get to TX ring ptr of main VSI, for re-setup TX queue
6193		 * context
6194		 */
6195		tx_ring = vsi->tx_rings[pf_q];
6196		tx_ring->ch = ch;
6197
6198		/* Get the RX ring ptr */
6199		rx_ring = vsi->rx_rings[pf_q];
6200		rx_ring->ch = ch;
6201	}
6202
6203	return 0;
6204}
6205
6206/**
6207 * i40e_setup_hw_channel - setup new channel
6208 * @pf: ptr to PF device
6209 * @vsi: the VSI being setup
6210 * @ch: ptr to channel structure
6211 * @uplink_seid: underlying HW switching element (VEB) ID
6212 * @type: type of channel to be created (VMDq2/VF)
6213 *
6214 * Setup new channel (VSI) based on specified type (VMDq2/VF)
6215 * and configures TX rings accordingly
6216 **/
6217static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
6218					struct i40e_vsi *vsi,
6219					struct i40e_channel *ch,
6220					u16 uplink_seid, u8 type)
6221{
6222	int ret;
6223
6224	ch->initialized = false;
6225	ch->base_queue = vsi->next_base_queue;
6226	ch->type = type;
6227
6228	/* Proceed with creation of channel (VMDq2) VSI */
6229	ret = i40e_add_channel(pf, uplink_seid, ch);
6230	if (ret) {
6231		dev_info(&pf->pdev->dev,
6232			 "failed to add_channel using uplink_seid %u\n",
6233			 uplink_seid);
6234		return ret;
6235	}
6236
6237	/* Mark the successful creation of channel */
6238	ch->initialized = true;
6239
6240	/* Reconfigure TX queues using QTX_CTL register */
6241	ret = i40e_channel_config_tx_ring(pf, vsi, ch);
6242	if (ret) {
6243		dev_info(&pf->pdev->dev,
6244			 "failed to configure TX rings for channel %u\n",
6245			 ch->seid);
6246		return ret;
6247	}
6248
6249	/* update 'next_base_queue' */
6250	vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs;
6251	dev_dbg(&pf->pdev->dev,
6252		"Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
6253		ch->seid, ch->vsi_number, ch->stat_counter_idx,
6254		ch->num_queue_pairs,
6255		vsi->next_base_queue);
6256	return ret;
6257}
6258
6259/**
6260 * i40e_setup_channel - setup new channel using uplink element
6261 * @pf: ptr to PF device
6262 * @vsi: pointer to the VSI to set up the channel within
6263 * @ch: ptr to channel structure
6264 *
6265 * Setup new channel (VSI) based on specified type (VMDq2/VF)
6266 * and uplink switching element (uplink_seid)
6267 **/
6268static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
6269			       struct i40e_channel *ch)
6270{
6271	u8 vsi_type;
6272	u16 seid;
6273	int ret;
6274
6275	if (vsi->type == I40E_VSI_MAIN) {
6276		vsi_type = I40E_VSI_VMDQ2;
6277	} else {
6278		dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
6279			vsi->type);
6280		return false;
6281	}
6282
6283	/* underlying switching element */
6284	seid = pf->vsi[pf->lan_vsi]->uplink_seid;
6285
6286	/* create channel (VSI), configure TX rings */
6287	ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type);
6288	if (ret) {
6289		dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
6290		return false;
6291	}
6292
6293	return ch->initialized ? true : false;
6294}
6295
6296/**
6297 * i40e_validate_and_set_switch_mode - sets up switch mode correctly
6298 * @vsi: ptr to VSI which has PF backing
6299 *
6300 * Sets up switch mode correctly if it needs to be changed and perform
6301 * what are allowed modes.
6302 **/
6303static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
6304{
6305	u8 mode;
6306	struct i40e_pf *pf = vsi->back;
6307	struct i40e_hw *hw = &pf->hw;
6308	int ret;
6309
6310	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities);
6311	if (ret)
6312		return -EINVAL;
6313
6314	if (hw->dev_caps.switch_mode) {
6315		/* if switch mode is set, support mode2 (non-tunneled for
6316		 * cloud filter) for now
6317		 */
6318		u32 switch_mode = hw->dev_caps.switch_mode &
6319				  I40E_SWITCH_MODE_MASK;
6320		if (switch_mode >= I40E_CLOUD_FILTER_MODE1) {
6321			if (switch_mode == I40E_CLOUD_FILTER_MODE2)
6322				return 0;
6323			dev_err(&pf->pdev->dev,
6324				"Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
6325				hw->dev_caps.switch_mode);
6326			return -EINVAL;
6327		}
6328	}
6329
6330	/* Set Bit 7 to be valid */
6331	mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
6332
6333	/* Set L4type for TCP support */
6334	mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP;
6335
6336	/* Set cloud filter mode */
6337	mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
6338
6339	/* Prep mode field for set_switch_config */
6340	ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
6341					pf->last_sw_conf_valid_flags,
6342					mode, NULL);
6343	if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
6344		dev_err(&pf->pdev->dev,
6345			"couldn't set switch config bits, err %s aq_err %s\n",
6346			i40e_stat_str(hw, ret),
6347			i40e_aq_str(hw,
6348				    hw->aq.asq_last_status));
6349
6350	return ret;
6351}
6352
6353/**
6354 * i40e_create_queue_channel - function to create channel
6355 * @vsi: VSI to be configured
6356 * @ch: ptr to channel (it contains channel specific params)
6357 *
6358 * This function creates channel (VSI) using num_queues specified by user,
6359 * reconfigs RSS if needed.
6360 **/
6361int i40e_create_queue_channel(struct i40e_vsi *vsi,
6362			      struct i40e_channel *ch)
6363{
6364	struct i40e_pf *pf = vsi->back;
6365	bool reconfig_rss;
6366	int err;
6367
6368	if (!ch)
6369		return -EINVAL;
6370
6371	if (!ch->num_queue_pairs) {
6372		dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
6373			ch->num_queue_pairs);
6374		return -EINVAL;
6375	}
6376
6377	/* validate user requested num_queues for channel */
6378	err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
6379				       &reconfig_rss);
6380	if (err) {
6381		dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
6382			 ch->num_queue_pairs);
6383		return -EINVAL;
6384	}
6385
6386	/* By default we are in VEPA mode, if this is the first VF/VMDq
6387	 * VSI to be added switch to VEB mode.
6388	 */
6389
6390	if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
6391		pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
6392
6393		if (vsi->type == I40E_VSI_MAIN) {
6394			if (pf->flags & I40E_FLAG_TC_MQPRIO)
6395				i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
6396			else
6397				i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
6398		}
6399		/* now onwards for main VSI, number of queues will be value
6400		 * of TC0's queue count
6401		 */
6402	}
6403
6404	/* By this time, vsi->cnt_q_avail shall be set to non-zero and
6405	 * it should be more than num_queues
6406	 */
6407	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
6408		dev_dbg(&pf->pdev->dev,
6409			"Error: cnt_q_avail (%u) less than num_queues %d\n",
6410			vsi->cnt_q_avail, ch->num_queue_pairs);
6411		return -EINVAL;
6412	}
6413
6414	/* reconfig_rss only if vsi type is MAIN_VSI */
6415	if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
6416		err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
6417		if (err) {
6418			dev_info(&pf->pdev->dev,
6419				 "Error: unable to reconfig rss for num_queues (%u)\n",
6420				 ch->num_queue_pairs);
6421			return -EINVAL;
6422		}
6423	}
6424
6425	if (!i40e_setup_channel(pf, vsi, ch)) {
6426		dev_info(&pf->pdev->dev, "Failed to setup channel\n");
6427		return -EINVAL;
6428	}
6429
6430	dev_info(&pf->pdev->dev,
6431		 "Setup channel (id:%u) utilizing num_queues %d\n",
6432		 ch->seid, ch->num_queue_pairs);
6433
6434	/* configure VSI for BW limit */
6435	if (ch->max_tx_rate) {
6436		u64 credits = ch->max_tx_rate;
6437
6438		if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
6439			return -EINVAL;
6440
6441		do_div(credits, I40E_BW_CREDIT_DIVISOR);
6442		dev_dbg(&pf->pdev->dev,
6443			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
6444			ch->max_tx_rate,
6445			credits,
6446			ch->seid);
6447	}
6448
6449	/* in case of VF, this will be main SRIOV VSI */
6450	ch->parent_vsi = vsi;
6451
6452	/* and update main_vsi's count for queue_available to use */
6453	vsi->cnt_q_avail -= ch->num_queue_pairs;
6454
6455	return 0;
6456}
6457
6458/**
6459 * i40e_configure_queue_channels - Add queue channel for the given TCs
6460 * @vsi: VSI to be configured
6461 *
6462 * Configures queue channel mapping to the given TCs
6463 **/
6464static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
6465{
6466	struct i40e_channel *ch;
6467	u64 max_rate = 0;
6468	int ret = 0, i;
6469
6470	/* Create app vsi with the TCs. Main VSI with TC0 is already set up */
6471	vsi->tc_seid_map[0] = vsi->seid;
6472	for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) {
6473		if (vsi->tc_config.enabled_tc & BIT(i)) {
6474			ch = kzalloc(sizeof(*ch), GFP_KERNEL);
6475			if (!ch) {
6476				ret = -ENOMEM;
6477				goto err_free;
6478			}
6479
6480			INIT_LIST_HEAD(&ch->list);
6481			ch->num_queue_pairs =
6482				vsi->tc_config.tc_info[i].qcount;
6483			ch->base_queue =
6484				vsi->tc_config.tc_info[i].qoffset;
6485
6486			/* Bandwidth limit through tc interface is in bytes/s,
6487			 * change to Mbit/s
6488			 */
6489			max_rate = vsi->mqprio_qopt.max_rate[i];
6490			do_div(max_rate, I40E_BW_MBPS_DIVISOR);
6491			ch->max_tx_rate = max_rate;
6492
6493			list_add_tail(&ch->list, &vsi->ch_list);
6494
6495			ret = i40e_create_queue_channel(vsi, ch);
6496			if (ret) {
6497				dev_err(&vsi->back->pdev->dev,
6498					"Failed creating queue channel with TC%d: queues %d\n",
6499					i, ch->num_queue_pairs);
6500				goto err_free;
6501			}
6502			vsi->tc_seid_map[i] = ch->seid;
6503		}
6504	}
6505	return ret;
6506
6507err_free:
6508	i40e_remove_queue_channels(vsi);
6509	return ret;
6510}
6511
6512/**
6513 * i40e_veb_config_tc - Configure TCs for given VEB
6514 * @veb: given VEB
6515 * @enabled_tc: TC bitmap
6516 *
6517 * Configures given TC bitmap for VEB (switching) element
6518 **/
6519int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
6520{
6521	struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
6522	struct i40e_pf *pf = veb->pf;
6523	int ret = 0;
6524	int i;
6525
6526	/* No TCs or already enabled TCs just return */
6527	if (!enabled_tc || veb->enabled_tc == enabled_tc)
6528		return ret;
6529
6530	bw_data.tc_valid_bits = enabled_tc;
6531	/* bw_data.absolute_credits is not set (relative) */
6532
6533	/* Enable ETS TCs with equal BW Share for now */
6534	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
6535		if (enabled_tc & BIT(i))
6536			bw_data.tc_bw_share_credits[i] = 1;
6537	}
6538
6539	ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
6540						   &bw_data, NULL);
6541	if (ret) {
6542		dev_info(&pf->pdev->dev,
6543			 "VEB bw config failed, err %s aq_err %s\n",
6544			 i40e_stat_str(&pf->hw, ret),
6545			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6546		goto out;
6547	}
6548
6549	/* Update the BW information */
6550	ret = i40e_veb_get_bw_info(veb);
6551	if (ret) {
6552		dev_info(&pf->pdev->dev,
6553			 "Failed getting veb bw config, err %s aq_err %s\n",
6554			 i40e_stat_str(&pf->hw, ret),
6555			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6556	}
6557
6558out:
6559	return ret;
6560}
6561
6562#ifdef CONFIG_I40E_DCB
6563/**
6564 * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
6565 * @pf: PF struct
6566 *
6567 * Reconfigure VEB/VSIs on a given PF; it is assumed that
6568 * the caller would've quiesce all the VSIs before calling
6569 * this function
6570 **/
6571static void i40e_dcb_reconfigure(struct i40e_pf *pf)
6572{
6573	u8 tc_map = 0;
6574	int ret;
6575	u8 v;
6576
6577	/* Enable the TCs available on PF to all VEBs */
6578	tc_map = i40e_pf_get_tc_map(pf);
6579	for (v = 0; v < I40E_MAX_VEB; v++) {
6580		if (!pf->veb[v])
6581			continue;
6582		ret = i40e_veb_config_tc(pf->veb[v], tc_map);
6583		if (ret) {
6584			dev_info(&pf->pdev->dev,
6585				 "Failed configuring TC for VEB seid=%d\n",
6586				 pf->veb[v]->seid);
6587			/* Will try to configure as many components */
6588		}
6589	}
6590
6591	/* Update each VSI */
6592	for (v = 0; v < pf->num_alloc_vsi; v++) {
6593		if (!pf->vsi[v])
6594			continue;
6595
6596		/* - Enable all TCs for the LAN VSI
6597		 * - For all others keep them at TC0 for now
6598		 */
6599		if (v == pf->lan_vsi)
6600			tc_map = i40e_pf_get_tc_map(pf);
6601		else
6602			tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
6603
6604		ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
6605		if (ret) {
6606			dev_info(&pf->pdev->dev,
6607				 "Failed configuring TC for VSI seid=%d\n",
6608				 pf->vsi[v]->seid);
6609			/* Will try to configure as many components */
6610		} else {
6611			/* Re-configure VSI vectors based on updated TC map */
6612			i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
6613			if (pf->vsi[v]->netdev)
6614				i40e_dcbnl_set_all(pf->vsi[v]);
6615		}
6616	}
6617}
6618
6619/**
6620 * i40e_resume_port_tx - Resume port Tx
6621 * @pf: PF struct
6622 *
6623 * Resume a port's Tx and issue a PF reset in case of failure to
6624 * resume.
6625 **/
6626static int i40e_resume_port_tx(struct i40e_pf *pf)
6627{
6628	struct i40e_hw *hw = &pf->hw;
6629	int ret;
6630
6631	ret = i40e_aq_resume_port_tx(hw, NULL);
6632	if (ret) {
6633		dev_info(&pf->pdev->dev,
6634			 "Resume Port Tx failed, err %s aq_err %s\n",
6635			  i40e_stat_str(&pf->hw, ret),
6636			  i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6637		/* Schedule PF reset to recover */
6638		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
6639		i40e_service_event_schedule(pf);
6640	}
6641
6642	return ret;
6643}
6644
6645/**
6646 * i40e_init_pf_dcb - Initialize DCB configuration
6647 * @pf: PF being configured
6648 *
6649 * Query the current DCB configuration and cache it
6650 * in the hardware structure
6651 **/
6652static int i40e_init_pf_dcb(struct i40e_pf *pf)
6653{
6654	struct i40e_hw *hw = &pf->hw;
6655	int err = 0;
6656
6657	/* Do not enable DCB for SW1 and SW2 images even if the FW is capable
6658	 * Also do not enable DCBx if FW LLDP agent is disabled
6659	 */
6660	if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) ||
6661	    (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) {
6662		dev_info(&pf->pdev->dev, "DCB is not supported or FW LLDP is disabled\n");
6663		err = I40E_NOT_SUPPORTED;
6664		goto out;
6665	}
6666
6667	err = i40e_init_dcb(hw, true);
6668	if (!err) {
6669		/* Device/Function is not DCBX capable */
6670		if ((!hw->func_caps.dcb) ||
6671		    (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
6672			dev_info(&pf->pdev->dev,
6673				 "DCBX offload is not supported or is disabled for this PF.\n");
6674		} else {
6675			/* When status is not DISABLED then DCBX in FW */
6676			pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
6677				       DCB_CAP_DCBX_VER_IEEE;
6678
6679			pf->flags |= I40E_FLAG_DCB_CAPABLE;
6680			/* Enable DCB tagging only when more than one TC
6681			 * or explicitly disable if only one TC
6682			 */
6683			if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
6684				pf->flags |= I40E_FLAG_DCB_ENABLED;
6685			else
6686				pf->flags &= ~I40E_FLAG_DCB_ENABLED;
6687			dev_dbg(&pf->pdev->dev,
6688				"DCBX offload is supported for this PF.\n");
6689		}
6690	} else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) {
6691		dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n");
6692		pf->flags |= I40E_FLAG_DISABLE_FW_LLDP;
6693	} else {
6694		dev_info(&pf->pdev->dev,
6695			 "Query for DCB configuration failed, err %s aq_err %s\n",
6696			 i40e_stat_str(&pf->hw, err),
6697			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6698	}
6699
6700out:
6701	return err;
6702}
6703#endif /* CONFIG_I40E_DCB */
6704
6705/**
6706 * i40e_print_link_message - print link up or down
6707 * @vsi: the VSI for which link needs a message
6708 * @isup: true of link is up, false otherwise
6709 */
6710void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
6711{
6712	enum i40e_aq_link_speed new_speed;
6713	struct i40e_pf *pf = vsi->back;
6714	char *speed = "Unknown";
6715	char *fc = "Unknown";
6716	char *fec = "";
6717	char *req_fec = "";
6718	char *an = "";
6719
6720	if (isup)
6721		new_speed = pf->hw.phy.link_info.link_speed;
6722	else
6723		new_speed = I40E_LINK_SPEED_UNKNOWN;
6724
6725	if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
6726		return;
6727	vsi->current_isup = isup;
6728	vsi->current_speed = new_speed;
6729	if (!isup) {
6730		netdev_info(vsi->netdev, "NIC Link is Down\n");
6731		return;
6732	}
6733
6734	/* Warn user if link speed on NPAR enabled partition is not at
6735	 * least 10GB
6736	 */
6737	if (pf->hw.func_caps.npar_enable &&
6738	    (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
6739	     pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
6740		netdev_warn(vsi->netdev,
6741			    "The partition detected link speed that is less than 10Gbps\n");
6742
6743	switch (pf->hw.phy.link_info.link_speed) {
6744	case I40E_LINK_SPEED_40GB:
6745		speed = "40 G";
6746		break;
6747	case I40E_LINK_SPEED_20GB:
6748		speed = "20 G";
6749		break;
6750	case I40E_LINK_SPEED_25GB:
6751		speed = "25 G";
6752		break;
6753	case I40E_LINK_SPEED_10GB:
6754		speed = "10 G";
6755		break;
6756	case I40E_LINK_SPEED_5GB:
6757		speed = "5 G";
6758		break;
6759	case I40E_LINK_SPEED_2_5GB:
6760		speed = "2.5 G";
6761		break;
6762	case I40E_LINK_SPEED_1GB:
6763		speed = "1000 M";
6764		break;
6765	case I40E_LINK_SPEED_100MB:
6766		speed = "100 M";
6767		break;
6768	default:
6769		break;
6770	}
6771
6772	switch (pf->hw.fc.current_mode) {
6773	case I40E_FC_FULL:
6774		fc = "RX/TX";
6775		break;
6776	case I40E_FC_TX_PAUSE:
6777		fc = "TX";
6778		break;
6779	case I40E_FC_RX_PAUSE:
6780		fc = "RX";
6781		break;
6782	default:
6783		fc = "None";
6784		break;
6785	}
6786
6787	if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
6788		req_fec = "None";
6789		fec = "None";
6790		an = "False";
6791
6792		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
6793			an = "True";
6794
6795		if (pf->hw.phy.link_info.fec_info &
6796		    I40E_AQ_CONFIG_FEC_KR_ENA)
6797			fec = "CL74 FC-FEC/BASE-R";
6798		else if (pf->hw.phy.link_info.fec_info &
6799			 I40E_AQ_CONFIG_FEC_RS_ENA)
6800			fec = "CL108 RS-FEC";
6801
6802		/* 'CL108 RS-FEC' should be displayed when RS is requested, or
6803		 * both RS and FC are requested
6804		 */
6805		if (vsi->back->hw.phy.link_info.req_fec_info &
6806		    (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
6807			if (vsi->back->hw.phy.link_info.req_fec_info &
6808			    I40E_AQ_REQUEST_FEC_RS)
6809				req_fec = "CL108 RS-FEC";
6810			else
6811				req_fec = "CL74 FC-FEC/BASE-R";
6812		}
6813		netdev_info(vsi->netdev,
6814			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
6815			    speed, req_fec, fec, an, fc);
6816	} else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) {
6817		req_fec = "None";
6818		fec = "None";
6819		an = "False";
6820
6821		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
6822			an = "True";
6823
6824		if (pf->hw.phy.link_info.fec_info &
6825		    I40E_AQ_CONFIG_FEC_KR_ENA)
6826			fec = "CL74 FC-FEC/BASE-R";
6827
6828		if (pf->hw.phy.link_info.req_fec_info &
6829		    I40E_AQ_REQUEST_FEC_KR)
6830			req_fec = "CL74 FC-FEC/BASE-R";
6831
6832		netdev_info(vsi->netdev,
6833			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
6834			    speed, req_fec, fec, an, fc);
6835	} else {
6836		netdev_info(vsi->netdev,
6837			    "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n",
6838			    speed, fc);
6839	}
6840
6841}
6842
6843/**
6844 * i40e_up_complete - Finish the last steps of bringing up a connection
6845 * @vsi: the VSI being configured
6846 **/
6847static int i40e_up_complete(struct i40e_vsi *vsi)
6848{
6849	struct i40e_pf *pf = vsi->back;
6850	int err;
6851
6852	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
6853		i40e_vsi_configure_msix(vsi);
6854	else
6855		i40e_configure_msi_and_legacy(vsi);
6856
6857	/* start rings */
6858	err = i40e_vsi_start_rings(vsi);
6859	if (err)
6860		return err;
6861
6862	clear_bit(__I40E_VSI_DOWN, vsi->state);
6863	i40e_napi_enable_all(vsi);
6864	i40e_vsi_enable_irq(vsi);
6865
6866	if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
6867	    (vsi->netdev)) {
6868		i40e_print_link_message(vsi, true);
6869		netif_tx_start_all_queues(vsi->netdev);
6870		netif_carrier_on(vsi->netdev);
6871	}
6872
6873	/* replay FDIR SB filters */
6874	if (vsi->type == I40E_VSI_FDIR) {
6875		/* reset fd counters */
6876		pf->fd_add_err = 0;
6877		pf->fd_atr_cnt = 0;
6878		i40e_fdir_filter_restore(vsi);
6879	}
6880
6881	/* On the next run of the service_task, notify any clients of the new
6882	 * opened netdev
6883	 */
6884	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
6885	i40e_service_event_schedule(pf);
6886
6887	return 0;
6888}
6889
6890/**
6891 * i40e_vsi_reinit_locked - Reset the VSI
6892 * @vsi: the VSI being configured
6893 *
6894 * Rebuild the ring structs after some configuration
6895 * has changed, e.g. MTU size.
6896 **/
6897static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
6898{
6899	struct i40e_pf *pf = vsi->back;
6900
6901	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
6902		usleep_range(1000, 2000);
6903	i40e_down(vsi);
6904
6905	i40e_up(vsi);
6906	clear_bit(__I40E_CONFIG_BUSY, pf->state);
6907}
6908
6909/**
6910 * i40e_force_link_state - Force the link status
6911 * @pf: board private structure
6912 * @is_up: whether the link state should be forced up or down
6913 **/
6914static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up)
6915{
6916	struct i40e_aq_get_phy_abilities_resp abilities;
6917	struct i40e_aq_set_phy_config config = {0};
6918	bool non_zero_phy_type = is_up;
6919	struct i40e_hw *hw = &pf->hw;
6920	i40e_status err;
6921	u64 mask;
6922	u8 speed;
6923
6924	/* Card might've been put in an unstable state by other drivers
6925	 * and applications, which causes incorrect speed values being
6926	 * set on startup. In order to clear speed registers, we call
6927	 * get_phy_capabilities twice, once to get initial state of
6928	 * available speeds, and once to get current PHY config.
6929	 */
6930	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities,
6931					   NULL);
6932	if (err) {
6933		dev_err(&pf->pdev->dev,
6934			"failed to get phy cap., ret =  %s last_status =  %s\n",
6935			i40e_stat_str(hw, err),
6936			i40e_aq_str(hw, hw->aq.asq_last_status));
6937		return err;
6938	}
6939	speed = abilities.link_speed;
6940
6941	/* Get the current phy config */
6942	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
6943					   NULL);
6944	if (err) {
6945		dev_err(&pf->pdev->dev,
6946			"failed to get phy cap., ret =  %s last_status =  %s\n",
6947			i40e_stat_str(hw, err),
6948			i40e_aq_str(hw, hw->aq.asq_last_status));
6949		return err;
6950	}
6951
6952	/* If link needs to go up, but was not forced to go down,
6953	 * and its speed values are OK, no need for a flap
6954	 * if non_zero_phy_type was set, still need to force up
6955	 */
6956	if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)
6957		non_zero_phy_type = true;
6958	else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0)
6959		return I40E_SUCCESS;
6960
6961	/* To force link we need to set bits for all supported PHY types,
6962	 * but there are now more than 32, so we need to split the bitmap
6963	 * across two fields.
6964	 */
6965	mask = I40E_PHY_TYPES_BITMASK;
6966	config.phy_type =
6967		non_zero_phy_type ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
6968	config.phy_type_ext =
6969		non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0;
6970	/* Copy the old settings, except of phy_type */
6971	config.abilities = abilities.abilities;
6972	if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) {
6973		if (is_up)
6974			config.abilities |= I40E_AQ_PHY_ENABLE_LINK;
6975		else
6976			config.abilities &= ~(I40E_AQ_PHY_ENABLE_LINK);
6977	}
6978	if (abilities.link_speed != 0)
6979		config.link_speed = abilities.link_speed;
6980	else
6981		config.link_speed = speed;
6982	config.eee_capability = abilities.eee_capability;
6983	config.eeer = abilities.eeer_val;
6984	config.low_power_ctrl = abilities.d3_lpan;
6985	config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
6986			    I40E_AQ_PHY_FEC_CONFIG_MASK;
6987	err = i40e_aq_set_phy_config(hw, &config, NULL);
6988
6989	if (err) {
6990		dev_err(&pf->pdev->dev,
6991			"set phy config ret =  %s last_status =  %s\n",
6992			i40e_stat_str(&pf->hw, err),
6993			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
6994		return err;
6995	}
6996
6997	/* Update the link info */
6998	err = i40e_update_link_info(hw);
6999	if (err) {
7000		/* Wait a little bit (on 40G cards it sometimes takes a really
7001		 * long time for link to come back from the atomic reset)
7002		 * and try once more
7003		 */
7004		msleep(1000);
7005		i40e_update_link_info(hw);
7006	}
7007
7008	i40e_aq_set_link_restart_an(hw, is_up, NULL);
7009
7010	return I40E_SUCCESS;
7011}
7012
7013/**
7014 * i40e_up - Bring the connection back up after being down
7015 * @vsi: the VSI being configured
7016 **/
7017int i40e_up(struct i40e_vsi *vsi)
7018{
7019	int err;
7020
7021	if (vsi->type == I40E_VSI_MAIN &&
7022	    (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
7023	     vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
7024		i40e_force_link_state(vsi->back, true);
7025
7026	err = i40e_vsi_configure(vsi);
7027	if (!err)
7028		err = i40e_up_complete(vsi);
7029
7030	return err;
7031}
7032
7033/**
7034 * i40e_down - Shutdown the connection processing
7035 * @vsi: the VSI being stopped
7036 **/
7037void i40e_down(struct i40e_vsi *vsi)
7038{
7039	int i;
7040
7041	/* It is assumed that the caller of this function
7042	 * sets the vsi->state __I40E_VSI_DOWN bit.
7043	 */
7044	if (vsi->netdev) {
7045		netif_carrier_off(vsi->netdev);
7046		netif_tx_disable(vsi->netdev);
7047	}
7048	i40e_vsi_disable_irq(vsi);
7049	i40e_vsi_stop_rings(vsi);
7050	if (vsi->type == I40E_VSI_MAIN &&
7051	   (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
7052	    vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
7053		i40e_force_link_state(vsi->back, false);
7054	i40e_napi_disable_all(vsi);
7055
7056	for (i = 0; i < vsi->num_queue_pairs; i++) {
7057		i40e_clean_tx_ring(vsi->tx_rings[i]);
7058		if (i40e_enabled_xdp_vsi(vsi)) {
7059			/* Make sure that in-progress ndo_xdp_xmit and
7060			 * ndo_xsk_wakeup calls are completed.
7061			 */
7062			synchronize_rcu();
7063			i40e_clean_tx_ring(vsi->xdp_rings[i]);
7064		}
7065		i40e_clean_rx_ring(vsi->rx_rings[i]);
7066	}
7067
7068}
7069
7070/**
7071 * i40e_validate_mqprio_qopt- validate queue mapping info
7072 * @vsi: the VSI being configured
7073 * @mqprio_qopt: queue parametrs
7074 **/
7075static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
7076				     struct tc_mqprio_qopt_offload *mqprio_qopt)
7077{
7078	u64 sum_max_rate = 0;
7079	u64 max_rate = 0;
7080	int i;
7081
7082	if (mqprio_qopt->qopt.offset[0] != 0 ||
7083	    mqprio_qopt->qopt.num_tc < 1 ||
7084	    mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
7085		return -EINVAL;
7086	for (i = 0; ; i++) {
7087		if (!mqprio_qopt->qopt.count[i])
7088			return -EINVAL;
7089		if (mqprio_qopt->min_rate[i]) {
7090			dev_err(&vsi->back->pdev->dev,
7091				"Invalid min tx rate (greater than 0) specified\n");
7092			return -EINVAL;
7093		}
7094		max_rate = mqprio_qopt->max_rate[i];
7095		do_div(max_rate, I40E_BW_MBPS_DIVISOR);
7096		sum_max_rate += max_rate;
7097
7098		if (i >= mqprio_qopt->qopt.num_tc - 1)
7099			break;
7100		if (mqprio_qopt->qopt.offset[i + 1] !=
7101		    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
7102			return -EINVAL;
7103	}
7104	if (vsi->num_queue_pairs <
7105	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
7106		dev_err(&vsi->back->pdev->dev,
7107			"Failed to create traffic channel, insufficient number of queues.\n");
7108		return -EINVAL;
7109	}
7110	if (sum_max_rate > i40e_get_link_speed(vsi)) {
7111		dev_err(&vsi->back->pdev->dev,
7112			"Invalid max tx rate specified\n");
7113		return -EINVAL;
7114	}
7115	return 0;
7116}
7117
7118/**
7119 * i40e_vsi_set_default_tc_config - set default values for tc configuration
7120 * @vsi: the VSI being configured
7121 **/
7122static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
7123{
7124	u16 qcount;
7125	int i;
7126
7127	/* Only TC0 is enabled */
7128	vsi->tc_config.numtc = 1;
7129	vsi->tc_config.enabled_tc = 1;
7130	qcount = min_t(int, vsi->alloc_queue_pairs,
7131		       i40e_pf_get_max_q_per_tc(vsi->back));
7132	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
7133		/* For the TC that is not enabled set the offset to to default
7134		 * queue and allocate one queue for the given TC.
7135		 */
7136		vsi->tc_config.tc_info[i].qoffset = 0;
7137		if (i == 0)
7138			vsi->tc_config.tc_info[i].qcount = qcount;
7139		else
7140			vsi->tc_config.tc_info[i].qcount = 1;
7141		vsi->tc_config.tc_info[i].netdev_tc = 0;
7142	}
7143}
7144
7145/**
7146 * i40e_del_macvlan_filter
7147 * @hw: pointer to the HW structure
7148 * @seid: seid of the channel VSI
7149 * @macaddr: the mac address to apply as a filter
7150 * @aq_err: store the admin Q error
7151 *
7152 * This function deletes a mac filter on the channel VSI which serves as the
7153 * macvlan. Returns 0 on success.
7154 **/
7155static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
7156					   const u8 *macaddr, int *aq_err)
7157{
7158	struct i40e_aqc_remove_macvlan_element_data element;
7159	i40e_status status;
7160
7161	memset(&element, 0, sizeof(element));
7162	ether_addr_copy(element.mac_addr, macaddr);
7163	element.vlan_tag = 0;
7164	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
7165	status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL);
7166	*aq_err = hw->aq.asq_last_status;
7167
7168	return status;
7169}
7170
7171/**
7172 * i40e_add_macvlan_filter
7173 * @hw: pointer to the HW structure
7174 * @seid: seid of the channel VSI
7175 * @macaddr: the mac address to apply as a filter
7176 * @aq_err: store the admin Q error
7177 *
7178 * This function adds a mac filter on the channel VSI which serves as the
7179 * macvlan. Returns 0 on success.
7180 **/
7181static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
7182					   const u8 *macaddr, int *aq_err)
7183{
7184	struct i40e_aqc_add_macvlan_element_data element;
7185	i40e_status status;
7186	u16 cmd_flags = 0;
7187
7188	ether_addr_copy(element.mac_addr, macaddr);
7189	element.vlan_tag = 0;
7190	element.queue_number = 0;
7191	element.match_method = I40E_AQC_MM_ERR_NO_RES;
7192	cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
7193	element.flags = cpu_to_le16(cmd_flags);
7194	status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL);
7195	*aq_err = hw->aq.asq_last_status;
7196
7197	return status;
7198}
7199
7200/**
7201 * i40e_reset_ch_rings - Reset the queue contexts in a channel
7202 * @vsi: the VSI we want to access
7203 * @ch: the channel we want to access
7204 */
7205static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch)
7206{
7207	struct i40e_ring *tx_ring, *rx_ring;
7208	u16 pf_q;
7209	int i;
7210
7211	for (i = 0; i < ch->num_queue_pairs; i++) {
7212		pf_q = ch->base_queue + i;
7213		tx_ring = vsi->tx_rings[pf_q];
7214		tx_ring->ch = NULL;
7215		rx_ring = vsi->rx_rings[pf_q];
7216		rx_ring->ch = NULL;
7217	}
7218}
7219
7220/**
7221 * i40e_free_macvlan_channels
7222 * @vsi: the VSI we want to access
7223 *
7224 * This function frees the Qs of the channel VSI from
7225 * the stack and also deletes the channel VSIs which
7226 * serve as macvlans.
7227 */
7228static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
7229{
7230	struct i40e_channel *ch, *ch_tmp;
7231	int ret;
7232
7233	if (list_empty(&vsi->macvlan_list))
7234		return;
7235
7236	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
7237		struct i40e_vsi *parent_vsi;
7238
7239		if (i40e_is_channel_macvlan(ch)) {
7240			i40e_reset_ch_rings(vsi, ch);
7241			clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
7242			netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev);
7243			netdev_set_sb_channel(ch->fwd->netdev, 0);
7244			kfree(ch->fwd);
7245			ch->fwd = NULL;
7246		}
7247
7248		list_del(&ch->list);
7249		parent_vsi = ch->parent_vsi;
7250		if (!parent_vsi || !ch->initialized) {
7251			kfree(ch);
7252			continue;
7253		}
7254
7255		/* remove the VSI */
7256		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
7257					     NULL);
7258		if (ret)
7259			dev_err(&vsi->back->pdev->dev,
7260				"unable to remove channel (%d) for parent VSI(%d)\n",
7261				ch->seid, parent_vsi->seid);
7262		kfree(ch);
7263	}
7264	vsi->macvlan_cnt = 0;
7265}
7266
7267/**
7268 * i40e_fwd_ring_up - bring the macvlan device up
7269 * @vsi: the VSI we want to access
7270 * @vdev: macvlan netdevice
7271 * @fwd: the private fwd structure
7272 */
7273static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
7274			    struct i40e_fwd_adapter *fwd)
7275{
7276	struct i40e_channel *ch = NULL, *ch_tmp, *iter;
7277	int ret = 0, num_tc = 1,  i, aq_err;
7278	struct i40e_pf *pf = vsi->back;
7279	struct i40e_hw *hw = &pf->hw;
7280
7281	/* Go through the list and find an available channel */
7282	list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) {
7283		if (!i40e_is_channel_macvlan(iter)) {
7284			iter->fwd = fwd;
7285			/* record configuration for macvlan interface in vdev */
7286			for (i = 0; i < num_tc; i++)
7287				netdev_bind_sb_channel_queue(vsi->netdev, vdev,
7288							     i,
7289							     iter->num_queue_pairs,
7290							     iter->base_queue);
7291			for (i = 0; i < iter->num_queue_pairs; i++) {
7292				struct i40e_ring *tx_ring, *rx_ring;
7293				u16 pf_q;
7294
7295				pf_q = iter->base_queue + i;
7296
7297				/* Get to TX ring ptr */
7298				tx_ring = vsi->tx_rings[pf_q];
7299				tx_ring->ch = iter;
7300
7301				/* Get the RX ring ptr */
7302				rx_ring = vsi->rx_rings[pf_q];
7303				rx_ring->ch = iter;
7304			}
7305			ch = iter;
7306			break;
7307		}
7308	}
7309
7310	if (!ch)
7311		return -EINVAL;
7312
7313	/* Guarantee all rings are updated before we update the
7314	 * MAC address filter.
7315	 */
7316	wmb();
7317
7318	/* Add a mac filter */
7319	ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err);
7320	if (ret) {
7321		/* if we cannot add the MAC rule then disable the offload */
7322		macvlan_release_l2fw_offload(vdev);
7323		for (i = 0; i < ch->num_queue_pairs; i++) {
7324			struct i40e_ring *rx_ring;
7325			u16 pf_q;
7326
7327			pf_q = ch->base_queue + i;
7328			rx_ring = vsi->rx_rings[pf_q];
7329			rx_ring->netdev = NULL;
7330		}
7331		dev_info(&pf->pdev->dev,
7332			 "Error adding mac filter on macvlan err %s, aq_err %s\n",
7333			  i40e_stat_str(hw, ret),
7334			  i40e_aq_str(hw, aq_err));
7335		netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n");
7336	}
7337
7338	return ret;
7339}
7340
7341/**
7342 * i40e_setup_macvlans - create the channels which will be macvlans
7343 * @vsi: the VSI we want to access
7344 * @macvlan_cnt: no. of macvlans to be setup
7345 * @qcnt: no. of Qs per macvlan
7346 * @vdev: macvlan netdevice
7347 */
7348static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
7349			       struct net_device *vdev)
7350{
7351	struct i40e_pf *pf = vsi->back;
7352	struct i40e_hw *hw = &pf->hw;
7353	struct i40e_vsi_context ctxt;
7354	u16 sections, qmap, num_qps;
7355	struct i40e_channel *ch;
7356	int i, pow, ret = 0;
7357	u8 offset = 0;
7358
7359	if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt)
7360		return -EINVAL;
7361
7362	num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt);
7363
7364	/* find the next higher power-of-2 of num queue pairs */
7365	pow = fls(roundup_pow_of_two(num_qps) - 1);
7366
7367	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
7368		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
7369
7370	/* Setup context bits for the main VSI */
7371	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
7372	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
7373	memset(&ctxt, 0, sizeof(ctxt));
7374	ctxt.seid = vsi->seid;
7375	ctxt.pf_num = vsi->back->hw.pf_id;
7376	ctxt.vf_num = 0;
7377	ctxt.uplink_seid = vsi->uplink_seid;
7378	ctxt.info = vsi->info;
7379	ctxt.info.tc_mapping[0] = cpu_to_le16(qmap);
7380	ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
7381	ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
7382	ctxt.info.valid_sections |= cpu_to_le16(sections);
7383
7384	/* Reconfigure RSS for main VSI with new max queue count */
7385	vsi->rss_size = max_t(u16, num_qps, qcnt);
7386	ret = i40e_vsi_config_rss(vsi);
7387	if (ret) {
7388		dev_info(&pf->pdev->dev,
7389			 "Failed to reconfig RSS for num_queues (%u)\n",
7390			 vsi->rss_size);
7391		return ret;
7392	}
7393	vsi->reconfig_rss = true;
7394	dev_dbg(&vsi->back->pdev->dev,
7395		"Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
7396	vsi->next_base_queue = num_qps;
7397	vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
7398
7399	/* Update the VSI after updating the VSI queue-mapping
7400	 * information
7401	 */
7402	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
7403	if (ret) {
7404		dev_info(&pf->pdev->dev,
7405			 "Update vsi tc config failed, err %s aq_err %s\n",
7406			 i40e_stat_str(hw, ret),
7407			 i40e_aq_str(hw, hw->aq.asq_last_status));
7408		return ret;
7409	}
7410	/* update the local VSI info with updated queue map */
7411	i40e_vsi_update_queue_map(vsi, &ctxt);
7412	vsi->info.valid_sections = 0;
7413
7414	/* Create channels for macvlans */
7415	INIT_LIST_HEAD(&vsi->macvlan_list);
7416	for (i = 0; i < macvlan_cnt; i++) {
7417		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
7418		if (!ch) {
7419			ret = -ENOMEM;
7420			goto err_free;
7421		}
7422		INIT_LIST_HEAD(&ch->list);
7423		ch->num_queue_pairs = qcnt;
7424		if (!i40e_setup_channel(pf, vsi, ch)) {
7425			ret = -EINVAL;
7426			kfree(ch);
7427			goto err_free;
7428		}
7429		ch->parent_vsi = vsi;
7430		vsi->cnt_q_avail -= ch->num_queue_pairs;
7431		vsi->macvlan_cnt++;
7432		list_add_tail(&ch->list, &vsi->macvlan_list);
7433	}
7434
7435	return ret;
7436
7437err_free:
7438	dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
7439	i40e_free_macvlan_channels(vsi);
7440
7441	return ret;
7442}
7443
7444/**
7445 * i40e_fwd_add - configure macvlans
7446 * @netdev: net device to configure
7447 * @vdev: macvlan netdevice
7448 **/
7449static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
7450{
7451	struct i40e_netdev_priv *np = netdev_priv(netdev);
7452	u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors;
7453	struct i40e_vsi *vsi = np->vsi;
7454	struct i40e_pf *pf = vsi->back;
7455	struct i40e_fwd_adapter *fwd;
7456	int avail_macvlan, ret;
7457
7458	if ((pf->flags & I40E_FLAG_DCB_ENABLED)) {
7459		netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
7460		return ERR_PTR(-EINVAL);
7461	}
7462	if ((pf->flags & I40E_FLAG_TC_MQPRIO)) {
7463		netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n");
7464		return ERR_PTR(-EINVAL);
7465	}
7466	if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) {
7467		netdev_info(netdev, "Not enough vectors available to support macvlans\n");
7468		return ERR_PTR(-EINVAL);
7469	}
7470
7471	/* The macvlan device has to be a single Q device so that the
7472	 * tc_to_txq field can be reused to pick the tx queue.
7473	 */
7474	if (netif_is_multiqueue(vdev))
7475		return ERR_PTR(-ERANGE);
7476
7477	if (!vsi->macvlan_cnt) {
7478		/* reserve bit 0 for the pf device */
7479		set_bit(0, vsi->fwd_bitmask);
7480
7481		/* Try to reserve as many queues as possible for macvlans. First
7482		 * reserve 3/4th of max vectors, then half, then quarter and
7483		 * calculate Qs per macvlan as you go
7484		 */
7485		vectors = pf->num_lan_msix;
7486		if (vectors <= I40E_MAX_MACVLANS && vectors > 64) {
7487			/* allocate 4 Qs per macvlan and 32 Qs to the PF*/
7488			q_per_macvlan = 4;
7489			macvlan_cnt = (vectors - 32) / 4;
7490		} else if (vectors <= 64 && vectors > 32) {
7491			/* allocate 2 Qs per macvlan and 16 Qs to the PF*/
7492			q_per_macvlan = 2;
7493			macvlan_cnt = (vectors - 16) / 2;
7494		} else if (vectors <= 32 && vectors > 16) {
7495			/* allocate 1 Q per macvlan and 16 Qs to the PF*/
7496			q_per_macvlan = 1;
7497			macvlan_cnt = vectors - 16;
7498		} else if (vectors <= 16 && vectors > 8) {
7499			/* allocate 1 Q per macvlan and 8 Qs to the PF */
7500			q_per_macvlan = 1;
7501			macvlan_cnt = vectors - 8;
7502		} else {
7503			/* allocate 1 Q per macvlan and 1 Q to the PF */
7504			q_per_macvlan = 1;
7505			macvlan_cnt = vectors - 1;
7506		}
7507
7508		if (macvlan_cnt == 0)
7509			return ERR_PTR(-EBUSY);
7510
7511		/* Quiesce VSI queues */
7512		i40e_quiesce_vsi(vsi);
7513
7514		/* sets up the macvlans but does not "enable" them */
7515		ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan,
7516					  vdev);
7517		if (ret)
7518			return ERR_PTR(ret);
7519
7520		/* Unquiesce VSI */
7521		i40e_unquiesce_vsi(vsi);
7522	}
7523	avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask,
7524					    vsi->macvlan_cnt);
7525	if (avail_macvlan >= I40E_MAX_MACVLANS)
7526		return ERR_PTR(-EBUSY);
7527
7528	/* create the fwd struct */
7529	fwd = kzalloc(sizeof(*fwd), GFP_KERNEL);
7530	if (!fwd)
7531		return ERR_PTR(-ENOMEM);
7532
7533	set_bit(avail_macvlan, vsi->fwd_bitmask);
7534	fwd->bit_no = avail_macvlan;
7535	netdev_set_sb_channel(vdev, avail_macvlan);
7536	fwd->netdev = vdev;
7537
7538	if (!netif_running(netdev))
7539		return fwd;
7540
7541	/* Set fwd ring up */
7542	ret = i40e_fwd_ring_up(vsi, vdev, fwd);
7543	if (ret) {
7544		/* unbind the queues and drop the subordinate channel config */
7545		netdev_unbind_sb_channel(netdev, vdev);
7546		netdev_set_sb_channel(vdev, 0);
7547
7548		kfree(fwd);
7549		return ERR_PTR(-EINVAL);
7550	}
7551
7552	return fwd;
7553}
7554
7555/**
7556 * i40e_del_all_macvlans - Delete all the mac filters on the channels
7557 * @vsi: the VSI we want to access
7558 */
7559static void i40e_del_all_macvlans(struct i40e_vsi *vsi)
7560{
7561	struct i40e_channel *ch, *ch_tmp;
7562	struct i40e_pf *pf = vsi->back;
7563	struct i40e_hw *hw = &pf->hw;
7564	int aq_err, ret = 0;
7565
7566	if (list_empty(&vsi->macvlan_list))
7567		return;
7568
7569	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
7570		if (i40e_is_channel_macvlan(ch)) {
7571			ret = i40e_del_macvlan_filter(hw, ch->seid,
7572						      i40e_channel_mac(ch),
7573						      &aq_err);
7574			if (!ret) {
7575				/* Reset queue contexts */
7576				i40e_reset_ch_rings(vsi, ch);
7577				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
7578				netdev_unbind_sb_channel(vsi->netdev,
7579							 ch->fwd->netdev);
7580				netdev_set_sb_channel(ch->fwd->netdev, 0);
7581				kfree(ch->fwd);
7582				ch->fwd = NULL;
7583			}
7584		}
7585	}
7586}
7587
7588/**
7589 * i40e_fwd_del - delete macvlan interfaces
7590 * @netdev: net device to configure
7591 * @vdev: macvlan netdevice
7592 */
7593static void i40e_fwd_del(struct net_device *netdev, void *vdev)
7594{
7595	struct i40e_netdev_priv *np = netdev_priv(netdev);
7596	struct i40e_fwd_adapter *fwd = vdev;
7597	struct i40e_channel *ch, *ch_tmp;
7598	struct i40e_vsi *vsi = np->vsi;
7599	struct i40e_pf *pf = vsi->back;
7600	struct i40e_hw *hw = &pf->hw;
7601	int aq_err, ret = 0;
7602
7603	/* Find the channel associated with the macvlan and del mac filter */
7604	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
7605		if (i40e_is_channel_macvlan(ch) &&
7606		    ether_addr_equal(i40e_channel_mac(ch),
7607				     fwd->netdev->dev_addr)) {
7608			ret = i40e_del_macvlan_filter(hw, ch->seid,
7609						      i40e_channel_mac(ch),
7610						      &aq_err);
7611			if (!ret) {
7612				/* Reset queue contexts */
7613				i40e_reset_ch_rings(vsi, ch);
7614				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
7615				netdev_unbind_sb_channel(netdev, fwd->netdev);
7616				netdev_set_sb_channel(fwd->netdev, 0);
7617				kfree(ch->fwd);
7618				ch->fwd = NULL;
7619			} else {
7620				dev_info(&pf->pdev->dev,
7621					 "Error deleting mac filter on macvlan err %s, aq_err %s\n",
7622					  i40e_stat_str(hw, ret),
7623					  i40e_aq_str(hw, aq_err));
7624			}
7625			break;
7626		}
7627	}
7628}
7629
7630/**
7631 * i40e_setup_tc - configure multiple traffic classes
7632 * @netdev: net device to configure
7633 * @type_data: tc offload data
7634 **/
7635static int i40e_setup_tc(struct net_device *netdev, void *type_data)
7636{
7637	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
7638	struct i40e_netdev_priv *np = netdev_priv(netdev);
7639	struct i40e_vsi *vsi = np->vsi;
7640	struct i40e_pf *pf = vsi->back;
7641	u8 enabled_tc = 0, num_tc, hw;
7642	bool need_reset = false;
7643	int old_queue_pairs;
7644	int ret = -EINVAL;
7645	u16 mode;
7646	int i;
7647
7648	old_queue_pairs = vsi->num_queue_pairs;
7649	num_tc = mqprio_qopt->qopt.num_tc;
7650	hw = mqprio_qopt->qopt.hw;
7651	mode = mqprio_qopt->mode;
7652	if (!hw) {
7653		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
7654		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
7655		goto config_tc;
7656	}
7657
7658	/* Check if MFP enabled */
7659	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
7660		netdev_info(netdev,
7661			    "Configuring TC not supported in MFP mode\n");
7662		return ret;
7663	}
7664	switch (mode) {
7665	case TC_MQPRIO_MODE_DCB:
7666		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
7667
7668		/* Check if DCB enabled to continue */
7669		if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
7670			netdev_info(netdev,
7671				    "DCB is not enabled for adapter\n");
7672			return ret;
7673		}
7674
7675		/* Check whether tc count is within enabled limit */
7676		if (num_tc > i40e_pf_get_num_tc(pf)) {
7677			netdev_info(netdev,
7678				    "TC count greater than enabled on link for adapter\n");
7679			return ret;
7680		}
7681		break;
7682	case TC_MQPRIO_MODE_CHANNEL:
7683		if (pf->flags & I40E_FLAG_DCB_ENABLED) {
7684			netdev_info(netdev,
7685				    "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
7686			return ret;
7687		}
7688		if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
7689			return ret;
7690		ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
7691		if (ret)
7692			return ret;
7693		memcpy(&vsi->mqprio_qopt, mqprio_qopt,
7694		       sizeof(*mqprio_qopt));
7695		pf->flags |= I40E_FLAG_TC_MQPRIO;
7696		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
7697		break;
7698	default:
7699		return -EINVAL;
7700	}
7701
7702config_tc:
7703	/* Generate TC map for number of tc requested */
7704	for (i = 0; i < num_tc; i++)
7705		enabled_tc |= BIT(i);
7706
7707	/* Requesting same TC configuration as already enabled */
7708	if (enabled_tc == vsi->tc_config.enabled_tc &&
7709	    mode != TC_MQPRIO_MODE_CHANNEL)
7710		return 0;
7711
7712	/* Quiesce VSI queues */
7713	i40e_quiesce_vsi(vsi);
7714
7715	if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
7716		i40e_remove_queue_channels(vsi);
7717
7718	/* Configure VSI for enabled TCs */
7719	ret = i40e_vsi_config_tc(vsi, enabled_tc);
7720	if (ret) {
7721		netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
7722			    vsi->seid);
7723		need_reset = true;
7724		goto exit;
7725	} else if (enabled_tc &&
7726		   (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
7727		netdev_info(netdev,
7728			    "Failed to create channel. Override queues (%u) not power of 2\n",
7729			    vsi->tc_config.tc_info[0].qcount);
7730		ret = -EINVAL;
7731		need_reset = true;
7732		goto exit;
7733	}
7734
7735	dev_info(&vsi->back->pdev->dev,
7736		 "Setup channel (id:%u) utilizing num_queues %d\n",
7737		 vsi->seid, vsi->tc_config.tc_info[0].qcount);
7738
7739	if (pf->flags & I40E_FLAG_TC_MQPRIO) {
7740		if (vsi->mqprio_qopt.max_rate[0]) {
7741			u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
7742						  vsi->mqprio_qopt.max_rate[0]);
7743
7744			ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
7745			if (!ret) {
7746				u64 credits = max_tx_rate;
7747
7748				do_div(credits, I40E_BW_CREDIT_DIVISOR);
7749				dev_dbg(&vsi->back->pdev->dev,
7750					"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
7751					max_tx_rate,
7752					credits,
7753					vsi->seid);
7754			} else {
7755				need_reset = true;
7756				goto exit;
7757			}
7758		}
7759		ret = i40e_configure_queue_channels(vsi);
7760		if (ret) {
7761			vsi->num_queue_pairs = old_queue_pairs;
7762			netdev_info(netdev,
7763				    "Failed configuring queue channels\n");
7764			need_reset = true;
7765			goto exit;
7766		}
7767	}
7768
7769exit:
7770	/* Reset the configuration data to defaults, only TC0 is enabled */
7771	if (need_reset) {
7772		i40e_vsi_set_default_tc_config(vsi);
7773		need_reset = false;
7774	}
7775
7776	/* Unquiesce VSI */
7777	i40e_unquiesce_vsi(vsi);
7778	return ret;
7779}
7780
7781/**
7782 * i40e_set_cld_element - sets cloud filter element data
7783 * @filter: cloud filter rule
7784 * @cld: ptr to cloud filter element data
7785 *
7786 * This is helper function to copy data into cloud filter element
7787 **/
7788static inline void
7789i40e_set_cld_element(struct i40e_cloud_filter *filter,
7790		     struct i40e_aqc_cloud_filters_element_data *cld)
7791{
7792	u32 ipa;
7793	int i;
7794
7795	memset(cld, 0, sizeof(*cld));
7796	ether_addr_copy(cld->outer_mac, filter->dst_mac);
7797	ether_addr_copy(cld->inner_mac, filter->src_mac);
7798
7799	if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
7800		return;
7801
7802	if (filter->n_proto == ETH_P_IPV6) {
7803#define IPV6_MAX_INDEX	(ARRAY_SIZE(filter->dst_ipv6) - 1)
7804		for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) {
7805			ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
7806
7807			*(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa);
7808		}
7809	} else {
7810		ipa = be32_to_cpu(filter->dst_ipv4);
7811
7812		memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
7813	}
7814
7815	cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
7816
7817	/* tenant_id is not supported by FW now, once the support is enabled
7818	 * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id)
7819	 */
7820	if (filter->tenant_id)
7821		return;
7822}
7823
7824/**
7825 * i40e_add_del_cloud_filter - Add/del cloud filter
7826 * @vsi: pointer to VSI
7827 * @filter: cloud filter rule
7828 * @add: if true, add, if false, delete
7829 *
7830 * Add or delete a cloud filter for a specific flow spec.
7831 * Returns 0 if the filter were successfully added.
7832 **/
7833int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
7834			      struct i40e_cloud_filter *filter, bool add)
7835{
7836	struct i40e_aqc_cloud_filters_element_data cld_filter;
7837	struct i40e_pf *pf = vsi->back;
7838	int ret;
7839	static const u16 flag_table[128] = {
7840		[I40E_CLOUD_FILTER_FLAGS_OMAC]  =
7841			I40E_AQC_ADD_CLOUD_FILTER_OMAC,
7842		[I40E_CLOUD_FILTER_FLAGS_IMAC]  =
7843			I40E_AQC_ADD_CLOUD_FILTER_IMAC,
7844		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN]  =
7845			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
7846		[I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
7847			I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
7848		[I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
7849			I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
7850		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
7851			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
7852		[I40E_CLOUD_FILTER_FLAGS_IIP] =
7853			I40E_AQC_ADD_CLOUD_FILTER_IIP,
7854	};
7855
7856	if (filter->flags >= ARRAY_SIZE(flag_table))
7857		return I40E_ERR_CONFIG;
7858
7859	memset(&cld_filter, 0, sizeof(cld_filter));
7860
7861	/* copy element needed to add cloud filter from filter */
7862	i40e_set_cld_element(filter, &cld_filter);
7863
7864	if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
7865		cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
7866					     I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
7867
7868	if (filter->n_proto == ETH_P_IPV6)
7869		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
7870						I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
7871	else
7872		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
7873						I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
7874
7875	if (add)
7876		ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
7877						&cld_filter, 1);
7878	else
7879		ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
7880						&cld_filter, 1);
7881	if (ret)
7882		dev_dbg(&pf->pdev->dev,
7883			"Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
7884			add ? "add" : "delete", filter->dst_port, ret,
7885			pf->hw.aq.asq_last_status);
7886	else
7887		dev_info(&pf->pdev->dev,
7888			 "%s cloud filter for VSI: %d\n",
7889			 add ? "Added" : "Deleted", filter->seid);
7890	return ret;
7891}
7892
7893/**
7894 * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
7895 * @vsi: pointer to VSI
7896 * @filter: cloud filter rule
7897 * @add: if true, add, if false, delete
7898 *
7899 * Add or delete a cloud filter for a specific flow spec using big buffer.
7900 * Returns 0 if the filter were successfully added.
7901 **/
7902int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
7903				      struct i40e_cloud_filter *filter,
7904				      bool add)
7905{
7906	struct i40e_aqc_cloud_filters_element_bb cld_filter;
7907	struct i40e_pf *pf = vsi->back;
7908	int ret;
7909
7910	/* Both (src/dst) valid mac_addr are not supported */
7911	if ((is_valid_ether_addr(filter->dst_mac) &&
7912	     is_valid_ether_addr(filter->src_mac)) ||
7913	    (is_multicast_ether_addr(filter->dst_mac) &&
7914	     is_multicast_ether_addr(filter->src_mac)))
7915		return -EOPNOTSUPP;
7916
7917	/* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
7918	 * ports are not supported via big buffer now.
7919	 */
7920	if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP)
7921		return -EOPNOTSUPP;
7922
7923	/* adding filter using src_port/src_ip is not supported at this stage */
7924	if (filter->src_port ||
7925	    (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) ||
7926	    !ipv6_addr_any(&filter->ip.v6.src_ip6))
7927		return -EOPNOTSUPP;
7928
7929	memset(&cld_filter, 0, sizeof(cld_filter));
7930
7931	/* copy element needed to add cloud filter from filter */
7932	i40e_set_cld_element(filter, &cld_filter.element);
7933
7934	if (is_valid_ether_addr(filter->dst_mac) ||
7935	    is_valid_ether_addr(filter->src_mac) ||
7936	    is_multicast_ether_addr(filter->dst_mac) ||
7937	    is_multicast_ether_addr(filter->src_mac)) {
7938		/* MAC + IP : unsupported mode */
7939		if (filter->dst_ipv4)
7940			return -EOPNOTSUPP;
7941
7942		/* since we validated that L4 port must be valid before
7943		 * we get here, start with respective "flags" value
7944		 * and update if vlan is present or not
7945		 */
7946		cld_filter.element.flags =
7947			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
7948
7949		if (filter->vlan_id) {
7950			cld_filter.element.flags =
7951			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
7952		}
7953
7954	} else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) ||
7955		   !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
7956		cld_filter.element.flags =
7957				cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
7958		if (filter->n_proto == ETH_P_IPV6)
7959			cld_filter.element.flags |=
7960				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
7961		else
7962			cld_filter.element.flags |=
7963				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
7964	} else {
7965		dev_err(&pf->pdev->dev,
7966			"either mac or ip has to be valid for cloud filter\n");
7967		return -EINVAL;
7968	}
7969
7970	/* Now copy L4 port in Byte 6..7 in general fields */
7971	cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
7972						be16_to_cpu(filter->dst_port);
7973
7974	if (add) {
7975		/* Validate current device switch mode, change if necessary */
7976		ret = i40e_validate_and_set_switch_mode(vsi);
7977		if (ret) {
7978			dev_err(&pf->pdev->dev,
7979				"failed to set switch mode, ret %d\n",
7980				ret);
7981			return ret;
7982		}
7983
7984		ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
7985						   &cld_filter, 1);
7986	} else {
7987		ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
7988						   &cld_filter, 1);
7989	}
7990
7991	if (ret)
7992		dev_dbg(&pf->pdev->dev,
7993			"Failed to %s cloud filter(big buffer) err %d aq_err %d\n",
7994			add ? "add" : "delete", ret, pf->hw.aq.asq_last_status);
7995	else
7996		dev_info(&pf->pdev->dev,
7997			 "%s cloud filter for VSI: %d, L4 port: %d\n",
7998			 add ? "add" : "delete", filter->seid,
7999			 ntohs(filter->dst_port));
8000	return ret;
8001}
8002
8003/**
8004 * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
8005 * @vsi: Pointer to VSI
8006 * @f: Pointer to struct flow_cls_offload
8007 * @filter: Pointer to cloud filter structure
8008 *
8009 **/
8010static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
8011				 struct flow_cls_offload *f,
8012				 struct i40e_cloud_filter *filter)
8013{
8014	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
8015	struct flow_dissector *dissector = rule->match.dissector;
8016	u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
8017	struct i40e_pf *pf = vsi->back;
8018	u8 field_flags = 0;
8019
8020	if (dissector->used_keys &
8021	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
8022	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
8023	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
8024	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
8025	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
8026	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
8027	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
8028	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
8029		dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n",
8030			dissector->used_keys);
8031		return -EOPNOTSUPP;
8032	}
8033
8034	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
8035		struct flow_match_enc_keyid match;
8036
8037		flow_rule_match_enc_keyid(rule, &match);
8038		if (match.mask->keyid != 0)
8039			field_flags |= I40E_CLOUD_FIELD_TEN_ID;
8040
8041		filter->tenant_id = be32_to_cpu(match.key->keyid);
8042	}
8043
8044	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
8045		struct flow_match_basic match;
8046
8047		flow_rule_match_basic(rule, &match);
8048		n_proto_key = ntohs(match.key->n_proto);
8049		n_proto_mask = ntohs(match.mask->n_proto);
8050
8051		if (n_proto_key == ETH_P_ALL) {
8052			n_proto_key = 0;
8053			n_proto_mask = 0;
8054		}
8055		filter->n_proto = n_proto_key & n_proto_mask;
8056		filter->ip_proto = match.key->ip_proto;
8057	}
8058
8059	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
8060		struct flow_match_eth_addrs match;
8061
8062		flow_rule_match_eth_addrs(rule, &match);
8063
8064		/* use is_broadcast and is_zero to check for all 0xf or 0 */
8065		if (!is_zero_ether_addr(match.mask->dst)) {
8066			if (is_broadcast_ether_addr(match.mask->dst)) {
8067				field_flags |= I40E_CLOUD_FIELD_OMAC;
8068			} else {
8069				dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
8070					match.mask->dst);
8071				return I40E_ERR_CONFIG;
8072			}
8073		}
8074
8075		if (!is_zero_ether_addr(match.mask->src)) {
8076			if (is_broadcast_ether_addr(match.mask->src)) {
8077				field_flags |= I40E_CLOUD_FIELD_IMAC;
8078			} else {
8079				dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
8080					match.mask->src);
8081				return I40E_ERR_CONFIG;
8082			}
8083		}
8084		ether_addr_copy(filter->dst_mac, match.key->dst);
8085		ether_addr_copy(filter->src_mac, match.key->src);
8086	}
8087
8088	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
8089		struct flow_match_vlan match;
8090
8091		flow_rule_match_vlan(rule, &match);
8092		if (match.mask->vlan_id) {
8093			if (match.mask->vlan_id == VLAN_VID_MASK) {
8094				field_flags |= I40E_CLOUD_FIELD_IVLAN;
8095
8096			} else {
8097				dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
8098					match.mask->vlan_id);
8099				return I40E_ERR_CONFIG;
8100			}
8101		}
8102
8103		filter->vlan_id = cpu_to_be16(match.key->vlan_id);
8104	}
8105
8106	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
8107		struct flow_match_control match;
8108
8109		flow_rule_match_control(rule, &match);
8110		addr_type = match.key->addr_type;
8111	}
8112
8113	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
8114		struct flow_match_ipv4_addrs match;
8115
8116		flow_rule_match_ipv4_addrs(rule, &match);
8117		if (match.mask->dst) {
8118			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
8119				field_flags |= I40E_CLOUD_FIELD_IIP;
8120			} else {
8121				dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
8122					&match.mask->dst);
8123				return I40E_ERR_CONFIG;
8124			}
8125		}
8126
8127		if (match.mask->src) {
8128			if (match.mask->src == cpu_to_be32(0xffffffff)) {
8129				field_flags |= I40E_CLOUD_FIELD_IIP;
8130			} else {
8131				dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
8132					&match.mask->src);
8133				return I40E_ERR_CONFIG;
8134			}
8135		}
8136
8137		if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
8138			dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
8139			return I40E_ERR_CONFIG;
8140		}
8141		filter->dst_ipv4 = match.key->dst;
8142		filter->src_ipv4 = match.key->src;
8143	}
8144
8145	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
8146		struct flow_match_ipv6_addrs match;
8147
8148		flow_rule_match_ipv6_addrs(rule, &match);
8149
8150		/* src and dest IPV6 address should not be LOOPBACK
8151		 * (0:0:0:0:0:0:0:1), which can be represented as ::1
8152		 */
8153		if (ipv6_addr_loopback(&match.key->dst) ||
8154		    ipv6_addr_loopback(&match.key->src)) {
8155			dev_err(&pf->pdev->dev,
8156				"Bad ipv6, addr is LOOPBACK\n");
8157			return I40E_ERR_CONFIG;
8158		}
8159		if (!ipv6_addr_any(&match.mask->dst) ||
8160		    !ipv6_addr_any(&match.mask->src))
8161			field_flags |= I40E_CLOUD_FIELD_IIP;
8162
8163		memcpy(&filter->src_ipv6, &match.key->src.s6_addr32,
8164		       sizeof(filter->src_ipv6));
8165		memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32,
8166		       sizeof(filter->dst_ipv6));
8167	}
8168
8169	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
8170		struct flow_match_ports match;
8171
8172		flow_rule_match_ports(rule, &match);
8173		if (match.mask->src) {
8174			if (match.mask->src == cpu_to_be16(0xffff)) {
8175				field_flags |= I40E_CLOUD_FIELD_IIP;
8176			} else {
8177				dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
8178					be16_to_cpu(match.mask->src));
8179				return I40E_ERR_CONFIG;
8180			}
8181		}
8182
8183		if (match.mask->dst) {
8184			if (match.mask->dst == cpu_to_be16(0xffff)) {
8185				field_flags |= I40E_CLOUD_FIELD_IIP;
8186			} else {
8187				dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
8188					be16_to_cpu(match.mask->dst));
8189				return I40E_ERR_CONFIG;
8190			}
8191		}
8192
8193		filter->dst_port = match.key->dst;
8194		filter->src_port = match.key->src;
8195
8196		switch (filter->ip_proto) {
8197		case IPPROTO_TCP:
8198		case IPPROTO_UDP:
8199			break;
8200		default:
8201			dev_err(&pf->pdev->dev,
8202				"Only UDP and TCP transport are supported\n");
8203			return -EINVAL;
8204		}
8205	}
8206	filter->flags = field_flags;
8207	return 0;
8208}
8209
8210/**
8211 * i40e_handle_tclass: Forward to a traffic class on the device
8212 * @vsi: Pointer to VSI
8213 * @tc: traffic class index on the device
8214 * @filter: Pointer to cloud filter structure
8215 *
8216 **/
8217static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
8218			      struct i40e_cloud_filter *filter)
8219{
8220	struct i40e_channel *ch, *ch_tmp;
8221
8222	/* direct to a traffic class on the same device */
8223	if (tc == 0) {
8224		filter->seid = vsi->seid;
8225		return 0;
8226	} else if (vsi->tc_config.enabled_tc & BIT(tc)) {
8227		if (!filter->dst_port) {
8228			dev_err(&vsi->back->pdev->dev,
8229				"Specify destination port to direct to traffic class that is not default\n");
8230			return -EINVAL;
8231		}
8232		if (list_empty(&vsi->ch_list))
8233			return -EINVAL;
8234		list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list,
8235					 list) {
8236			if (ch->seid == vsi->tc_seid_map[tc])
8237				filter->seid = ch->seid;
8238		}
8239		return 0;
8240	}
8241	dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
8242	return -EINVAL;
8243}
8244
8245/**
8246 * i40e_configure_clsflower - Configure tc flower filters
8247 * @vsi: Pointer to VSI
8248 * @cls_flower: Pointer to struct flow_cls_offload
8249 *
8250 **/
8251static int i40e_configure_clsflower(struct i40e_vsi *vsi,
8252				    struct flow_cls_offload *cls_flower)
8253{
8254	int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
8255	struct i40e_cloud_filter *filter = NULL;
8256	struct i40e_pf *pf = vsi->back;
8257	int err = 0;
8258
8259	if (tc < 0) {
8260		dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
8261		return -EOPNOTSUPP;
8262	}
8263
8264	if (!tc) {
8265		dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
8266		return -EINVAL;
8267	}
8268
8269	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
8270	    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
8271		return -EBUSY;
8272
8273	if (pf->fdir_pf_active_filters ||
8274	    (!hlist_empty(&pf->fdir_filter_list))) {
8275		dev_err(&vsi->back->pdev->dev,
8276			"Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n");
8277		return -EINVAL;
8278	}
8279
8280	if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
8281		dev_err(&vsi->back->pdev->dev,
8282			"Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
8283		vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
8284		vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
8285	}
8286
8287	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
8288	if (!filter)
8289		return -ENOMEM;
8290
8291	filter->cookie = cls_flower->cookie;
8292
8293	err = i40e_parse_cls_flower(vsi, cls_flower, filter);
8294	if (err < 0)
8295		goto err;
8296
8297	err = i40e_handle_tclass(vsi, tc, filter);
8298	if (err < 0)
8299		goto err;
8300
8301	/* Add cloud filter */
8302	if (filter->dst_port)
8303		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
8304	else
8305		err = i40e_add_del_cloud_filter(vsi, filter, true);
8306
8307	if (err) {
8308		dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n",
8309			err);
8310		goto err;
8311	}
8312
8313	/* add filter to the ordered list */
8314	INIT_HLIST_NODE(&filter->cloud_node);
8315
8316	hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
8317
8318	pf->num_cloud_filters++;
8319
8320	return err;
8321err:
8322	kfree(filter);
8323	return err;
8324}
8325
8326/**
8327 * i40e_find_cloud_filter - Find the could filter in the list
8328 * @vsi: Pointer to VSI
8329 * @cookie: filter specific cookie
8330 *
8331 **/
8332static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
8333							unsigned long *cookie)
8334{
8335	struct i40e_cloud_filter *filter = NULL;
8336	struct hlist_node *node2;
8337
8338	hlist_for_each_entry_safe(filter, node2,
8339				  &vsi->back->cloud_filter_list, cloud_node)
8340		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
8341			return filter;
8342	return NULL;
8343}
8344
8345/**
8346 * i40e_delete_clsflower - Remove tc flower filters
8347 * @vsi: Pointer to VSI
8348 * @cls_flower: Pointer to struct flow_cls_offload
8349 *
8350 **/
8351static int i40e_delete_clsflower(struct i40e_vsi *vsi,
8352				 struct flow_cls_offload *cls_flower)
8353{
8354	struct i40e_cloud_filter *filter = NULL;
8355	struct i40e_pf *pf = vsi->back;
8356	int err = 0;
8357
8358	filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
8359
8360	if (!filter)
8361		return -EINVAL;
8362
8363	hash_del(&filter->cloud_node);
8364
8365	if (filter->dst_port)
8366		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
8367	else
8368		err = i40e_add_del_cloud_filter(vsi, filter, false);
8369
8370	kfree(filter);
8371	if (err) {
8372		dev_err(&pf->pdev->dev,
8373			"Failed to delete cloud filter, err %s\n",
8374			i40e_stat_str(&pf->hw, err));
8375		return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
8376	}
8377
8378	pf->num_cloud_filters--;
8379	if (!pf->num_cloud_filters)
8380		if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
8381		    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
8382			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
8383			pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
8384			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
8385		}
8386	return 0;
8387}
8388
8389/**
8390 * i40e_setup_tc_cls_flower - flower classifier offloads
8391 * @np: net device to configure
8392 * @cls_flower: offload data
8393 **/
8394static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
8395				    struct flow_cls_offload *cls_flower)
8396{
8397	struct i40e_vsi *vsi = np->vsi;
8398
8399	switch (cls_flower->command) {
8400	case FLOW_CLS_REPLACE:
8401		return i40e_configure_clsflower(vsi, cls_flower);
8402	case FLOW_CLS_DESTROY:
8403		return i40e_delete_clsflower(vsi, cls_flower);
8404	case FLOW_CLS_STATS:
8405		return -EOPNOTSUPP;
8406	default:
8407		return -EOPNOTSUPP;
8408	}
8409}
8410
8411static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
8412				  void *cb_priv)
8413{
8414	struct i40e_netdev_priv *np = cb_priv;
8415
8416	if (!tc_cls_can_offload_and_chain0(np->vsi->netdev, type_data))
8417		return -EOPNOTSUPP;
8418
8419	switch (type) {
8420	case TC_SETUP_CLSFLOWER:
8421		return i40e_setup_tc_cls_flower(np, type_data);
8422
8423	default:
8424		return -EOPNOTSUPP;
8425	}
8426}
8427
8428static LIST_HEAD(i40e_block_cb_list);
8429
8430static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
8431			   void *type_data)
8432{
8433	struct i40e_netdev_priv *np = netdev_priv(netdev);
8434
8435	switch (type) {
8436	case TC_SETUP_QDISC_MQPRIO:
8437		return i40e_setup_tc(netdev, type_data);
8438	case TC_SETUP_BLOCK:
8439		return flow_block_cb_setup_simple(type_data,
8440						  &i40e_block_cb_list,
8441						  i40e_setup_tc_block_cb,
8442						  np, np, true);
8443	default:
8444		return -EOPNOTSUPP;
8445	}
8446}
8447
8448/**
8449 * i40e_open - Called when a network interface is made active
8450 * @netdev: network interface device structure
8451 *
8452 * The open entry point is called when a network interface is made
8453 * active by the system (IFF_UP).  At this point all resources needed
8454 * for transmit and receive operations are allocated, the interrupt
8455 * handler is registered with the OS, the netdev watchdog subtask is
8456 * enabled, and the stack is notified that the interface is ready.
8457 *
8458 * Returns 0 on success, negative value on failure
8459 **/
8460int i40e_open(struct net_device *netdev)
8461{
8462	struct i40e_netdev_priv *np = netdev_priv(netdev);
8463	struct i40e_vsi *vsi = np->vsi;
8464	struct i40e_pf *pf = vsi->back;
8465	int err;
8466
8467	/* disallow open during test or if eeprom is broken */
8468	if (test_bit(__I40E_TESTING, pf->state) ||
8469	    test_bit(__I40E_BAD_EEPROM, pf->state))
8470		return -EBUSY;
8471
8472	netif_carrier_off(netdev);
8473
8474	if (i40e_force_link_state(pf, true))
8475		return -EAGAIN;
8476
8477	err = i40e_vsi_open(vsi);
8478	if (err)
8479		return err;
8480
8481	/* configure global TSO hardware offload settings */
8482	wr32(&pf->hw, I40E_GLLAN_TSOMSK_F, be32_to_cpu(TCP_FLAG_PSH |
8483						       TCP_FLAG_FIN) >> 16);
8484	wr32(&pf->hw, I40E_GLLAN_TSOMSK_M, be32_to_cpu(TCP_FLAG_PSH |
8485						       TCP_FLAG_FIN |
8486						       TCP_FLAG_CWR) >> 16);
8487	wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16);
8488
8489	udp_tunnel_get_rx_info(netdev);
8490
8491	return 0;
8492}
8493
8494/**
8495 * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues
8496 * @vsi: vsi structure
8497 *
8498 * This updates netdev's number of tx/rx queues
8499 *
8500 * Returns status of setting tx/rx queues
8501 **/
8502static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi)
8503{
8504	int ret;
8505
8506	ret = netif_set_real_num_rx_queues(vsi->netdev,
8507					   vsi->num_queue_pairs);
8508	if (ret)
8509		return ret;
8510
8511	return netif_set_real_num_tx_queues(vsi->netdev,
8512					    vsi->num_queue_pairs);
8513}
8514
8515/**
8516 * i40e_vsi_open -
8517 * @vsi: the VSI to open
8518 *
8519 * Finish initialization of the VSI.
8520 *
8521 * Returns 0 on success, negative value on failure
8522 *
8523 * Note: expects to be called while under rtnl_lock()
8524 **/
8525int i40e_vsi_open(struct i40e_vsi *vsi)
8526{
8527	struct i40e_pf *pf = vsi->back;
8528	char int_name[I40E_INT_NAME_STR_LEN];
8529	int err;
8530
8531	/* allocate descriptors */
8532	err = i40e_vsi_setup_tx_resources(vsi);
8533	if (err)
8534		goto err_setup_tx;
8535	err = i40e_vsi_setup_rx_resources(vsi);
8536	if (err)
8537		goto err_setup_rx;
8538
8539	err = i40e_vsi_configure(vsi);
8540	if (err)
8541		goto err_setup_rx;
8542
8543	if (vsi->netdev) {
8544		snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
8545			 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
8546		err = i40e_vsi_request_irq(vsi, int_name);
8547		if (err)
8548			goto err_setup_rx;
8549
8550		/* Notify the stack of the actual queue counts. */
8551		err = i40e_netif_set_realnum_tx_rx_queues(vsi);
8552		if (err)
8553			goto err_set_queues;
8554
8555	} else if (vsi->type == I40E_VSI_FDIR) {
8556		snprintf(int_name, sizeof(int_name) - 1, "%s-%s:fdir",
8557			 dev_driver_string(&pf->pdev->dev),
8558			 dev_name(&pf->pdev->dev));
8559		err = i40e_vsi_request_irq(vsi, int_name);
8560		if (err)
8561			goto err_setup_rx;
8562
8563	} else {
8564		err = -EINVAL;
8565		goto err_setup_rx;
8566	}
8567
8568	err = i40e_up_complete(vsi);
8569	if (err)
8570		goto err_up_complete;
8571
8572	return 0;
8573
8574err_up_complete:
8575	i40e_down(vsi);
8576err_set_queues:
8577	i40e_vsi_free_irq(vsi);
8578err_setup_rx:
8579	i40e_vsi_free_rx_resources(vsi);
8580err_setup_tx:
8581	i40e_vsi_free_tx_resources(vsi);
8582	if (vsi == pf->vsi[pf->lan_vsi])
8583		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
8584
8585	return err;
8586}
8587
8588/**
8589 * i40e_fdir_filter_exit - Cleans up the Flow Director accounting
8590 * @pf: Pointer to PF
8591 *
8592 * This function destroys the hlist where all the Flow Director
8593 * filters were saved.
8594 **/
8595static void i40e_fdir_filter_exit(struct i40e_pf *pf)
8596{
8597	struct i40e_fdir_filter *filter;
8598	struct i40e_flex_pit *pit_entry, *tmp;
8599	struct hlist_node *node2;
8600
8601	hlist_for_each_entry_safe(filter, node2,
8602				  &pf->fdir_filter_list, fdir_node) {
8603		hlist_del(&filter->fdir_node);
8604		kfree(filter);
8605	}
8606
8607	list_for_each_entry_safe(pit_entry, tmp, &pf->l3_flex_pit_list, list) {
8608		list_del(&pit_entry->list);
8609		kfree(pit_entry);
8610	}
8611	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
8612
8613	list_for_each_entry_safe(pit_entry, tmp, &pf->l4_flex_pit_list, list) {
8614		list_del(&pit_entry->list);
8615		kfree(pit_entry);
8616	}
8617	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
8618
8619	pf->fdir_pf_active_filters = 0;
8620	pf->fd_tcp4_filter_cnt = 0;
8621	pf->fd_udp4_filter_cnt = 0;
8622	pf->fd_sctp4_filter_cnt = 0;
8623	pf->fd_ip4_filter_cnt = 0;
8624
8625	/* Reprogram the default input set for TCP/IPv4 */
8626	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
8627				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
8628				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
8629
8630	/* Reprogram the default input set for UDP/IPv4 */
8631	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP,
8632				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
8633				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
8634
8635	/* Reprogram the default input set for SCTP/IPv4 */
8636	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP,
8637				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
8638				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
8639
8640	/* Reprogram the default input set for Other/IPv4 */
8641	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
8642				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
8643
8644	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4,
8645				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
8646}
8647
8648/**
8649 * i40e_cloud_filter_exit - Cleans up the cloud filters
8650 * @pf: Pointer to PF
8651 *
8652 * This function destroys the hlist where all the cloud filters
8653 * were saved.
8654 **/
8655static void i40e_cloud_filter_exit(struct i40e_pf *pf)
8656{
8657	struct i40e_cloud_filter *cfilter;
8658	struct hlist_node *node;
8659
8660	hlist_for_each_entry_safe(cfilter, node,
8661				  &pf->cloud_filter_list, cloud_node) {
8662		hlist_del(&cfilter->cloud_node);
8663		kfree(cfilter);
8664	}
8665	pf->num_cloud_filters = 0;
8666
8667	if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
8668	    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
8669		pf->flags |= I40E_FLAG_FD_SB_ENABLED;
8670		pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
8671		pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
8672	}
8673}
8674
8675/**
8676 * i40e_close - Disables a network interface
8677 * @netdev: network interface device structure
8678 *
8679 * The close entry point is called when an interface is de-activated
8680 * by the OS.  The hardware is still under the driver's control, but
8681 * this netdev interface is disabled.
8682 *
8683 * Returns 0, this is not allowed to fail
8684 **/
8685int i40e_close(struct net_device *netdev)
8686{
8687	struct i40e_netdev_priv *np = netdev_priv(netdev);
8688	struct i40e_vsi *vsi = np->vsi;
8689
8690	i40e_vsi_close(vsi);
8691
8692	return 0;
8693}
8694
8695/**
8696 * i40e_do_reset - Start a PF or Core Reset sequence
8697 * @pf: board private structure
8698 * @reset_flags: which reset is requested
8699 * @lock_acquired: indicates whether or not the lock has been acquired
8700 * before this function was called.
8701 *
8702 * The essential difference in resets is that the PF Reset
8703 * doesn't clear the packet buffers, doesn't reset the PE
8704 * firmware, and doesn't bother the other PFs on the chip.
8705 **/
8706void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
8707{
8708	u32 val;
8709
8710	/* do the biggest reset indicated */
8711	if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) {
8712
8713		/* Request a Global Reset
8714		 *
8715		 * This will start the chip's countdown to the actual full
8716		 * chip reset event, and a warning interrupt to be sent
8717		 * to all PFs, including the requestor.  Our handler
8718		 * for the warning interrupt will deal with the shutdown
8719		 * and recovery of the switch setup.
8720		 */
8721		dev_dbg(&pf->pdev->dev, "GlobalR requested\n");
8722		val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
8723		val |= I40E_GLGEN_RTRIG_GLOBR_MASK;
8724		wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
8725
8726	} else if (reset_flags & BIT_ULL(__I40E_CORE_RESET_REQUESTED)) {
8727
8728		/* Request a Core Reset
8729		 *
8730		 * Same as Global Reset, except does *not* include the MAC/PHY
8731		 */
8732		dev_dbg(&pf->pdev->dev, "CoreR requested\n");
8733		val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
8734		val |= I40E_GLGEN_RTRIG_CORER_MASK;
8735		wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
8736		i40e_flush(&pf->hw);
8737
8738	} else if (reset_flags & I40E_PF_RESET_FLAG) {
8739
8740		/* Request a PF Reset
8741		 *
8742		 * Resets only the PF-specific registers
8743		 *
8744		 * This goes directly to the tear-down and rebuild of
8745		 * the switch, since we need to do all the recovery as
8746		 * for the Core Reset.
8747		 */
8748		dev_dbg(&pf->pdev->dev, "PFR requested\n");
8749		i40e_handle_reset_warning(pf, lock_acquired);
8750
8751	} else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) {
8752		/* Request a PF Reset
8753		 *
8754		 * Resets PF and reinitializes PFs VSI.
8755		 */
8756		i40e_prep_for_reset(pf, lock_acquired);
8757		i40e_reset_and_rebuild(pf, true, lock_acquired);
8758		dev_info(&pf->pdev->dev,
8759			 pf->flags & I40E_FLAG_DISABLE_FW_LLDP ?
8760			 "FW LLDP is disabled\n" :
8761			 "FW LLDP is enabled\n");
8762
8763	} else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
8764		int v;
8765
8766		/* Find the VSI(s) that requested a re-init */
8767		dev_info(&pf->pdev->dev,
8768			 "VSI reinit requested\n");
8769		for (v = 0; v < pf->num_alloc_vsi; v++) {
8770			struct i40e_vsi *vsi = pf->vsi[v];
8771
8772			if (vsi != NULL &&
8773			    test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED,
8774					       vsi->state))
8775				i40e_vsi_reinit_locked(pf->vsi[v]);
8776		}
8777	} else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) {
8778		int v;
8779
8780		/* Find the VSI(s) that needs to be brought down */
8781		dev_info(&pf->pdev->dev, "VSI down requested\n");
8782		for (v = 0; v < pf->num_alloc_vsi; v++) {
8783			struct i40e_vsi *vsi = pf->vsi[v];
8784
8785			if (vsi != NULL &&
8786			    test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED,
8787					       vsi->state)) {
8788				set_bit(__I40E_VSI_DOWN, vsi->state);
8789				i40e_down(vsi);
8790			}
8791		}
8792	} else {
8793		dev_info(&pf->pdev->dev,
8794			 "bad reset request 0x%08x\n", reset_flags);
8795	}
8796}
8797
8798#ifdef CONFIG_I40E_DCB
8799/**
8800 * i40e_dcb_need_reconfig - Check if DCB needs reconfig
8801 * @pf: board private structure
8802 * @old_cfg: current DCB config
8803 * @new_cfg: new DCB config
8804 **/
8805bool i40e_dcb_need_reconfig(struct i40e_pf *pf,
8806			    struct i40e_dcbx_config *old_cfg,
8807			    struct i40e_dcbx_config *new_cfg)
8808{
8809	bool need_reconfig = false;
8810
8811	/* Check if ETS configuration has changed */
8812	if (memcmp(&new_cfg->etscfg,
8813		   &old_cfg->etscfg,
8814		   sizeof(new_cfg->etscfg))) {
8815		/* If Priority Table has changed reconfig is needed */
8816		if (memcmp(&new_cfg->etscfg.prioritytable,
8817			   &old_cfg->etscfg.prioritytable,
8818			   sizeof(new_cfg->etscfg.prioritytable))) {
8819			need_reconfig = true;
8820			dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
8821		}
8822
8823		if (memcmp(&new_cfg->etscfg.tcbwtable,
8824			   &old_cfg->etscfg.tcbwtable,
8825			   sizeof(new_cfg->etscfg.tcbwtable)))
8826			dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
8827
8828		if (memcmp(&new_cfg->etscfg.tsatable,
8829			   &old_cfg->etscfg.tsatable,
8830			   sizeof(new_cfg->etscfg.tsatable)))
8831			dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
8832	}
8833
8834	/* Check if PFC configuration has changed */
8835	if (memcmp(&new_cfg->pfc,
8836		   &old_cfg->pfc,
8837		   sizeof(new_cfg->pfc))) {
8838		need_reconfig = true;
8839		dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
8840	}
8841
8842	/* Check if APP Table has changed */
8843	if (memcmp(&new_cfg->app,
8844		   &old_cfg->app,
8845		   sizeof(new_cfg->app))) {
8846		need_reconfig = true;
8847		dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
8848	}
8849
8850	dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
8851	return need_reconfig;
8852}
8853
8854/**
8855 * i40e_handle_lldp_event - Handle LLDP Change MIB event
8856 * @pf: board private structure
8857 * @e: event info posted on ARQ
8858 **/
8859static int i40e_handle_lldp_event(struct i40e_pf *pf,
8860				  struct i40e_arq_event_info *e)
8861{
8862	struct i40e_aqc_lldp_get_mib *mib =
8863		(struct i40e_aqc_lldp_get_mib *)&e->desc.params.raw;
8864	struct i40e_hw *hw = &pf->hw;
8865	struct i40e_dcbx_config tmp_dcbx_cfg;
8866	bool need_reconfig = false;
8867	int ret = 0;
8868	u8 type;
8869
8870	/* Not DCB capable or capability disabled */
8871	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
8872		return ret;
8873
8874	/* Ignore if event is not for Nearest Bridge */
8875	type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
8876		& I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
8877	dev_dbg(&pf->pdev->dev, "LLDP event mib bridge type 0x%x\n", type);
8878	if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE)
8879		return ret;
8880
8881	/* Check MIB Type and return if event for Remote MIB update */
8882	type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK;
8883	dev_dbg(&pf->pdev->dev,
8884		"LLDP event mib type %s\n", type ? "remote" : "local");
8885	if (type == I40E_AQ_LLDP_MIB_REMOTE) {
8886		/* Update the remote cached instance and return */
8887		ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
8888				I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
8889				&hw->remote_dcbx_config);
8890		goto exit;
8891	}
8892
8893	/* Store the old configuration */
8894	tmp_dcbx_cfg = hw->local_dcbx_config;
8895
8896	/* Reset the old DCBx configuration data */
8897	memset(&hw->local_dcbx_config, 0, sizeof(hw->local_dcbx_config));
8898	/* Get updated DCBX data from firmware */
8899	ret = i40e_get_dcb_config(&pf->hw);
8900	if (ret) {
8901		dev_info(&pf->pdev->dev,
8902			 "Failed querying DCB configuration data from firmware, err %s aq_err %s\n",
8903			 i40e_stat_str(&pf->hw, ret),
8904			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
8905		goto exit;
8906	}
8907
8908	/* No change detected in DCBX configs */
8909	if (!memcmp(&tmp_dcbx_cfg, &hw->local_dcbx_config,
8910		    sizeof(tmp_dcbx_cfg))) {
8911		dev_dbg(&pf->pdev->dev, "No change detected in DCBX configuration.\n");
8912		goto exit;
8913	}
8914
8915	need_reconfig = i40e_dcb_need_reconfig(pf, &tmp_dcbx_cfg,
8916					       &hw->local_dcbx_config);
8917
8918	i40e_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &hw->local_dcbx_config);
8919
8920	if (!need_reconfig)
8921		goto exit;
8922
8923	/* Enable DCB tagging only when more than one TC */
8924	if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
8925		pf->flags |= I40E_FLAG_DCB_ENABLED;
8926	else
8927		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
8928
8929	set_bit(__I40E_PORT_SUSPENDED, pf->state);
8930	/* Reconfiguration needed quiesce all VSIs */
8931	i40e_pf_quiesce_all_vsi(pf);
8932
8933	/* Changes in configuration update VEB/VSI */
8934	i40e_dcb_reconfigure(pf);
8935
8936	ret = i40e_resume_port_tx(pf);
8937
8938	clear_bit(__I40E_PORT_SUSPENDED, pf->state);
8939	/* In case of error no point in resuming VSIs */
8940	if (ret)
8941		goto exit;
8942
8943	/* Wait for the PF's queues to be disabled */
8944	ret = i40e_pf_wait_queues_disabled(pf);
8945	if (ret) {
8946		/* Schedule PF reset to recover */
8947		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
8948		i40e_service_event_schedule(pf);
8949	} else {
8950		i40e_pf_unquiesce_all_vsi(pf);
8951		set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
8952		set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
8953	}
8954
8955exit:
8956	return ret;
8957}
8958#endif /* CONFIG_I40E_DCB */
8959
8960/**
8961 * i40e_do_reset_safe - Protected reset path for userland calls.
8962 * @pf: board private structure
8963 * @reset_flags: which reset is requested
8964 *
8965 **/
8966void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags)
8967{
8968	rtnl_lock();
8969	i40e_do_reset(pf, reset_flags, true);
8970	rtnl_unlock();
8971}
8972
8973/**
8974 * i40e_handle_lan_overflow_event - Handler for LAN queue overflow event
8975 * @pf: board private structure
8976 * @e: event info posted on ARQ
8977 *
8978 * Handler for LAN Queue Overflow Event generated by the firmware for PF
8979 * and VF queues
8980 **/
8981static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
8982					   struct i40e_arq_event_info *e)
8983{
8984	struct i40e_aqc_lan_overflow *data =
8985		(struct i40e_aqc_lan_overflow *)&e->desc.params.raw;
8986	u32 queue = le32_to_cpu(data->prtdcb_rupto);
8987	u32 qtx_ctl = le32_to_cpu(data->otx_ctl);
8988	struct i40e_hw *hw = &pf->hw;
8989	struct i40e_vf *vf;
8990	u16 vf_id;
8991
8992	dev_dbg(&pf->pdev->dev, "overflow Rx Queue Number = %d QTX_CTL=0x%08x\n",
8993		queue, qtx_ctl);
8994
8995	/* Queue belongs to VF, find the VF and issue VF reset */
8996	if (((qtx_ctl & I40E_QTX_CTL_PFVF_Q_MASK)
8997	    >> I40E_QTX_CTL_PFVF_Q_SHIFT) == I40E_QTX_CTL_VF_QUEUE) {
8998		vf_id = (u16)((qtx_ctl & I40E_QTX_CTL_VFVM_INDX_MASK)
8999			 >> I40E_QTX_CTL_VFVM_INDX_SHIFT);
9000		vf_id -= hw->func_caps.vf_base_id;
9001		vf = &pf->vf[vf_id];
9002		i40e_vc_notify_vf_reset(vf);
9003		/* Allow VF to process pending reset notification */
9004		msleep(20);
9005		i40e_reset_vf(vf, false);
9006	}
9007}
9008
9009/**
9010 * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters
9011 * @pf: board private structure
9012 **/
9013u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf)
9014{
9015	u32 val, fcnt_prog;
9016
9017	val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
9018	fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK);
9019	return fcnt_prog;
9020}
9021
9022/**
9023 * i40e_get_current_fd_count - Get total FD filters programmed for this PF
9024 * @pf: board private structure
9025 **/
9026u32 i40e_get_current_fd_count(struct i40e_pf *pf)
9027{
9028	u32 val, fcnt_prog;
9029
9030	val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
9031	fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) +
9032		    ((val & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >>
9033		      I40E_PFQF_FDSTAT_BEST_CNT_SHIFT);
9034	return fcnt_prog;
9035}
9036
9037/**
9038 * i40e_get_global_fd_count - Get total FD filters programmed on device
9039 * @pf: board private structure
9040 **/
9041u32 i40e_get_global_fd_count(struct i40e_pf *pf)
9042{
9043	u32 val, fcnt_prog;
9044
9045	val = rd32(&pf->hw, I40E_GLQF_FDCNT_0);
9046	fcnt_prog = (val & I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK) +
9047		    ((val & I40E_GLQF_FDCNT_0_BESTCNT_MASK) >>
9048		     I40E_GLQF_FDCNT_0_BESTCNT_SHIFT);
9049	return fcnt_prog;
9050}
9051
9052/**
9053 * i40e_reenable_fdir_sb - Restore FDir SB capability
9054 * @pf: board private structure
9055 **/
9056static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
9057{
9058	if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
9059		if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
9060		    (I40E_DEBUG_FD & pf->hw.debug_mask))
9061			dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
9062}
9063
9064/**
9065 * i40e_reenable_fdir_atr - Restore FDir ATR capability
9066 * @pf: board private structure
9067 **/
9068static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
9069{
9070	if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) {
9071		/* ATR uses the same filtering logic as SB rules. It only
9072		 * functions properly if the input set mask is at the default
9073		 * settings. It is safe to restore the default input set
9074		 * because there are no active TCPv4 filter rules.
9075		 */
9076		i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
9077					I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
9078					I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
9079
9080		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
9081		    (I40E_DEBUG_FD & pf->hw.debug_mask))
9082			dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
9083	}
9084}
9085
9086/**
9087 * i40e_delete_invalid_filter - Delete an invalid FDIR filter
9088 * @pf: board private structure
9089 * @filter: FDir filter to remove
9090 */
9091static void i40e_delete_invalid_filter(struct i40e_pf *pf,
9092				       struct i40e_fdir_filter *filter)
9093{
9094	/* Update counters */
9095	pf->fdir_pf_active_filters--;
9096	pf->fd_inv = 0;
9097
9098	switch (filter->flow_type) {
9099	case TCP_V4_FLOW:
9100		pf->fd_tcp4_filter_cnt--;
9101		break;
9102	case UDP_V4_FLOW:
9103		pf->fd_udp4_filter_cnt--;
9104		break;
9105	case SCTP_V4_FLOW:
9106		pf->fd_sctp4_filter_cnt--;
9107		break;
9108	case IP_USER_FLOW:
9109		switch (filter->ip4_proto) {
9110		case IPPROTO_TCP:
9111			pf->fd_tcp4_filter_cnt--;
9112			break;
9113		case IPPROTO_UDP:
9114			pf->fd_udp4_filter_cnt--;
9115			break;
9116		case IPPROTO_SCTP:
9117			pf->fd_sctp4_filter_cnt--;
9118			break;
9119		case IPPROTO_IP:
9120			pf->fd_ip4_filter_cnt--;
9121			break;
9122		}
9123		break;
9124	}
9125
9126	/* Remove the filter from the list and free memory */
9127	hlist_del(&filter->fdir_node);
9128	kfree(filter);
9129}
9130
9131/**
9132 * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled
9133 * @pf: board private structure
9134 **/
9135void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
9136{
9137	struct i40e_fdir_filter *filter;
9138	u32 fcnt_prog, fcnt_avail;
9139	struct hlist_node *node;
9140
9141	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
9142		return;
9143
9144	/* Check if we have enough room to re-enable FDir SB capability. */
9145	fcnt_prog = i40e_get_global_fd_count(pf);
9146	fcnt_avail = pf->fdir_pf_filter_count;
9147	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
9148	    (pf->fd_add_err == 0) ||
9149	    (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
9150		i40e_reenable_fdir_sb(pf);
9151
9152	/* We should wait for even more space before re-enabling ATR.
9153	 * Additionally, we cannot enable ATR as long as we still have TCP SB
9154	 * rules active.
9155	 */
9156	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
9157	    (pf->fd_tcp4_filter_cnt == 0))
9158		i40e_reenable_fdir_atr(pf);
9159
9160	/* if hw had a problem adding a filter, delete it */
9161	if (pf->fd_inv > 0) {
9162		hlist_for_each_entry_safe(filter, node,
9163					  &pf->fdir_filter_list, fdir_node)
9164			if (filter->fd_id == pf->fd_inv)
9165				i40e_delete_invalid_filter(pf, filter);
9166	}
9167}
9168
9169#define I40E_MIN_FD_FLUSH_INTERVAL 10
9170#define I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE 30
9171/**
9172 * i40e_fdir_flush_and_replay - Function to flush all FD filters and replay SB
9173 * @pf: board private structure
9174 **/
9175static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
9176{
9177	unsigned long min_flush_time;
9178	int flush_wait_retry = 50;
9179	bool disable_atr = false;
9180	int fd_room;
9181	int reg;
9182
9183	if (!time_after(jiffies, pf->fd_flush_timestamp +
9184				 (I40E_MIN_FD_FLUSH_INTERVAL * HZ)))
9185		return;
9186
9187	/* If the flush is happening too quick and we have mostly SB rules we
9188	 * should not re-enable ATR for some time.
9189	 */
9190	min_flush_time = pf->fd_flush_timestamp +
9191			 (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
9192	fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
9193
9194	if (!(time_after(jiffies, min_flush_time)) &&
9195	    (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
9196		if (I40E_DEBUG_FD & pf->hw.debug_mask)
9197			dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
9198		disable_atr = true;
9199	}
9200
9201	pf->fd_flush_timestamp = jiffies;
9202	set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
9203	/* flush all filters */
9204	wr32(&pf->hw, I40E_PFQF_CTL_1,
9205	     I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
9206	i40e_flush(&pf->hw);
9207	pf->fd_flush_cnt++;
9208	pf->fd_add_err = 0;
9209	do {
9210		/* Check FD flush status every 5-6msec */
9211		usleep_range(5000, 6000);
9212		reg = rd32(&pf->hw, I40E_PFQF_CTL_1);
9213		if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK))
9214			break;
9215	} while (flush_wait_retry--);
9216	if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
9217		dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
9218	} else {
9219		/* replay sideband filters */
9220		i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
9221		if (!disable_atr && !pf->fd_tcp4_filter_cnt)
9222			clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
9223		clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
9224		if (I40E_DEBUG_FD & pf->hw.debug_mask)
9225			dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
9226	}
9227}
9228
9229/**
9230 * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed
9231 * @pf: board private structure
9232 **/
9233u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
9234{
9235	return i40e_get_current_fd_count(pf) - pf->fdir_pf_active_filters;
9236}
9237
9238/**
9239 * i40e_fdir_reinit_subtask - Worker thread to reinit FDIR filter table
9240 * @pf: board private structure
9241 **/
9242static void i40e_fdir_reinit_subtask(struct i40e_pf *pf)
9243{
9244
9245	/* if interface is down do nothing */
9246	if (test_bit(__I40E_DOWN, pf->state))
9247		return;
9248
9249	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
9250		i40e_fdir_flush_and_replay(pf);
9251
9252	i40e_fdir_check_and_reenable(pf);
9253
9254}
9255
9256/**
9257 * i40e_vsi_link_event - notify VSI of a link event
9258 * @vsi: vsi to be notified
9259 * @link_up: link up or down
9260 **/
9261static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
9262{
9263	if (!vsi || test_bit(__I40E_VSI_DOWN, vsi->state))
9264		return;
9265
9266	switch (vsi->type) {
9267	case I40E_VSI_MAIN:
9268		if (!vsi->netdev || !vsi->netdev_registered)
9269			break;
9270
9271		if (link_up) {
9272			netif_carrier_on(vsi->netdev);
9273			netif_tx_wake_all_queues(vsi->netdev);
9274		} else {
9275			netif_carrier_off(vsi->netdev);
9276			netif_tx_stop_all_queues(vsi->netdev);
9277		}
9278		break;
9279
9280	case I40E_VSI_SRIOV:
9281	case I40E_VSI_VMDQ2:
9282	case I40E_VSI_CTRL:
9283	case I40E_VSI_IWARP:
9284	case I40E_VSI_MIRROR:
9285	default:
9286		/* there is no notification for other VSIs */
9287		break;
9288	}
9289}
9290
9291/**
9292 * i40e_veb_link_event - notify elements on the veb of a link event
9293 * @veb: veb to be notified
9294 * @link_up: link up or down
9295 **/
9296static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up)
9297{
9298	struct i40e_pf *pf;
9299	int i;
9300
9301	if (!veb || !veb->pf)
9302		return;
9303	pf = veb->pf;
9304
9305	/* depth first... */
9306	for (i = 0; i < I40E_MAX_VEB; i++)
9307		if (pf->veb[i] && (pf->veb[i]->uplink_seid == veb->seid))
9308			i40e_veb_link_event(pf->veb[i], link_up);
9309
9310	/* ... now the local VSIs */
9311	for (i = 0; i < pf->num_alloc_vsi; i++)
9312		if (pf->vsi[i] && (pf->vsi[i]->uplink_seid == veb->seid))
9313			i40e_vsi_link_event(pf->vsi[i], link_up);
9314}
9315
9316/**
9317 * i40e_link_event - Update netif_carrier status
9318 * @pf: board private structure
9319 **/
9320static void i40e_link_event(struct i40e_pf *pf)
9321{
9322	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
9323	u8 new_link_speed, old_link_speed;
9324	i40e_status status;
9325	bool new_link, old_link;
9326
9327	/* set this to force the get_link_status call to refresh state */
9328	pf->hw.phy.get_link_info = true;
9329	old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
9330	status = i40e_get_link_status(&pf->hw, &new_link);
9331
9332	/* On success, disable temp link polling */
9333	if (status == I40E_SUCCESS) {
9334		clear_bit(__I40E_TEMP_LINK_POLLING, pf->state);
9335	} else {
9336		/* Enable link polling temporarily until i40e_get_link_status
9337		 * returns I40E_SUCCESS
9338		 */
9339		set_bit(__I40E_TEMP_LINK_POLLING, pf->state);
9340		dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
9341			status);
9342		return;
9343	}
9344
9345	old_link_speed = pf->hw.phy.link_info_old.link_speed;
9346	new_link_speed = pf->hw.phy.link_info.link_speed;
9347
9348	if (new_link == old_link &&
9349	    new_link_speed == old_link_speed &&
9350	    (test_bit(__I40E_VSI_DOWN, vsi->state) ||
9351	     new_link == netif_carrier_ok(vsi->netdev)))
9352		return;
9353
9354	i40e_print_link_message(vsi, new_link);
9355
9356	/* Notify the base of the switch tree connected to
9357	 * the link.  Floating VEBs are not notified.
9358	 */
9359	if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
9360		i40e_veb_link_event(pf->veb[pf->lan_veb], new_link);
9361	else
9362		i40e_vsi_link_event(vsi, new_link);
9363
9364	if (pf->vf)
9365		i40e_vc_notify_link_state(pf);
9366
9367	if (pf->flags & I40E_FLAG_PTP)
9368		i40e_ptp_set_increment(pf);
9369}
9370
9371/**
9372 * i40e_watchdog_subtask - periodic checks not using event driven response
9373 * @pf: board private structure
9374 **/
9375static void i40e_watchdog_subtask(struct i40e_pf *pf)
9376{
9377	int i;
9378
9379	/* if interface is down do nothing */
9380	if (test_bit(__I40E_DOWN, pf->state) ||
9381	    test_bit(__I40E_CONFIG_BUSY, pf->state))
9382		return;
9383
9384	/* make sure we don't do these things too often */
9385	if (time_before(jiffies, (pf->service_timer_previous +
9386				  pf->service_timer_period)))
9387		return;
9388	pf->service_timer_previous = jiffies;
9389
9390	if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) ||
9391	    test_bit(__I40E_TEMP_LINK_POLLING, pf->state))
9392		i40e_link_event(pf);
9393
9394	/* Update the stats for active netdevs so the network stack
9395	 * can look at updated numbers whenever it cares to
9396	 */
9397	for (i = 0; i < pf->num_alloc_vsi; i++)
9398		if (pf->vsi[i] && pf->vsi[i]->netdev)
9399			i40e_update_stats(pf->vsi[i]);
9400
9401	if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) {
9402		/* Update the stats for the active switching components */
9403		for (i = 0; i < I40E_MAX_VEB; i++)
9404			if (pf->veb[i])
9405				i40e_update_veb_stats(pf->veb[i]);
9406	}
9407
9408	i40e_ptp_rx_hang(pf);
9409	i40e_ptp_tx_hang(pf);
9410}
9411
9412/**
9413 * i40e_reset_subtask - Set up for resetting the device and driver
9414 * @pf: board private structure
9415 **/
9416static void i40e_reset_subtask(struct i40e_pf *pf)
9417{
9418	u32 reset_flags = 0;
9419
9420	if (test_bit(__I40E_REINIT_REQUESTED, pf->state)) {
9421		reset_flags |= BIT(__I40E_REINIT_REQUESTED);
9422		clear_bit(__I40E_REINIT_REQUESTED, pf->state);
9423	}
9424	if (test_bit(__I40E_PF_RESET_REQUESTED, pf->state)) {
9425		reset_flags |= BIT(__I40E_PF_RESET_REQUESTED);
9426		clear_bit(__I40E_PF_RESET_REQUESTED, pf->state);
9427	}
9428	if (test_bit(__I40E_CORE_RESET_REQUESTED, pf->state)) {
9429		reset_flags |= BIT(__I40E_CORE_RESET_REQUESTED);
9430		clear_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
9431	}
9432	if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state)) {
9433		reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED);
9434		clear_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
9435	}
9436	if (test_bit(__I40E_DOWN_REQUESTED, pf->state)) {
9437		reset_flags |= BIT(__I40E_DOWN_REQUESTED);
9438		clear_bit(__I40E_DOWN_REQUESTED, pf->state);
9439	}
9440
9441	/* If there's a recovery already waiting, it takes
9442	 * precedence before starting a new reset sequence.
9443	 */
9444	if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
9445		i40e_prep_for_reset(pf, false);
9446		i40e_reset(pf);
9447		i40e_rebuild(pf, false, false);
9448	}
9449
9450	/* If we're already down or resetting, just bail */
9451	if (reset_flags &&
9452	    !test_bit(__I40E_DOWN, pf->state) &&
9453	    !test_bit(__I40E_CONFIG_BUSY, pf->state)) {
9454		i40e_do_reset(pf, reset_flags, false);
9455	}
9456}
9457
9458/**
9459 * i40e_handle_link_event - Handle link event
9460 * @pf: board private structure
9461 * @e: event info posted on ARQ
9462 **/
9463static void i40e_handle_link_event(struct i40e_pf *pf,
9464				   struct i40e_arq_event_info *e)
9465{
9466	struct i40e_aqc_get_link_status *status =
9467		(struct i40e_aqc_get_link_status *)&e->desc.params.raw;
9468
9469	/* Do a new status request to re-enable LSE reporting
9470	 * and load new status information into the hw struct
9471	 * This completely ignores any state information
9472	 * in the ARQ event info, instead choosing to always
9473	 * issue the AQ update link status command.
9474	 */
9475	i40e_link_event(pf);
9476
9477	/* Check if module meets thermal requirements */
9478	if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
9479		dev_err(&pf->pdev->dev,
9480			"Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
9481		dev_err(&pf->pdev->dev,
9482			"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
9483	} else {
9484		/* check for unqualified module, if link is down, suppress
9485		 * the message if link was forced to be down.
9486		 */
9487		if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
9488		    (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
9489		    (!(status->link_info & I40E_AQ_LINK_UP)) &&
9490		    (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
9491			dev_err(&pf->pdev->dev,
9492				"Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
9493			dev_err(&pf->pdev->dev,
9494				"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
9495		}
9496	}
9497}
9498
9499/**
9500 * i40e_clean_adminq_subtask - Clean the AdminQ rings
9501 * @pf: board private structure
9502 **/
9503static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
9504{
9505	struct i40e_arq_event_info event;
9506	struct i40e_hw *hw = &pf->hw;
9507	u16 pending, i = 0;
9508	i40e_status ret;
9509	u16 opcode;
9510	u32 oldval;
9511	u32 val;
9512
9513	/* Do not run clean AQ when PF reset fails */
9514	if (test_bit(__I40E_RESET_FAILED, pf->state))
9515		return;
9516
9517	/* check for error indications */
9518	val = rd32(&pf->hw, pf->hw.aq.arq.len);
9519	oldval = val;
9520	if (val & I40E_PF_ARQLEN_ARQVFE_MASK) {
9521		if (hw->debug_mask & I40E_DEBUG_AQ)
9522			dev_info(&pf->pdev->dev, "ARQ VF Error detected\n");
9523		val &= ~I40E_PF_ARQLEN_ARQVFE_MASK;
9524	}
9525	if (val & I40E_PF_ARQLEN_ARQOVFL_MASK) {
9526		if (hw->debug_mask & I40E_DEBUG_AQ)
9527			dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n");
9528		val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK;
9529		pf->arq_overflows++;
9530	}
9531	if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) {
9532		if (hw->debug_mask & I40E_DEBUG_AQ)
9533			dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n");
9534		val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK;
9535	}
9536	if (oldval != val)
9537		wr32(&pf->hw, pf->hw.aq.arq.len, val);
9538
9539	val = rd32(&pf->hw, pf->hw.aq.asq.len);
9540	oldval = val;
9541	if (val & I40E_PF_ATQLEN_ATQVFE_MASK) {
9542		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
9543			dev_info(&pf->pdev->dev, "ASQ VF Error detected\n");
9544		val &= ~I40E_PF_ATQLEN_ATQVFE_MASK;
9545	}
9546	if (val & I40E_PF_ATQLEN_ATQOVFL_MASK) {
9547		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
9548			dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n");
9549		val &= ~I40E_PF_ATQLEN_ATQOVFL_MASK;
9550	}
9551	if (val & I40E_PF_ATQLEN_ATQCRIT_MASK) {
9552		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
9553			dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n");
9554		val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK;
9555	}
9556	if (oldval != val)
9557		wr32(&pf->hw, pf->hw.aq.asq.len, val);
9558
9559	event.buf_len = I40E_MAX_AQ_BUF_SIZE;
9560	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
9561	if (!event.msg_buf)
9562		return;
9563
9564	do {
9565		ret = i40e_clean_arq_element(hw, &event, &pending);
9566		if (ret == I40E_ERR_ADMIN_QUEUE_NO_WORK)
9567			break;
9568		else if (ret) {
9569			dev_info(&pf->pdev->dev, "ARQ event error %d\n", ret);
9570			break;
9571		}
9572
9573		opcode = le16_to_cpu(event.desc.opcode);
9574		switch (opcode) {
9575
9576		case i40e_aqc_opc_get_link_status:
9577			i40e_handle_link_event(pf, &event);
9578			break;
9579		case i40e_aqc_opc_send_msg_to_pf:
9580			ret = i40e_vc_process_vf_msg(pf,
9581					le16_to_cpu(event.desc.retval),
9582					le32_to_cpu(event.desc.cookie_high),
9583					le32_to_cpu(event.desc.cookie_low),
9584					event.msg_buf,
9585					event.msg_len);
9586			break;
9587		case i40e_aqc_opc_lldp_update_mib:
9588			dev_dbg(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n");
9589#ifdef CONFIG_I40E_DCB
9590			rtnl_lock();
9591			ret = i40e_handle_lldp_event(pf, &event);
9592			rtnl_unlock();
9593#endif /* CONFIG_I40E_DCB */
9594			break;
9595		case i40e_aqc_opc_event_lan_overflow:
9596			dev_dbg(&pf->pdev->dev, "ARQ LAN queue overflow event received\n");
9597			i40e_handle_lan_overflow_event(pf, &event);
9598			break;
9599		case i40e_aqc_opc_send_msg_to_peer:
9600			dev_info(&pf->pdev->dev, "ARQ: Msg from other pf\n");
9601			break;
9602		case i40e_aqc_opc_nvm_erase:
9603		case i40e_aqc_opc_nvm_update:
9604		case i40e_aqc_opc_oem_post_update:
9605			i40e_debug(&pf->hw, I40E_DEBUG_NVM,
9606				   "ARQ NVM operation 0x%04x completed\n",
9607				   opcode);
9608			break;
9609		default:
9610			dev_info(&pf->pdev->dev,
9611				 "ARQ: Unknown event 0x%04x ignored\n",
9612				 opcode);
9613			break;
9614		}
9615	} while (i++ < pf->adminq_work_limit);
9616
9617	if (i < pf->adminq_work_limit)
9618		clear_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state);
9619
9620	/* re-enable Admin queue interrupt cause */
9621	val = rd32(hw, I40E_PFINT_ICR0_ENA);
9622	val |=  I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
9623	wr32(hw, I40E_PFINT_ICR0_ENA, val);
9624	i40e_flush(hw);
9625
9626	kfree(event.msg_buf);
9627}
9628
9629/**
9630 * i40e_verify_eeprom - make sure eeprom is good to use
9631 * @pf: board private structure
9632 **/
9633static void i40e_verify_eeprom(struct i40e_pf *pf)
9634{
9635	int err;
9636
9637	err = i40e_diag_eeprom_test(&pf->hw);
9638	if (err) {
9639		/* retry in case of garbage read */
9640		err = i40e_diag_eeprom_test(&pf->hw);
9641		if (err) {
9642			dev_info(&pf->pdev->dev, "eeprom check failed (%d), Tx/Rx traffic disabled\n",
9643				 err);
9644			set_bit(__I40E_BAD_EEPROM, pf->state);
9645		}
9646	}
9647
9648	if (!err && test_bit(__I40E_BAD_EEPROM, pf->state)) {
9649		dev_info(&pf->pdev->dev, "eeprom check passed, Tx/Rx traffic enabled\n");
9650		clear_bit(__I40E_BAD_EEPROM, pf->state);
9651	}
9652}
9653
9654/**
9655 * i40e_enable_pf_switch_lb
9656 * @pf: pointer to the PF structure
9657 *
9658 * enable switch loop back or die - no point in a return value
9659 **/
9660static void i40e_enable_pf_switch_lb(struct i40e_pf *pf)
9661{
9662	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
9663	struct i40e_vsi_context ctxt;
9664	int ret;
9665
9666	ctxt.seid = pf->main_vsi_seid;
9667	ctxt.pf_num = pf->hw.pf_id;
9668	ctxt.vf_num = 0;
9669	ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
9670	if (ret) {
9671		dev_info(&pf->pdev->dev,
9672			 "couldn't get PF vsi config, err %s aq_err %s\n",
9673			 i40e_stat_str(&pf->hw, ret),
9674			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
9675		return;
9676	}
9677	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
9678	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
9679	ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
9680
9681	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
9682	if (ret) {
9683		dev_info(&pf->pdev->dev,
9684			 "update vsi switch failed, err %s aq_err %s\n",
9685			 i40e_stat_str(&pf->hw, ret),
9686			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
9687	}
9688}
9689
9690/**
9691 * i40e_disable_pf_switch_lb
9692 * @pf: pointer to the PF structure
9693 *
9694 * disable switch loop back or die - no point in a return value
9695 **/
9696static void i40e_disable_pf_switch_lb(struct i40e_pf *pf)
9697{
9698	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
9699	struct i40e_vsi_context ctxt;
9700	int ret;
9701
9702	ctxt.seid = pf->main_vsi_seid;
9703	ctxt.pf_num = pf->hw.pf_id;
9704	ctxt.vf_num = 0;
9705	ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
9706	if (ret) {
9707		dev_info(&pf->pdev->dev,
9708			 "couldn't get PF vsi config, err %s aq_err %s\n",
9709			 i40e_stat_str(&pf->hw, ret),
9710			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
9711		return;
9712	}
9713	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
9714	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
9715	ctxt.info.switch_id &= ~cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
9716
9717	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
9718	if (ret) {
9719		dev_info(&pf->pdev->dev,
9720			 "update vsi switch failed, err %s aq_err %s\n",
9721			 i40e_stat_str(&pf->hw, ret),
9722			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
9723	}
9724}
9725
9726/**
9727 * i40e_config_bridge_mode - Configure the HW bridge mode
9728 * @veb: pointer to the bridge instance
9729 *
9730 * Configure the loop back mode for the LAN VSI that is downlink to the
9731 * specified HW bridge instance. It is expected this function is called
9732 * when a new HW bridge is instantiated.
9733 **/
9734static void i40e_config_bridge_mode(struct i40e_veb *veb)
9735{
9736	struct i40e_pf *pf = veb->pf;
9737
9738	if (pf->hw.debug_mask & I40E_DEBUG_LAN)
9739		dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n",
9740			 veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
9741	if (veb->bridge_mode & BRIDGE_MODE_VEPA)
9742		i40e_disable_pf_switch_lb(pf);
9743	else
9744		i40e_enable_pf_switch_lb(pf);
9745}
9746
9747/**
9748 * i40e_reconstitute_veb - rebuild the VEB and anything connected to it
9749 * @veb: pointer to the VEB instance
9750 *
9751 * This is a recursive function that first builds the attached VSIs then
9752 * recurses in to build the next layer of VEB.  We track the connections
9753 * through our own index numbers because the seid's from the HW could
9754 * change across the reset.
9755 **/
9756static int i40e_reconstitute_veb(struct i40e_veb *veb)
9757{
9758	struct i40e_vsi *ctl_vsi = NULL;
9759	struct i40e_pf *pf = veb->pf;
9760	int v, veb_idx;
9761	int ret;
9762
9763	/* build VSI that owns this VEB, temporarily attached to base VEB */
9764	for (v = 0; v < pf->num_alloc_vsi && !ctl_vsi; v++) {
9765		if (pf->vsi[v] &&
9766		    pf->vsi[v]->veb_idx == veb->idx &&
9767		    pf->vsi[v]->flags & I40E_VSI_FLAG_VEB_OWNER) {
9768			ctl_vsi = pf->vsi[v];
9769			break;
9770		}
9771	}
9772	if (!ctl_vsi) {
9773		dev_info(&pf->pdev->dev,
9774			 "missing owner VSI for veb_idx %d\n", veb->idx);
9775		ret = -ENOENT;
9776		goto end_reconstitute;
9777	}
9778	if (ctl_vsi != pf->vsi[pf->lan_vsi])
9779		ctl_vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
9780	ret = i40e_add_vsi(ctl_vsi);
9781	if (ret) {
9782		dev_info(&pf->pdev->dev,
9783			 "rebuild of veb_idx %d owner VSI failed: %d\n",
9784			 veb->idx, ret);
9785		goto end_reconstitute;
9786	}
9787	i40e_vsi_reset_stats(ctl_vsi);
9788
9789	/* create the VEB in the switch and move the VSI onto the VEB */
9790	ret = i40e_add_veb(veb, ctl_vsi);
9791	if (ret)
9792		goto end_reconstitute;
9793
9794	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
9795		veb->bridge_mode = BRIDGE_MODE_VEB;
9796	else
9797		veb->bridge_mode = BRIDGE_MODE_VEPA;
9798	i40e_config_bridge_mode(veb);
9799
9800	/* create the remaining VSIs attached to this VEB */
9801	for (v = 0; v < pf->num_alloc_vsi; v++) {
9802		if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi)
9803			continue;
9804
9805		if (pf->vsi[v]->veb_idx == veb->idx) {
9806			struct i40e_vsi *vsi = pf->vsi[v];
9807
9808			vsi->uplink_seid = veb->seid;
9809			ret = i40e_add_vsi(vsi);
9810			if (ret) {
9811				dev_info(&pf->pdev->dev,
9812					 "rebuild of vsi_idx %d failed: %d\n",
9813					 v, ret);
9814				goto end_reconstitute;
9815			}
9816			i40e_vsi_reset_stats(vsi);
9817		}
9818	}
9819
9820	/* create any VEBs attached to this VEB - RECURSION */
9821	for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
9822		if (pf->veb[veb_idx] && pf->veb[veb_idx]->veb_idx == veb->idx) {
9823			pf->veb[veb_idx]->uplink_seid = veb->seid;
9824			ret = i40e_reconstitute_veb(pf->veb[veb_idx]);
9825			if (ret)
9826				break;
9827		}
9828	}
9829
9830end_reconstitute:
9831	return ret;
9832}
9833
9834/**
9835 * i40e_get_capabilities - get info about the HW
9836 * @pf: the PF struct
9837 * @list_type: AQ capability to be queried
9838 **/
9839static int i40e_get_capabilities(struct i40e_pf *pf,
9840				 enum i40e_admin_queue_opc list_type)
9841{
9842	struct i40e_aqc_list_capabilities_element_resp *cap_buf;
9843	u16 data_size;
9844	int buf_len;
9845	int err;
9846
9847	buf_len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp);
9848	do {
9849		cap_buf = kzalloc(buf_len, GFP_KERNEL);
9850		if (!cap_buf)
9851			return -ENOMEM;
9852
9853		/* this loads the data into the hw struct for us */
9854		err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
9855						    &data_size, list_type,
9856						    NULL);
9857		/* data loaded, buffer no longer needed */
9858		kfree(cap_buf);
9859
9860		if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) {
9861			/* retry with a larger buffer */
9862			buf_len = data_size;
9863		} else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) {
9864			dev_info(&pf->pdev->dev,
9865				 "capability discovery failed, err %s aq_err %s\n",
9866				 i40e_stat_str(&pf->hw, err),
9867				 i40e_aq_str(&pf->hw,
9868					     pf->hw.aq.asq_last_status));
9869			return -ENODEV;
9870		}
9871	} while (err);
9872
9873	if (pf->hw.debug_mask & I40E_DEBUG_USER) {
9874		if (list_type == i40e_aqc_opc_list_func_capabilities) {
9875			dev_info(&pf->pdev->dev,
9876				 "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
9877				 pf->hw.pf_id, pf->hw.func_caps.num_vfs,
9878				 pf->hw.func_caps.num_msix_vectors,
9879				 pf->hw.func_caps.num_msix_vectors_vf,
9880				 pf->hw.func_caps.fd_filters_guaranteed,
9881				 pf->hw.func_caps.fd_filters_best_effort,
9882				 pf->hw.func_caps.num_tx_qp,
9883				 pf->hw.func_caps.num_vsis);
9884		} else if (list_type == i40e_aqc_opc_list_dev_capabilities) {
9885			dev_info(&pf->pdev->dev,
9886				 "switch_mode=0x%04x, function_valid=0x%08x\n",
9887				 pf->hw.dev_caps.switch_mode,
9888				 pf->hw.dev_caps.valid_functions);
9889			dev_info(&pf->pdev->dev,
9890				 "SR-IOV=%d, num_vfs for all function=%u\n",
9891				 pf->hw.dev_caps.sr_iov_1_1,
9892				 pf->hw.dev_caps.num_vfs);
9893			dev_info(&pf->pdev->dev,
9894				 "num_vsis=%u, num_rx:%u, num_tx=%u\n",
9895				 pf->hw.dev_caps.num_vsis,
9896				 pf->hw.dev_caps.num_rx_qp,
9897				 pf->hw.dev_caps.num_tx_qp);
9898		}
9899	}
9900	if (list_type == i40e_aqc_opc_list_func_capabilities) {
9901#define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
9902		       + pf->hw.func_caps.num_vfs)
9903		if (pf->hw.revision_id == 0 &&
9904		    pf->hw.func_caps.num_vsis < DEF_NUM_VSI) {
9905			dev_info(&pf->pdev->dev,
9906				 "got num_vsis %d, setting num_vsis to %d\n",
9907				 pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
9908			pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
9909		}
9910	}
9911	return 0;
9912}
9913
9914static int i40e_vsi_clear(struct i40e_vsi *vsi);
9915
9916/**
9917 * i40e_fdir_sb_setup - initialize the Flow Director resources for Sideband
9918 * @pf: board private structure
9919 **/
9920static void i40e_fdir_sb_setup(struct i40e_pf *pf)
9921{
9922	struct i40e_vsi *vsi;
9923
9924	/* quick workaround for an NVM issue that leaves a critical register
9925	 * uninitialized
9926	 */
9927	if (!rd32(&pf->hw, I40E_GLQF_HKEY(0))) {
9928		static const u32 hkey[] = {
9929			0xe640d33f, 0xcdfe98ab, 0x73fa7161, 0x0d7a7d36,
9930			0xeacb7d61, 0xaa4f05b6, 0x9c5c89ed, 0xfc425ddb,
9931			0xa4654832, 0xfc7461d4, 0x8f827619, 0xf5c63c21,
9932			0x95b3a76d};
9933		int i;
9934
9935		for (i = 0; i <= I40E_GLQF_HKEY_MAX_INDEX; i++)
9936			wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]);
9937	}
9938
9939	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
9940		return;
9941
9942	/* find existing VSI and see if it needs configuring */
9943	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
9944
9945	/* create a new VSI if none exists */
9946	if (!vsi) {
9947		vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
9948				     pf->vsi[pf->lan_vsi]->seid, 0);
9949		if (!vsi) {
9950			dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
9951			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
9952			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
9953			return;
9954		}
9955	}
9956
9957	i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
9958}
9959
9960/**
9961 * i40e_fdir_teardown - release the Flow Director resources
9962 * @pf: board private structure
9963 **/
9964static void i40e_fdir_teardown(struct i40e_pf *pf)
9965{
9966	struct i40e_vsi *vsi;
9967
9968	i40e_fdir_filter_exit(pf);
9969	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
9970	if (vsi)
9971		i40e_vsi_release(vsi);
9972}
9973
9974/**
9975 * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs
9976 * @vsi: PF main vsi
9977 * @seid: seid of main or channel VSIs
9978 *
9979 * Rebuilds cloud filters associated with main VSI and channel VSIs if they
9980 * existed before reset
9981 **/
9982static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
9983{
9984	struct i40e_cloud_filter *cfilter;
9985	struct i40e_pf *pf = vsi->back;
9986	struct hlist_node *node;
9987	i40e_status ret;
9988
9989	/* Add cloud filters back if they exist */
9990	hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
9991				  cloud_node) {
9992		if (cfilter->seid != seid)
9993			continue;
9994
9995		if (cfilter->dst_port)
9996			ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
9997								true);
9998		else
9999			ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
10000
10001		if (ret) {
10002			dev_dbg(&pf->pdev->dev,
10003				"Failed to rebuild cloud filter, err %s aq_err %s\n",
10004				i40e_stat_str(&pf->hw, ret),
10005				i40e_aq_str(&pf->hw,
10006					    pf->hw.aq.asq_last_status));
10007			return ret;
10008		}
10009	}
10010	return 0;
10011}
10012
10013/**
10014 * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
10015 * @vsi: PF main vsi
10016 *
10017 * Rebuilds channel VSIs if they existed before reset
10018 **/
10019static int i40e_rebuild_channels(struct i40e_vsi *vsi)
10020{
10021	struct i40e_channel *ch, *ch_tmp;
10022	i40e_status ret;
10023
10024	if (list_empty(&vsi->ch_list))
10025		return 0;
10026
10027	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
10028		if (!ch->initialized)
10029			break;
10030		/* Proceed with creation of channel (VMDq2) VSI */
10031		ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
10032		if (ret) {
10033			dev_info(&vsi->back->pdev->dev,
10034				 "failed to rebuild channels using uplink_seid %u\n",
10035				 vsi->uplink_seid);
10036			return ret;
10037		}
10038		/* Reconfigure TX queues using QTX_CTL register */
10039		ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch);
10040		if (ret) {
10041			dev_info(&vsi->back->pdev->dev,
10042				 "failed to configure TX rings for channel %u\n",
10043				 ch->seid);
10044			return ret;
10045		}
10046		/* update 'next_base_queue' */
10047		vsi->next_base_queue = vsi->next_base_queue +
10048							ch->num_queue_pairs;
10049		if (ch->max_tx_rate) {
10050			u64 credits = ch->max_tx_rate;
10051
10052			if (i40e_set_bw_limit(vsi, ch->seid,
10053					      ch->max_tx_rate))
10054				return -EINVAL;
10055
10056			do_div(credits, I40E_BW_CREDIT_DIVISOR);
10057			dev_dbg(&vsi->back->pdev->dev,
10058				"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
10059				ch->max_tx_rate,
10060				credits,
10061				ch->seid);
10062		}
10063		ret = i40e_rebuild_cloud_filters(vsi, ch->seid);
10064		if (ret) {
10065			dev_dbg(&vsi->back->pdev->dev,
10066				"Failed to rebuild cloud filters for channel VSI %u\n",
10067				ch->seid);
10068			return ret;
10069		}
10070	}
10071	return 0;
10072}
10073
10074/**
10075 * i40e_clean_xps_state - clean xps state for every tx_ring
10076 * @vsi: ptr to the VSI
10077 **/
10078static void i40e_clean_xps_state(struct i40e_vsi *vsi)
10079{
10080	int i;
10081
10082	if (vsi->tx_rings)
10083		for (i = 0; i < vsi->num_queue_pairs; i++)
10084			if (vsi->tx_rings[i])
10085				clear_bit(__I40E_TX_XPS_INIT_DONE,
10086					  vsi->tx_rings[i]->state);
10087}
10088
10089/**
10090 * i40e_prep_for_reset - prep for the core to reset
10091 * @pf: board private structure
10092 * @lock_acquired: indicates whether or not the lock has been acquired
10093 * before this function was called.
10094 *
10095 * Close up the VFs and other things in prep for PF Reset.
10096  **/
10097static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired)
10098{
10099	struct i40e_hw *hw = &pf->hw;
10100	i40e_status ret = 0;
10101	u32 v;
10102
10103	clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
10104	if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
10105		return;
10106	if (i40e_check_asq_alive(&pf->hw))
10107		i40e_vc_notify_reset(pf);
10108
10109	dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
10110
10111	/* quiesce the VSIs and their queues that are not already DOWN */
10112	/* pf_quiesce_all_vsi modifies netdev structures -rtnl_lock needed */
10113	if (!lock_acquired)
10114		rtnl_lock();
10115	i40e_pf_quiesce_all_vsi(pf);
10116	if (!lock_acquired)
10117		rtnl_unlock();
10118
10119	for (v = 0; v < pf->num_alloc_vsi; v++) {
10120		if (pf->vsi[v]) {
10121			i40e_clean_xps_state(pf->vsi[v]);
10122			pf->vsi[v]->seid = 0;
10123		}
10124	}
10125
10126	i40e_shutdown_adminq(&pf->hw);
10127
10128	/* call shutdown HMC */
10129	if (hw->hmc.hmc_obj) {
10130		ret = i40e_shutdown_lan_hmc(hw);
10131		if (ret)
10132			dev_warn(&pf->pdev->dev,
10133				 "shutdown_lan_hmc failed: %d\n", ret);
10134	}
10135
10136	/* Save the current PTP time so that we can restore the time after the
10137	 * reset completes.
10138	 */
10139	i40e_ptp_save_hw_time(pf);
10140}
10141
10142/**
10143 * i40e_send_version - update firmware with driver version
10144 * @pf: PF struct
10145 */
10146static void i40e_send_version(struct i40e_pf *pf)
10147{
10148	struct i40e_driver_version dv;
10149
10150	dv.major_version = 0xff;
10151	dv.minor_version = 0xff;
10152	dv.build_version = 0xff;
10153	dv.subbuild_version = 0;
10154	strlcpy(dv.driver_string, UTS_RELEASE, sizeof(dv.driver_string));
10155	i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
10156}
10157
10158/**
10159 * i40e_get_oem_version - get OEM specific version information
10160 * @hw: pointer to the hardware structure
10161 **/
10162static void i40e_get_oem_version(struct i40e_hw *hw)
10163{
10164	u16 block_offset = 0xffff;
10165	u16 block_length = 0;
10166	u16 capabilities = 0;
10167	u16 gen_snap = 0;
10168	u16 release = 0;
10169
10170#define I40E_SR_NVM_OEM_VERSION_PTR		0x1B
10171#define I40E_NVM_OEM_LENGTH_OFFSET		0x00
10172#define I40E_NVM_OEM_CAPABILITIES_OFFSET	0x01
10173#define I40E_NVM_OEM_GEN_OFFSET			0x02
10174#define I40E_NVM_OEM_RELEASE_OFFSET		0x03
10175#define I40E_NVM_OEM_CAPABILITIES_MASK		0x000F
10176#define I40E_NVM_OEM_LENGTH			3
10177
10178	/* Check if pointer to OEM version block is valid. */
10179	i40e_read_nvm_word(hw, I40E_SR_NVM_OEM_VERSION_PTR, &block_offset);
10180	if (block_offset == 0xffff)
10181		return;
10182
10183	/* Check if OEM version block has correct length. */
10184	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_LENGTH_OFFSET,
10185			   &block_length);
10186	if (block_length < I40E_NVM_OEM_LENGTH)
10187		return;
10188
10189	/* Check if OEM version format is as expected. */
10190	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_CAPABILITIES_OFFSET,
10191			   &capabilities);
10192	if ((capabilities & I40E_NVM_OEM_CAPABILITIES_MASK) != 0)
10193		return;
10194
10195	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_GEN_OFFSET,
10196			   &gen_snap);
10197	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_RELEASE_OFFSET,
10198			   &release);
10199	hw->nvm.oem_ver = (gen_snap << I40E_OEM_SNAP_SHIFT) | release;
10200	hw->nvm.eetrack = I40E_OEM_EETRACK_ID;
10201}
10202
10203/**
10204 * i40e_reset - wait for core reset to finish reset, reset pf if corer not seen
10205 * @pf: board private structure
10206 **/
10207static int i40e_reset(struct i40e_pf *pf)
10208{
10209	struct i40e_hw *hw = &pf->hw;
10210	i40e_status ret;
10211
10212	ret = i40e_pf_reset(hw);
10213	if (ret) {
10214		dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret);
10215		set_bit(__I40E_RESET_FAILED, pf->state);
10216		clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
10217	} else {
10218		pf->pfr_count++;
10219	}
10220	return ret;
10221}
10222
10223/**
10224 * i40e_rebuild - rebuild using a saved config
10225 * @pf: board private structure
10226 * @reinit: if the Main VSI needs to re-initialized.
10227 * @lock_acquired: indicates whether or not the lock has been acquired
10228 * before this function was called.
10229 **/
10230static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
10231{
10232	const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
10233	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
10234	struct i40e_hw *hw = &pf->hw;
10235	i40e_status ret;
10236	u32 val;
10237	int v;
10238
10239	if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
10240	    is_recovery_mode_reported)
10241		i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
10242
10243	if (test_bit(__I40E_DOWN, pf->state) &&
10244	    !test_bit(__I40E_RECOVERY_MODE, pf->state))
10245		goto clear_recovery;
10246	dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
10247
10248	/* rebuild the basics for the AdminQ, HMC, and initial HW switch */
10249	ret = i40e_init_adminq(&pf->hw);
10250	if (ret) {
10251		dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %s aq_err %s\n",
10252			 i40e_stat_str(&pf->hw, ret),
10253			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10254		goto clear_recovery;
10255	}
10256	i40e_get_oem_version(&pf->hw);
10257
10258	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) {
10259		/* The following delay is necessary for firmware update. */
10260		mdelay(1000);
10261	}
10262
10263	/* re-verify the eeprom if we just had an EMP reset */
10264	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
10265		i40e_verify_eeprom(pf);
10266
10267	/* if we are going out of or into recovery mode we have to act
10268	 * accordingly with regard to resources initialization
10269	 * and deinitialization
10270	 */
10271	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
10272		if (i40e_get_capabilities(pf,
10273					  i40e_aqc_opc_list_func_capabilities))
10274			goto end_unlock;
10275
10276		if (is_recovery_mode_reported) {
10277			/* we're staying in recovery mode so we'll reinitialize
10278			 * misc vector here
10279			 */
10280			if (i40e_setup_misc_vector_for_recovery_mode(pf))
10281				goto end_unlock;
10282		} else {
10283			if (!lock_acquired)
10284				rtnl_lock();
10285			/* we're going out of recovery mode so we'll free
10286			 * the IRQ allocated specifically for recovery mode
10287			 * and restore the interrupt scheme
10288			 */
10289			free_irq(pf->pdev->irq, pf);
10290			i40e_clear_interrupt_scheme(pf);
10291			if (i40e_restore_interrupt_scheme(pf))
10292				goto end_unlock;
10293		}
10294
10295		/* tell the firmware that we're starting */
10296		i40e_send_version(pf);
10297
10298		/* bail out in case recovery mode was detected, as there is
10299		 * no need for further configuration.
10300		 */
10301		goto end_unlock;
10302	}
10303
10304	i40e_clear_pxe_mode(hw);
10305	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
10306	if (ret)
10307		goto end_core_reset;
10308
10309	ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
10310				hw->func_caps.num_rx_qp, 0, 0);
10311	if (ret) {
10312		dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret);
10313		goto end_core_reset;
10314	}
10315	ret = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
10316	if (ret) {
10317		dev_info(&pf->pdev->dev, "configure_lan_hmc failed: %d\n", ret);
10318		goto end_core_reset;
10319	}
10320
10321	/* Enable FW to write a default DCB config on link-up */
10322	i40e_aq_set_dcb_parameters(hw, true, NULL);
10323
10324#ifdef CONFIG_I40E_DCB
10325	ret = i40e_init_pf_dcb(pf);
10326	if (ret) {
10327		dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", ret);
10328		pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
10329		/* Continue without DCB enabled */
10330	}
10331#endif /* CONFIG_I40E_DCB */
10332	/* do basic switch setup */
10333	if (!lock_acquired)
10334		rtnl_lock();
10335	ret = i40e_setup_pf_switch(pf, reinit, true);
10336	if (ret)
10337		goto end_unlock;
10338
10339	/* The driver only wants link up/down and module qualification
10340	 * reports from firmware.  Note the negative logic.
10341	 */
10342	ret = i40e_aq_set_phy_int_mask(&pf->hw,
10343				       ~(I40E_AQ_EVENT_LINK_UPDOWN |
10344					 I40E_AQ_EVENT_MEDIA_NA |
10345					 I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
10346	if (ret)
10347		dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
10348			 i40e_stat_str(&pf->hw, ret),
10349			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10350
10351	/* Rebuild the VSIs and VEBs that existed before reset.
10352	 * They are still in our local switch element arrays, so only
10353	 * need to rebuild the switch model in the HW.
10354	 *
10355	 * If there were VEBs but the reconstitution failed, we'll try
10356	 * try to recover minimal use by getting the basic PF VSI working.
10357	 */
10358	if (vsi->uplink_seid != pf->mac_seid) {
10359		dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
10360		/* find the one VEB connected to the MAC, and find orphans */
10361		for (v = 0; v < I40E_MAX_VEB; v++) {
10362			if (!pf->veb[v])
10363				continue;
10364
10365			if (pf->veb[v]->uplink_seid == pf->mac_seid ||
10366			    pf->veb[v]->uplink_seid == 0) {
10367				ret = i40e_reconstitute_veb(pf->veb[v]);
10368
10369				if (!ret)
10370					continue;
10371
10372				/* If Main VEB failed, we're in deep doodoo,
10373				 * so give up rebuilding the switch and set up
10374				 * for minimal rebuild of PF VSI.
10375				 * If orphan failed, we'll report the error
10376				 * but try to keep going.
10377				 */
10378				if (pf->veb[v]->uplink_seid == pf->mac_seid) {
10379					dev_info(&pf->pdev->dev,
10380						 "rebuild of switch failed: %d, will try to set up simple PF connection\n",
10381						 ret);
10382					vsi->uplink_seid = pf->mac_seid;
10383					break;
10384				} else if (pf->veb[v]->uplink_seid == 0) {
10385					dev_info(&pf->pdev->dev,
10386						 "rebuild of orphan VEB failed: %d\n",
10387						 ret);
10388				}
10389			}
10390		}
10391	}
10392
10393	if (vsi->uplink_seid == pf->mac_seid) {
10394		dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
10395		/* no VEB, so rebuild only the Main VSI */
10396		ret = i40e_add_vsi(vsi);
10397		if (ret) {
10398			dev_info(&pf->pdev->dev,
10399				 "rebuild of Main VSI failed: %d\n", ret);
10400			goto end_unlock;
10401		}
10402	}
10403
10404	if (vsi->mqprio_qopt.max_rate[0]) {
10405		u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
10406						  vsi->mqprio_qopt.max_rate[0]);
10407		u64 credits = 0;
10408
10409		ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
10410		if (ret)
10411			goto end_unlock;
10412
10413		credits = max_tx_rate;
10414		do_div(credits, I40E_BW_CREDIT_DIVISOR);
10415		dev_dbg(&vsi->back->pdev->dev,
10416			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
10417			max_tx_rate,
10418			credits,
10419			vsi->seid);
10420	}
10421
10422	ret = i40e_rebuild_cloud_filters(vsi, vsi->seid);
10423	if (ret)
10424		goto end_unlock;
10425
10426	/* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs
10427	 * for this main VSI if they exist
10428	 */
10429	ret = i40e_rebuild_channels(vsi);
10430	if (ret)
10431		goto end_unlock;
10432
10433	/* Reconfigure hardware for allowing smaller MSS in the case
10434	 * of TSO, so that we avoid the MDD being fired and causing
10435	 * a reset in the case of small MSS+TSO.
10436	 */
10437#define I40E_REG_MSS          0x000E64DC
10438#define I40E_REG_MSS_MIN_MASK 0x3FF0000
10439#define I40E_64BYTE_MSS       0x400000
10440	val = rd32(hw, I40E_REG_MSS);
10441	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
10442		val &= ~I40E_REG_MSS_MIN_MASK;
10443		val |= I40E_64BYTE_MSS;
10444		wr32(hw, I40E_REG_MSS, val);
10445	}
10446
10447	if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
10448		msleep(75);
10449		ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
10450		if (ret)
10451			dev_info(&pf->pdev->dev, "link restart failed, err %s aq_err %s\n",
10452				 i40e_stat_str(&pf->hw, ret),
10453				 i40e_aq_str(&pf->hw,
10454					     pf->hw.aq.asq_last_status));
10455	}
10456	/* reinit the misc interrupt */
10457	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
10458		ret = i40e_setup_misc_vector(pf);
10459		if (ret)
10460			goto end_unlock;
10461	}
10462
10463	/* Add a filter to drop all Flow control frames from any VSI from being
10464	 * transmitted. By doing so we stop a malicious VF from sending out
10465	 * PAUSE or PFC frames and potentially controlling traffic for other
10466	 * PF/VF VSIs.
10467	 * The FW can still send Flow control frames if enabled.
10468	 */
10469	i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
10470						       pf->main_vsi_seid);
10471
10472	/* restart the VSIs that were rebuilt and running before the reset */
10473	i40e_pf_unquiesce_all_vsi(pf);
10474
10475	/* Release the RTNL lock before we start resetting VFs */
10476	if (!lock_acquired)
10477		rtnl_unlock();
10478
10479	/* Restore promiscuous settings */
10480	ret = i40e_set_promiscuous(pf, pf->cur_promisc);
10481	if (ret)
10482		dev_warn(&pf->pdev->dev,
10483			 "Failed to restore promiscuous setting: %s, err %s aq_err %s\n",
10484			 pf->cur_promisc ? "on" : "off",
10485			 i40e_stat_str(&pf->hw, ret),
10486			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
10487
10488	i40e_reset_all_vfs(pf, true);
10489
10490	/* tell the firmware that we're starting */
10491	i40e_send_version(pf);
10492
10493	/* We've already released the lock, so don't do it again */
10494	goto end_core_reset;
10495
10496end_unlock:
10497	if (!lock_acquired)
10498		rtnl_unlock();
10499end_core_reset:
10500	clear_bit(__I40E_RESET_FAILED, pf->state);
10501clear_recovery:
10502	clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
10503	clear_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state);
10504}
10505
10506/**
10507 * i40e_reset_and_rebuild - reset and rebuild using a saved config
10508 * @pf: board private structure
10509 * @reinit: if the Main VSI needs to re-initialized.
10510 * @lock_acquired: indicates whether or not the lock has been acquired
10511 * before this function was called.
10512 **/
10513static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
10514				   bool lock_acquired)
10515{
10516	int ret;
10517	/* Now we wait for GRST to settle out.
10518	 * We don't have to delete the VEBs or VSIs from the hw switch
10519	 * because the reset will make them disappear.
10520	 */
10521	ret = i40e_reset(pf);
10522	if (!ret)
10523		i40e_rebuild(pf, reinit, lock_acquired);
10524}
10525
10526/**
10527 * i40e_handle_reset_warning - prep for the PF to reset, reset and rebuild
10528 * @pf: board private structure
10529 *
10530 * Close up the VFs and other things in prep for a Core Reset,
10531 * then get ready to rebuild the world.
10532 * @lock_acquired: indicates whether or not the lock has been acquired
10533 * before this function was called.
10534 **/
10535static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
10536{
10537	i40e_prep_for_reset(pf, lock_acquired);
10538	i40e_reset_and_rebuild(pf, false, lock_acquired);
10539}
10540
10541/**
10542 * i40e_handle_mdd_event
10543 * @pf: pointer to the PF structure
10544 *
10545 * Called from the MDD irq handler to identify possibly malicious vfs
10546 **/
10547static void i40e_handle_mdd_event(struct i40e_pf *pf)
10548{
10549	struct i40e_hw *hw = &pf->hw;
10550	bool mdd_detected = false;
10551	struct i40e_vf *vf;
10552	u32 reg;
10553	int i;
10554
10555	if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
10556		return;
10557
10558	/* find what triggered the MDD event */
10559	reg = rd32(hw, I40E_GL_MDET_TX);
10560	if (reg & I40E_GL_MDET_TX_VALID_MASK) {
10561		u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >>
10562				I40E_GL_MDET_TX_PF_NUM_SHIFT;
10563		u16 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >>
10564				I40E_GL_MDET_TX_VF_NUM_SHIFT;
10565		u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >>
10566				I40E_GL_MDET_TX_EVENT_SHIFT;
10567		u16 queue = ((reg & I40E_GL_MDET_TX_QUEUE_MASK) >>
10568				I40E_GL_MDET_TX_QUEUE_SHIFT) -
10569				pf->hw.func_caps.base_queue;
10570		if (netif_msg_tx_err(pf))
10571			dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on TX queue %d PF number 0x%02x VF number 0x%02x\n",
10572				 event, queue, pf_num, vf_num);
10573		wr32(hw, I40E_GL_MDET_TX, 0xffffffff);
10574		mdd_detected = true;
10575	}
10576	reg = rd32(hw, I40E_GL_MDET_RX);
10577	if (reg & I40E_GL_MDET_RX_VALID_MASK) {
10578		u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >>
10579				I40E_GL_MDET_RX_FUNCTION_SHIFT;
10580		u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >>
10581				I40E_GL_MDET_RX_EVENT_SHIFT;
10582		u16 queue = ((reg & I40E_GL_MDET_RX_QUEUE_MASK) >>
10583				I40E_GL_MDET_RX_QUEUE_SHIFT) -
10584				pf->hw.func_caps.base_queue;
10585		if (netif_msg_rx_err(pf))
10586			dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on RX queue %d of function 0x%02x\n",
10587				 event, queue, func);
10588		wr32(hw, I40E_GL_MDET_RX, 0xffffffff);
10589		mdd_detected = true;
10590	}
10591
10592	if (mdd_detected) {
10593		reg = rd32(hw, I40E_PF_MDET_TX);
10594		if (reg & I40E_PF_MDET_TX_VALID_MASK) {
10595			wr32(hw, I40E_PF_MDET_TX, 0xFFFF);
10596			dev_dbg(&pf->pdev->dev, "TX driver issue detected on PF\n");
10597		}
10598		reg = rd32(hw, I40E_PF_MDET_RX);
10599		if (reg & I40E_PF_MDET_RX_VALID_MASK) {
10600			wr32(hw, I40E_PF_MDET_RX, 0xFFFF);
10601			dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n");
10602		}
10603	}
10604
10605	/* see if one of the VFs needs its hand slapped */
10606	for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
10607		vf = &(pf->vf[i]);
10608		reg = rd32(hw, I40E_VP_MDET_TX(i));
10609		if (reg & I40E_VP_MDET_TX_VALID_MASK) {
10610			wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
10611			vf->num_mdd_events++;
10612			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
10613				 i);
10614			dev_info(&pf->pdev->dev,
10615				 "Use PF Control I/F to re-enable the VF\n");
10616			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
10617		}
10618
10619		reg = rd32(hw, I40E_VP_MDET_RX(i));
10620		if (reg & I40E_VP_MDET_RX_VALID_MASK) {
10621			wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
10622			vf->num_mdd_events++;
10623			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
10624				 i);
10625			dev_info(&pf->pdev->dev,
10626				 "Use PF Control I/F to re-enable the VF\n");
10627			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
10628		}
10629	}
10630
10631	/* re-enable mdd interrupt cause */
10632	clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
10633	reg = rd32(hw, I40E_PFINT_ICR0_ENA);
10634	reg |=  I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
10635	wr32(hw, I40E_PFINT_ICR0_ENA, reg);
10636	i40e_flush(hw);
10637}
10638
10639/**
10640 * i40e_service_task - Run the driver's async subtasks
10641 * @work: pointer to work_struct containing our data
10642 **/
10643static void i40e_service_task(struct work_struct *work)
10644{
10645	struct i40e_pf *pf = container_of(work,
10646					  struct i40e_pf,
10647					  service_task);
10648	unsigned long start_time = jiffies;
10649
10650	/* don't bother with service tasks if a reset is in progress */
10651	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
10652	    test_bit(__I40E_SUSPENDED, pf->state))
10653		return;
10654
10655	if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
10656		return;
10657
10658	if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) {
10659		i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]);
10660		i40e_sync_filters_subtask(pf);
10661		i40e_reset_subtask(pf);
10662		i40e_handle_mdd_event(pf);
10663		i40e_vc_process_vflr_event(pf);
10664		i40e_watchdog_subtask(pf);
10665		i40e_fdir_reinit_subtask(pf);
10666		if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
10667			/* Client subtask will reopen next time through. */
10668			i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi],
10669							   true);
10670		} else {
10671			i40e_client_subtask(pf);
10672			if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
10673					       pf->state))
10674				i40e_notify_client_of_l2_param_changes(
10675								pf->vsi[pf->lan_vsi]);
10676		}
10677		i40e_sync_filters_subtask(pf);
10678	} else {
10679		i40e_reset_subtask(pf);
10680	}
10681
10682	i40e_clean_adminq_subtask(pf);
10683
10684	/* flush memory to make sure state is correct before next watchdog */
10685	smp_mb__before_atomic();
10686	clear_bit(__I40E_SERVICE_SCHED, pf->state);
10687
10688	/* If the tasks have taken longer than one timer cycle or there
10689	 * is more work to be done, reschedule the service task now
10690	 * rather than wait for the timer to tick again.
10691	 */
10692	if (time_after(jiffies, (start_time + pf->service_timer_period)) ||
10693	    test_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state)		 ||
10694	    test_bit(__I40E_MDD_EVENT_PENDING, pf->state)		 ||
10695	    test_bit(__I40E_VFLR_EVENT_PENDING, pf->state))
10696		i40e_service_event_schedule(pf);
10697}
10698
10699/**
10700 * i40e_service_timer - timer callback
10701 * @t: timer list pointer
10702 **/
10703static void i40e_service_timer(struct timer_list *t)
10704{
10705	struct i40e_pf *pf = from_timer(pf, t, service_timer);
10706
10707	mod_timer(&pf->service_timer,
10708		  round_jiffies(jiffies + pf->service_timer_period));
10709	i40e_service_event_schedule(pf);
10710}
10711
10712/**
10713 * i40e_set_num_rings_in_vsi - Determine number of rings in the VSI
10714 * @vsi: the VSI being configured
10715 **/
10716static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
10717{
10718	struct i40e_pf *pf = vsi->back;
10719
10720	switch (vsi->type) {
10721	case I40E_VSI_MAIN:
10722		vsi->alloc_queue_pairs = pf->num_lan_qps;
10723		if (!vsi->num_tx_desc)
10724			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
10725						 I40E_REQ_DESCRIPTOR_MULTIPLE);
10726		if (!vsi->num_rx_desc)
10727			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
10728						 I40E_REQ_DESCRIPTOR_MULTIPLE);
10729		if (pf->flags & I40E_FLAG_MSIX_ENABLED)
10730			vsi->num_q_vectors = pf->num_lan_msix;
10731		else
10732			vsi->num_q_vectors = 1;
10733
10734		break;
10735
10736	case I40E_VSI_FDIR:
10737		vsi->alloc_queue_pairs = 1;
10738		vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT,
10739					 I40E_REQ_DESCRIPTOR_MULTIPLE);
10740		vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT,
10741					 I40E_REQ_DESCRIPTOR_MULTIPLE);
10742		vsi->num_q_vectors = pf->num_fdsb_msix;
10743		break;
10744
10745	case I40E_VSI_VMDQ2:
10746		vsi->alloc_queue_pairs = pf->num_vmdq_qps;
10747		if (!vsi->num_tx_desc)
10748			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
10749						 I40E_REQ_DESCRIPTOR_MULTIPLE);
10750		if (!vsi->num_rx_desc)
10751			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
10752						 I40E_REQ_DESCRIPTOR_MULTIPLE);
10753		vsi->num_q_vectors = pf->num_vmdq_msix;
10754		break;
10755
10756	case I40E_VSI_SRIOV:
10757		vsi->alloc_queue_pairs = pf->num_vf_qps;
10758		if (!vsi->num_tx_desc)
10759			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
10760						 I40E_REQ_DESCRIPTOR_MULTIPLE);
10761		if (!vsi->num_rx_desc)
10762			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
10763						 I40E_REQ_DESCRIPTOR_MULTIPLE);
10764		break;
10765
10766	default:
10767		WARN_ON(1);
10768		return -ENODATA;
10769	}
10770
10771	return 0;
10772}
10773
10774/**
10775 * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
10776 * @vsi: VSI pointer
10777 * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
10778 *
10779 * On error: returns error code (negative)
10780 * On success: returns 0
10781 **/
10782static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
10783{
10784	struct i40e_ring **next_rings;
10785	int size;
10786	int ret = 0;
10787
10788	/* allocate memory for both Tx, XDP Tx and Rx ring pointers */
10789	size = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs *
10790	       (i40e_enabled_xdp_vsi(vsi) ? 3 : 2);
10791	vsi->tx_rings = kzalloc(size, GFP_KERNEL);
10792	if (!vsi->tx_rings)
10793		return -ENOMEM;
10794	next_rings = vsi->tx_rings + vsi->alloc_queue_pairs;
10795	if (i40e_enabled_xdp_vsi(vsi)) {
10796		vsi->xdp_rings = next_rings;
10797		next_rings += vsi->alloc_queue_pairs;
10798	}
10799	vsi->rx_rings = next_rings;
10800
10801	if (alloc_qvectors) {
10802		/* allocate memory for q_vector pointers */
10803		size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
10804		vsi->q_vectors = kzalloc(size, GFP_KERNEL);
10805		if (!vsi->q_vectors) {
10806			ret = -ENOMEM;
10807			goto err_vectors;
10808		}
10809	}
10810	return ret;
10811
10812err_vectors:
10813	kfree(vsi->tx_rings);
10814	return ret;
10815}
10816
10817/**
10818 * i40e_vsi_mem_alloc - Allocates the next available struct vsi in the PF
10819 * @pf: board private structure
10820 * @type: type of VSI
10821 *
10822 * On error: returns error code (negative)
10823 * On success: returns vsi index in PF (positive)
10824 **/
10825static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
10826{
10827	int ret = -ENODEV;
10828	struct i40e_vsi *vsi;
10829	int vsi_idx;
10830	int i;
10831
10832	/* Need to protect the allocation of the VSIs at the PF level */
10833	mutex_lock(&pf->switch_mutex);
10834
10835	/* VSI list may be fragmented if VSI creation/destruction has
10836	 * been happening.  We can afford to do a quick scan to look
10837	 * for any free VSIs in the list.
10838	 *
10839	 * find next empty vsi slot, looping back around if necessary
10840	 */
10841	i = pf->next_vsi;
10842	while (i < pf->num_alloc_vsi && pf->vsi[i])
10843		i++;
10844	if (i >= pf->num_alloc_vsi) {
10845		i = 0;
10846		while (i < pf->next_vsi && pf->vsi[i])
10847			i++;
10848	}
10849
10850	if (i < pf->num_alloc_vsi && !pf->vsi[i]) {
10851		vsi_idx = i;             /* Found one! */
10852	} else {
10853		ret = -ENODEV;
10854		goto unlock_pf;  /* out of VSI slots! */
10855	}
10856	pf->next_vsi = ++i;
10857
10858	vsi = kzalloc(sizeof(*vsi), GFP_KERNEL);
10859	if (!vsi) {
10860		ret = -ENOMEM;
10861		goto unlock_pf;
10862	}
10863	vsi->type = type;
10864	vsi->back = pf;
10865	set_bit(__I40E_VSI_DOWN, vsi->state);
10866	vsi->flags = 0;
10867	vsi->idx = vsi_idx;
10868	vsi->int_rate_limit = 0;
10869	vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ?
10870				pf->rss_table_size : 64;
10871	vsi->netdev_registered = false;
10872	vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
10873	hash_init(vsi->mac_filter_hash);
10874	vsi->irqs_ready = false;
10875
10876	if (type == I40E_VSI_MAIN) {
10877		vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL);
10878		if (!vsi->af_xdp_zc_qps)
10879			goto err_rings;
10880	}
10881
10882	ret = i40e_set_num_rings_in_vsi(vsi);
10883	if (ret)
10884		goto err_rings;
10885
10886	ret = i40e_vsi_alloc_arrays(vsi, true);
10887	if (ret)
10888		goto err_rings;
10889
10890	/* Setup default MSIX irq handler for VSI */
10891	i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
10892
10893	/* Initialize VSI lock */
10894	spin_lock_init(&vsi->mac_filter_hash_lock);
10895	pf->vsi[vsi_idx] = vsi;
10896	ret = vsi_idx;
10897	goto unlock_pf;
10898
10899err_rings:
10900	bitmap_free(vsi->af_xdp_zc_qps);
10901	pf->next_vsi = i - 1;
10902	kfree(vsi);
10903unlock_pf:
10904	mutex_unlock(&pf->switch_mutex);
10905	return ret;
10906}
10907
10908/**
10909 * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI
10910 * @vsi: VSI pointer
10911 * @free_qvectors: a bool to specify if q_vectors need to be freed.
10912 *
10913 * On error: returns error code (negative)
10914 * On success: returns 0
10915 **/
10916static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors)
10917{
10918	/* free the ring and vector containers */
10919	if (free_qvectors) {
10920		kfree(vsi->q_vectors);
10921		vsi->q_vectors = NULL;
10922	}
10923	kfree(vsi->tx_rings);
10924	vsi->tx_rings = NULL;
10925	vsi->rx_rings = NULL;
10926	vsi->xdp_rings = NULL;
10927}
10928
10929/**
10930 * i40e_clear_rss_config_user - clear the user configured RSS hash keys
10931 * and lookup table
10932 * @vsi: Pointer to VSI structure
10933 */
10934static void i40e_clear_rss_config_user(struct i40e_vsi *vsi)
10935{
10936	if (!vsi)
10937		return;
10938
10939	kfree(vsi->rss_hkey_user);
10940	vsi->rss_hkey_user = NULL;
10941
10942	kfree(vsi->rss_lut_user);
10943	vsi->rss_lut_user = NULL;
10944}
10945
10946/**
10947 * i40e_vsi_clear - Deallocate the VSI provided
10948 * @vsi: the VSI being un-configured
10949 **/
10950static int i40e_vsi_clear(struct i40e_vsi *vsi)
10951{
10952	struct i40e_pf *pf;
10953
10954	if (!vsi)
10955		return 0;
10956
10957	if (!vsi->back)
10958		goto free_vsi;
10959	pf = vsi->back;
10960
10961	mutex_lock(&pf->switch_mutex);
10962	if (!pf->vsi[vsi->idx]) {
10963		dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
10964			vsi->idx, vsi->idx, vsi->type);
10965		goto unlock_vsi;
10966	}
10967
10968	if (pf->vsi[vsi->idx] != vsi) {
10969		dev_err(&pf->pdev->dev,
10970			"pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
10971			pf->vsi[vsi->idx]->idx,
10972			pf->vsi[vsi->idx]->type,
10973			vsi->idx, vsi->type);
10974		goto unlock_vsi;
10975	}
10976
10977	/* updates the PF for this cleared vsi */
10978	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
10979	i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
10980
10981	bitmap_free(vsi->af_xdp_zc_qps);
10982	i40e_vsi_free_arrays(vsi, true);
10983	i40e_clear_rss_config_user(vsi);
10984
10985	pf->vsi[vsi->idx] = NULL;
10986	if (vsi->idx < pf->next_vsi)
10987		pf->next_vsi = vsi->idx;
10988
10989unlock_vsi:
10990	mutex_unlock(&pf->switch_mutex);
10991free_vsi:
10992	kfree(vsi);
10993
10994	return 0;
10995}
10996
10997/**
10998 * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI
10999 * @vsi: the VSI being cleaned
11000 **/
11001static void i40e_vsi_clear_rings(struct i40e_vsi *vsi)
11002{
11003	int i;
11004
11005	if (vsi->tx_rings && vsi->tx_rings[0]) {
11006		for (i = 0; i < vsi->alloc_queue_pairs; i++) {
11007			kfree_rcu(vsi->tx_rings[i], rcu);
11008			WRITE_ONCE(vsi->tx_rings[i], NULL);
11009			WRITE_ONCE(vsi->rx_rings[i], NULL);
11010			if (vsi->xdp_rings)
11011				WRITE_ONCE(vsi->xdp_rings[i], NULL);
11012		}
11013	}
11014}
11015
11016/**
11017 * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI
11018 * @vsi: the VSI being configured
11019 **/
11020static int i40e_alloc_rings(struct i40e_vsi *vsi)
11021{
11022	int i, qpv = i40e_enabled_xdp_vsi(vsi) ? 3 : 2;
11023	struct i40e_pf *pf = vsi->back;
11024	struct i40e_ring *ring;
11025
11026	/* Set basic values in the rings to be used later during open() */
11027	for (i = 0; i < vsi->alloc_queue_pairs; i++) {
11028		/* allocate space for both Tx and Rx in one shot */
11029		ring = kcalloc(qpv, sizeof(struct i40e_ring), GFP_KERNEL);
11030		if (!ring)
11031			goto err_out;
11032
11033		ring->queue_index = i;
11034		ring->reg_idx = vsi->base_queue + i;
11035		ring->ring_active = false;
11036		ring->vsi = vsi;
11037		ring->netdev = vsi->netdev;
11038		ring->dev = &pf->pdev->dev;
11039		ring->count = vsi->num_tx_desc;
11040		ring->size = 0;
11041		ring->dcb_tc = 0;
11042		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
11043			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
11044		ring->itr_setting = pf->tx_itr_default;
11045		WRITE_ONCE(vsi->tx_rings[i], ring++);
11046
11047		if (!i40e_enabled_xdp_vsi(vsi))
11048			goto setup_rx;
11049
11050		ring->queue_index = vsi->alloc_queue_pairs + i;
11051		ring->reg_idx = vsi->base_queue + ring->queue_index;
11052		ring->ring_active = false;
11053		ring->vsi = vsi;
11054		ring->netdev = NULL;
11055		ring->dev = &pf->pdev->dev;
11056		ring->count = vsi->num_tx_desc;
11057		ring->size = 0;
11058		ring->dcb_tc = 0;
11059		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
11060			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
11061		set_ring_xdp(ring);
11062		ring->itr_setting = pf->tx_itr_default;
11063		WRITE_ONCE(vsi->xdp_rings[i], ring++);
11064
11065setup_rx:
11066		ring->queue_index = i;
11067		ring->reg_idx = vsi->base_queue + i;
11068		ring->ring_active = false;
11069		ring->vsi = vsi;
11070		ring->netdev = vsi->netdev;
11071		ring->dev = &pf->pdev->dev;
11072		ring->count = vsi->num_rx_desc;
11073		ring->size = 0;
11074		ring->dcb_tc = 0;
11075		ring->itr_setting = pf->rx_itr_default;
11076		WRITE_ONCE(vsi->rx_rings[i], ring);
11077	}
11078
11079	return 0;
11080
11081err_out:
11082	i40e_vsi_clear_rings(vsi);
11083	return -ENOMEM;
11084}
11085
11086/**
11087 * i40e_reserve_msix_vectors - Reserve MSI-X vectors in the kernel
11088 * @pf: board private structure
11089 * @vectors: the number of MSI-X vectors to request
11090 *
11091 * Returns the number of vectors reserved, or error
11092 **/
11093static int i40e_reserve_msix_vectors(struct i40e_pf *pf, int vectors)
11094{
11095	vectors = pci_enable_msix_range(pf->pdev, pf->msix_entries,
11096					I40E_MIN_MSIX, vectors);
11097	if (vectors < 0) {
11098		dev_info(&pf->pdev->dev,
11099			 "MSI-X vector reservation failed: %d\n", vectors);
11100		vectors = 0;
11101	}
11102
11103	return vectors;
11104}
11105
11106/**
11107 * i40e_init_msix - Setup the MSIX capability
11108 * @pf: board private structure
11109 *
11110 * Work with the OS to set up the MSIX vectors needed.
11111 *
11112 * Returns the number of vectors reserved or negative on failure
11113 **/
11114static int i40e_init_msix(struct i40e_pf *pf)
11115{
11116	struct i40e_hw *hw = &pf->hw;
11117	int cpus, extra_vectors;
11118	int vectors_left;
11119	int v_budget, i;
11120	int v_actual;
11121	int iwarp_requested = 0;
11122
11123	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
11124		return -ENODEV;
11125
11126	/* The number of vectors we'll request will be comprised of:
11127	 *   - Add 1 for "other" cause for Admin Queue events, etc.
11128	 *   - The number of LAN queue pairs
11129	 *	- Queues being used for RSS.
11130	 *		We don't need as many as max_rss_size vectors.
11131	 *		use rss_size instead in the calculation since that
11132	 *		is governed by number of cpus in the system.
11133	 *	- assumes symmetric Tx/Rx pairing
11134	 *   - The number of VMDq pairs
11135	 *   - The CPU count within the NUMA node if iWARP is enabled
11136	 * Once we count this up, try the request.
11137	 *
11138	 * If we can't get what we want, we'll simplify to nearly nothing
11139	 * and try again.  If that still fails, we punt.
11140	 */
11141	vectors_left = hw->func_caps.num_msix_vectors;
11142	v_budget = 0;
11143
11144	/* reserve one vector for miscellaneous handler */
11145	if (vectors_left) {
11146		v_budget++;
11147		vectors_left--;
11148	}
11149
11150	/* reserve some vectors for the main PF traffic queues. Initially we
11151	 * only reserve at most 50% of the available vectors, in the case that
11152	 * the number of online CPUs is large. This ensures that we can enable
11153	 * extra features as well. Once we've enabled the other features, we
11154	 * will use any remaining vectors to reach as close as we can to the
11155	 * number of online CPUs.
11156	 */
11157	cpus = num_online_cpus();
11158	pf->num_lan_msix = min_t(int, cpus, vectors_left / 2);
11159	vectors_left -= pf->num_lan_msix;
11160
11161	/* reserve one vector for sideband flow director */
11162	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
11163		if (vectors_left) {
11164			pf->num_fdsb_msix = 1;
11165			v_budget++;
11166			vectors_left--;
11167		} else {
11168			pf->num_fdsb_msix = 0;
11169		}
11170	}
11171
11172	/* can we reserve enough for iWARP? */
11173	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
11174		iwarp_requested = pf->num_iwarp_msix;
11175
11176		if (!vectors_left)
11177			pf->num_iwarp_msix = 0;
11178		else if (vectors_left < pf->num_iwarp_msix)
11179			pf->num_iwarp_msix = 1;
11180		v_budget += pf->num_iwarp_msix;
11181		vectors_left -= pf->num_iwarp_msix;
11182	}
11183
11184	/* any vectors left over go for VMDq support */
11185	if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
11186		if (!vectors_left) {
11187			pf->num_vmdq_msix = 0;
11188			pf->num_vmdq_qps = 0;
11189		} else {
11190			int vmdq_vecs_wanted =
11191				pf->num_vmdq_vsis * pf->num_vmdq_qps;
11192			int vmdq_vecs =
11193				min_t(int, vectors_left, vmdq_vecs_wanted);
11194
11195			/* if we're short on vectors for what's desired, we limit
11196			 * the queues per vmdq.  If this is still more than are
11197			 * available, the user will need to change the number of
11198			 * queues/vectors used by the PF later with the ethtool
11199			 * channels command
11200			 */
11201			if (vectors_left < vmdq_vecs_wanted) {
11202				pf->num_vmdq_qps = 1;
11203				vmdq_vecs_wanted = pf->num_vmdq_vsis;
11204				vmdq_vecs = min_t(int,
11205						  vectors_left,
11206						  vmdq_vecs_wanted);
11207			}
11208			pf->num_vmdq_msix = pf->num_vmdq_qps;
11209
11210			v_budget += vmdq_vecs;
11211			vectors_left -= vmdq_vecs;
11212		}
11213	}
11214
11215	/* On systems with a large number of SMP cores, we previously limited
11216	 * the number of vectors for num_lan_msix to be at most 50% of the
11217	 * available vectors, to allow for other features. Now, we add back
11218	 * the remaining vectors. However, we ensure that the total
11219	 * num_lan_msix will not exceed num_online_cpus(). To do this, we
11220	 * calculate the number of vectors we can add without going over the
11221	 * cap of CPUs. For systems with a small number of CPUs this will be
11222	 * zero.
11223	 */
11224	extra_vectors = min_t(int, cpus - pf->num_lan_msix, vectors_left);
11225	pf->num_lan_msix += extra_vectors;
11226	vectors_left -= extra_vectors;
11227
11228	WARN(vectors_left < 0,
11229	     "Calculation of remaining vectors underflowed. This is an accounting bug when determining total MSI-X vectors.\n");
11230
11231	v_budget += pf->num_lan_msix;
11232	pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
11233				   GFP_KERNEL);
11234	if (!pf->msix_entries)
11235		return -ENOMEM;
11236
11237	for (i = 0; i < v_budget; i++)
11238		pf->msix_entries[i].entry = i;
11239	v_actual = i40e_reserve_msix_vectors(pf, v_budget);
11240
11241	if (v_actual < I40E_MIN_MSIX) {
11242		pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
11243		kfree(pf->msix_entries);
11244		pf->msix_entries = NULL;
11245		pci_disable_msix(pf->pdev);
11246		return -ENODEV;
11247
11248	} else if (v_actual == I40E_MIN_MSIX) {
11249		/* Adjust for minimal MSIX use */
11250		pf->num_vmdq_vsis = 0;
11251		pf->num_vmdq_qps = 0;
11252		pf->num_lan_qps = 1;
11253		pf->num_lan_msix = 1;
11254
11255	} else if (v_actual != v_budget) {
11256		/* If we have limited resources, we will start with no vectors
11257		 * for the special features and then allocate vectors to some
11258		 * of these features based on the policy and at the end disable
11259		 * the features that did not get any vectors.
11260		 */
11261		int vec;
11262
11263		dev_info(&pf->pdev->dev,
11264			 "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
11265			 v_actual, v_budget);
11266		/* reserve the misc vector */
11267		vec = v_actual - 1;
11268
11269		/* Scale vector usage down */
11270		pf->num_vmdq_msix = 1;    /* force VMDqs to only one vector */
11271		pf->num_vmdq_vsis = 1;
11272		pf->num_vmdq_qps = 1;
11273
11274		/* partition out the remaining vectors */
11275		switch (vec) {
11276		case 2:
11277			pf->num_lan_msix = 1;
11278			break;
11279		case 3:
11280			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
11281				pf->num_lan_msix = 1;
11282				pf->num_iwarp_msix = 1;
11283			} else {
11284				pf->num_lan_msix = 2;
11285			}
11286			break;
11287		default:
11288			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
11289				pf->num_iwarp_msix = min_t(int, (vec / 3),
11290						 iwarp_requested);
11291				pf->num_vmdq_vsis = min_t(int, (vec / 3),
11292						  I40E_DEFAULT_NUM_VMDQ_VSI);
11293			} else {
11294				pf->num_vmdq_vsis = min_t(int, (vec / 2),
11295						  I40E_DEFAULT_NUM_VMDQ_VSI);
11296			}
11297			if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
11298				pf->num_fdsb_msix = 1;
11299				vec--;
11300			}
11301			pf->num_lan_msix = min_t(int,
11302			       (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
11303							      pf->num_lan_msix);
11304			pf->num_lan_qps = pf->num_lan_msix;
11305			break;
11306		}
11307	}
11308
11309	if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
11310	    (pf->num_fdsb_msix == 0)) {
11311		dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
11312		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
11313		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
11314	}
11315	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
11316	    (pf->num_vmdq_msix == 0)) {
11317		dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
11318		pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
11319	}
11320
11321	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
11322	    (pf->num_iwarp_msix == 0)) {
11323		dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
11324		pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
11325	}
11326	i40e_debug(&pf->hw, I40E_DEBUG_INIT,
11327		   "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n",
11328		   pf->num_lan_msix,
11329		   pf->num_vmdq_msix * pf->num_vmdq_vsis,
11330		   pf->num_fdsb_msix,
11331		   pf->num_iwarp_msix);
11332
11333	return v_actual;
11334}
11335
11336/**
11337 * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
11338 * @vsi: the VSI being configured
11339 * @v_idx: index of the vector in the vsi struct
11340 *
11341 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
11342 **/
11343static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
11344{
11345	struct i40e_q_vector *q_vector;
11346
11347	/* allocate q_vector */
11348	q_vector = kzalloc(sizeof(struct i40e_q_vector), GFP_KERNEL);
11349	if (!q_vector)
11350		return -ENOMEM;
11351
11352	q_vector->vsi = vsi;
11353	q_vector->v_idx = v_idx;
11354	cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
11355
11356	if (vsi->netdev)
11357		netif_napi_add(vsi->netdev, &q_vector->napi,
11358			       i40e_napi_poll, NAPI_POLL_WEIGHT);
11359
11360	/* tie q_vector and vsi together */
11361	vsi->q_vectors[v_idx] = q_vector;
11362
11363	return 0;
11364}
11365
11366/**
11367 * i40e_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
11368 * @vsi: the VSI being configured
11369 *
11370 * We allocate one q_vector per queue interrupt.  If allocation fails we
11371 * return -ENOMEM.
11372 **/
11373static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
11374{
11375	struct i40e_pf *pf = vsi->back;
11376	int err, v_idx, num_q_vectors;
11377
11378	/* if not MSIX, give the one vector only to the LAN VSI */
11379	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
11380		num_q_vectors = vsi->num_q_vectors;
11381	else if (vsi == pf->vsi[pf->lan_vsi])
11382		num_q_vectors = 1;
11383	else
11384		return -EINVAL;
11385
11386	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
11387		err = i40e_vsi_alloc_q_vector(vsi, v_idx);
11388		if (err)
11389			goto err_out;
11390	}
11391
11392	return 0;
11393
11394err_out:
11395	while (v_idx--)
11396		i40e_free_q_vector(vsi, v_idx);
11397
11398	return err;
11399}
11400
11401/**
11402 * i40e_init_interrupt_scheme - Determine proper interrupt scheme
11403 * @pf: board private structure to initialize
11404 **/
11405static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
11406{
11407	int vectors = 0;
11408	ssize_t size;
11409
11410	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
11411		vectors = i40e_init_msix(pf);
11412		if (vectors < 0) {
11413			pf->flags &= ~(I40E_FLAG_MSIX_ENABLED	|
11414				       I40E_FLAG_IWARP_ENABLED	|
11415				       I40E_FLAG_RSS_ENABLED	|
11416				       I40E_FLAG_DCB_CAPABLE	|
11417				       I40E_FLAG_DCB_ENABLED	|
11418				       I40E_FLAG_SRIOV_ENABLED	|
11419				       I40E_FLAG_FD_SB_ENABLED	|
11420				       I40E_FLAG_FD_ATR_ENABLED	|
11421				       I40E_FLAG_VMDQ_ENABLED);
11422			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
11423
11424			/* rework the queue expectations without MSIX */
11425			i40e_determine_queue_usage(pf);
11426		}
11427	}
11428
11429	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) &&
11430	    (pf->flags & I40E_FLAG_MSI_ENABLED)) {
11431		dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n");
11432		vectors = pci_enable_msi(pf->pdev);
11433		if (vectors < 0) {
11434			dev_info(&pf->pdev->dev, "MSI init failed - %d\n",
11435				 vectors);
11436			pf->flags &= ~I40E_FLAG_MSI_ENABLED;
11437		}
11438		vectors = 1;  /* one MSI or Legacy vector */
11439	}
11440
11441	if (!(pf->flags & (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED)))
11442		dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n");
11443
11444	/* set up vector assignment tracking */
11445	size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors);
11446	pf->irq_pile = kzalloc(size, GFP_KERNEL);
11447	if (!pf->irq_pile)
11448		return -ENOMEM;
11449
11450	pf->irq_pile->num_entries = vectors;
11451
11452	/* track first vector for misc interrupts, ignore return */
11453	(void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
11454
11455	return 0;
11456}
11457
11458/**
11459 * i40e_restore_interrupt_scheme - Restore the interrupt scheme
11460 * @pf: private board data structure
11461 *
11462 * Restore the interrupt scheme that was cleared when we suspended the
11463 * device. This should be called during resume to re-allocate the q_vectors
11464 * and reacquire IRQs.
11465 */
11466static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
11467{
11468	int err, i;
11469
11470	/* We cleared the MSI and MSI-X flags when disabling the old interrupt
11471	 * scheme. We need to re-enabled them here in order to attempt to
11472	 * re-acquire the MSI or MSI-X vectors
11473	 */
11474	pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
11475
11476	err = i40e_init_interrupt_scheme(pf);
11477	if (err)
11478		return err;
11479
11480	/* Now that we've re-acquired IRQs, we need to remap the vectors and
11481	 * rings together again.
11482	 */
11483	for (i = 0; i < pf->num_alloc_vsi; i++) {
11484		if (pf->vsi[i]) {
11485			err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
11486			if (err)
11487				goto err_unwind;
11488			i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
11489		}
11490	}
11491
11492	err = i40e_setup_misc_vector(pf);
11493	if (err)
11494		goto err_unwind;
11495
11496	if (pf->flags & I40E_FLAG_IWARP_ENABLED)
11497		i40e_client_update_msix_info(pf);
11498
11499	return 0;
11500
11501err_unwind:
11502	while (i--) {
11503		if (pf->vsi[i])
11504			i40e_vsi_free_q_vectors(pf->vsi[i]);
11505	}
11506
11507	return err;
11508}
11509
11510/**
11511 * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle
11512 * non queue events in recovery mode
11513 * @pf: board private structure
11514 *
11515 * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage
11516 * the non-queue interrupts, e.g. AdminQ and errors in recovery mode.
11517 * This is handled differently than in recovery mode since no Tx/Rx resources
11518 * are being allocated.
11519 **/
11520static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf)
11521{
11522	int err;
11523
11524	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
11525		err = i40e_setup_misc_vector(pf);
11526
11527		if (err) {
11528			dev_info(&pf->pdev->dev,
11529				 "MSI-X misc vector request failed, error %d\n",
11530				 err);
11531			return err;
11532		}
11533	} else {
11534		u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED;
11535
11536		err = request_irq(pf->pdev->irq, i40e_intr, flags,
11537				  pf->int_name, pf);
11538
11539		if (err) {
11540			dev_info(&pf->pdev->dev,
11541				 "MSI/legacy misc vector request failed, error %d\n",
11542				 err);
11543			return err;
11544		}
11545		i40e_enable_misc_int_causes(pf);
11546		i40e_irq_dynamic_enable_icr0(pf);
11547	}
11548
11549	return 0;
11550}
11551
11552/**
11553 * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
11554 * @pf: board private structure
11555 *
11556 * This sets up the handler for MSIX 0, which is used to manage the
11557 * non-queue interrupts, e.g. AdminQ and errors.  This is not used
11558 * when in MSI or Legacy interrupt mode.
11559 **/
11560static int i40e_setup_misc_vector(struct i40e_pf *pf)
11561{
11562	struct i40e_hw *hw = &pf->hw;
11563	int err = 0;
11564
11565	/* Only request the IRQ once, the first time through. */
11566	if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
11567		err = request_irq(pf->msix_entries[0].vector,
11568				  i40e_intr, 0, pf->int_name, pf);
11569		if (err) {
11570			clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
11571			dev_info(&pf->pdev->dev,
11572				 "request_irq for %s failed: %d\n",
11573				 pf->int_name, err);
11574			return -EFAULT;
11575		}
11576	}
11577
11578	i40e_enable_misc_int_causes(pf);
11579
11580	/* associate no queues to the misc vector */
11581	wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
11582	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1);
11583
11584	i40e_flush(hw);
11585
11586	i40e_irq_dynamic_enable_icr0(pf);
11587
11588	return err;
11589}
11590
11591/**
11592 * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
11593 * @vsi: Pointer to vsi structure
11594 * @seed: Buffter to store the hash keys
11595 * @lut: Buffer to store the lookup table entries
11596 * @lut_size: Size of buffer to store the lookup table entries
11597 *
11598 * Return 0 on success, negative on failure
11599 */
11600static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
11601			   u8 *lut, u16 lut_size)
11602{
11603	struct i40e_pf *pf = vsi->back;
11604	struct i40e_hw *hw = &pf->hw;
11605	int ret = 0;
11606
11607	if (seed) {
11608		ret = i40e_aq_get_rss_key(hw, vsi->id,
11609			(struct i40e_aqc_get_set_rss_key_data *)seed);
11610		if (ret) {
11611			dev_info(&pf->pdev->dev,
11612				 "Cannot get RSS key, err %s aq_err %s\n",
11613				 i40e_stat_str(&pf->hw, ret),
11614				 i40e_aq_str(&pf->hw,
11615					     pf->hw.aq.asq_last_status));
11616			return ret;
11617		}
11618	}
11619
11620	if (lut) {
11621		bool pf_lut = vsi->type == I40E_VSI_MAIN;
11622
11623		ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
11624		if (ret) {
11625			dev_info(&pf->pdev->dev,
11626				 "Cannot get RSS lut, err %s aq_err %s\n",
11627				 i40e_stat_str(&pf->hw, ret),
11628				 i40e_aq_str(&pf->hw,
11629					     pf->hw.aq.asq_last_status));
11630			return ret;
11631		}
11632	}
11633
11634	return ret;
11635}
11636
11637/**
11638 * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
11639 * @vsi: Pointer to vsi structure
11640 * @seed: RSS hash seed
11641 * @lut: Lookup table
11642 * @lut_size: Lookup table size
11643 *
11644 * Returns 0 on success, negative on failure
11645 **/
11646static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
11647			       const u8 *lut, u16 lut_size)
11648{
11649	struct i40e_pf *pf = vsi->back;
11650	struct i40e_hw *hw = &pf->hw;
11651	u16 vf_id = vsi->vf_id;
11652	u8 i;
11653
11654	/* Fill out hash function seed */
11655	if (seed) {
11656		u32 *seed_dw = (u32 *)seed;
11657
11658		if (vsi->type == I40E_VSI_MAIN) {
11659			for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
11660				wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
11661		} else if (vsi->type == I40E_VSI_SRIOV) {
11662			for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++)
11663				wr32(hw, I40E_VFQF_HKEY1(i, vf_id), seed_dw[i]);
11664		} else {
11665			dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n");
11666		}
11667	}
11668
11669	if (lut) {
11670		u32 *lut_dw = (u32 *)lut;
11671
11672		if (vsi->type == I40E_VSI_MAIN) {
11673			if (lut_size != I40E_HLUT_ARRAY_SIZE)
11674				return -EINVAL;
11675			for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
11676				wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]);
11677		} else if (vsi->type == I40E_VSI_SRIOV) {
11678			if (lut_size != I40E_VF_HLUT_ARRAY_SIZE)
11679				return -EINVAL;
11680			for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
11681				wr32(hw, I40E_VFQF_HLUT1(i, vf_id), lut_dw[i]);
11682		} else {
11683			dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
11684		}
11685	}
11686	i40e_flush(hw);
11687
11688	return 0;
11689}
11690
11691/**
11692 * i40e_get_rss_reg - Get the RSS keys and lut by reading registers
11693 * @vsi: Pointer to VSI structure
11694 * @seed: Buffer to store the keys
11695 * @lut: Buffer to store the lookup table entries
11696 * @lut_size: Size of buffer to store the lookup table entries
11697 *
11698 * Returns 0 on success, negative on failure
11699 */
11700static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed,
11701			    u8 *lut, u16 lut_size)
11702{
11703	struct i40e_pf *pf = vsi->back;
11704	struct i40e_hw *hw = &pf->hw;
11705	u16 i;
11706
11707	if (seed) {
11708		u32 *seed_dw = (u32 *)seed;
11709
11710		for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
11711			seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i));
11712	}
11713	if (lut) {
11714		u32 *lut_dw = (u32 *)lut;
11715
11716		if (lut_size != I40E_HLUT_ARRAY_SIZE)
11717			return -EINVAL;
11718		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
11719			lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i));
11720	}
11721
11722	return 0;
11723}
11724
11725/**
11726 * i40e_config_rss - Configure RSS keys and lut
11727 * @vsi: Pointer to VSI structure
11728 * @seed: RSS hash seed
11729 * @lut: Lookup table
11730 * @lut_size: Lookup table size
11731 *
11732 * Returns 0 on success, negative on failure
11733 */
11734int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
11735{
11736	struct i40e_pf *pf = vsi->back;
11737
11738	if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
11739		return i40e_config_rss_aq(vsi, seed, lut, lut_size);
11740	else
11741		return i40e_config_rss_reg(vsi, seed, lut, lut_size);
11742}
11743
11744/**
11745 * i40e_get_rss - Get RSS keys and lut
11746 * @vsi: Pointer to VSI structure
11747 * @seed: Buffer to store the keys
11748 * @lut: Buffer to store the lookup table entries
11749 * @lut_size: Size of buffer to store the lookup table entries
11750 *
11751 * Returns 0 on success, negative on failure
11752 */
11753int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
11754{
11755	struct i40e_pf *pf = vsi->back;
11756
11757	if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
11758		return i40e_get_rss_aq(vsi, seed, lut, lut_size);
11759	else
11760		return i40e_get_rss_reg(vsi, seed, lut, lut_size);
11761}
11762
11763/**
11764 * i40e_fill_rss_lut - Fill the RSS lookup table with default values
11765 * @pf: Pointer to board private structure
11766 * @lut: Lookup table
11767 * @rss_table_size: Lookup table size
11768 * @rss_size: Range of queue number for hashing
11769 */
11770void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
11771		       u16 rss_table_size, u16 rss_size)
11772{
11773	u16 i;
11774
11775	for (i = 0; i < rss_table_size; i++)
11776		lut[i] = i % rss_size;
11777}
11778
11779/**
11780 * i40e_pf_config_rss - Prepare for RSS if used
11781 * @pf: board private structure
11782 **/
11783static int i40e_pf_config_rss(struct i40e_pf *pf)
11784{
11785	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
11786	u8 seed[I40E_HKEY_ARRAY_SIZE];
11787	u8 *lut;
11788	struct i40e_hw *hw = &pf->hw;
11789	u32 reg_val;
11790	u64 hena;
11791	int ret;
11792
11793	/* By default we enable TCP/UDP with IPv4/IPv6 ptypes */
11794	hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
11795		((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
11796	hena |= i40e_pf_get_default_rss_hena(pf);
11797
11798	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
11799	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
11800
11801	/* Determine the RSS table size based on the hardware capabilities */
11802	reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0);
11803	reg_val = (pf->rss_table_size == 512) ?
11804			(reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) :
11805			(reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512);
11806	i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val);
11807
11808	/* Determine the RSS size of the VSI */
11809	if (!vsi->rss_size) {
11810		u16 qcount;
11811		/* If the firmware does something weird during VSI init, we
11812		 * could end up with zero TCs. Check for that to avoid
11813		 * divide-by-zero. It probably won't pass traffic, but it also
11814		 * won't panic.
11815		 */
11816		qcount = vsi->num_queue_pairs /
11817			 (vsi->tc_config.numtc ? vsi->tc_config.numtc : 1);
11818		vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
11819	}
11820	if (!vsi->rss_size)
11821		return -EINVAL;
11822
11823	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
11824	if (!lut)
11825		return -ENOMEM;
11826
11827	/* Use user configured lut if there is one, otherwise use default */
11828	if (vsi->rss_lut_user)
11829		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
11830	else
11831		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
11832
11833	/* Use user configured hash key if there is one, otherwise
11834	 * use default.
11835	 */
11836	if (vsi->rss_hkey_user)
11837		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
11838	else
11839		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
11840	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
11841	kfree(lut);
11842
11843	return ret;
11844}
11845
11846/**
11847 * i40e_reconfig_rss_queues - change number of queues for rss and rebuild
11848 * @pf: board private structure
11849 * @queue_count: the requested queue count for rss.
11850 *
11851 * returns 0 if rss is not enabled, if enabled returns the final rss queue
11852 * count which may be different from the requested queue count.
11853 * Note: expects to be called while under rtnl_lock()
11854 **/
11855int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
11856{
11857	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
11858	int new_rss_size;
11859
11860	if (!(pf->flags & I40E_FLAG_RSS_ENABLED))
11861		return 0;
11862
11863	queue_count = min_t(int, queue_count, num_online_cpus());
11864	new_rss_size = min_t(int, queue_count, pf->rss_size_max);
11865
11866	if (queue_count != vsi->num_queue_pairs) {
11867		u16 qcount;
11868
11869		vsi->req_queue_pairs = queue_count;
11870		i40e_prep_for_reset(pf, true);
11871
11872		pf->alloc_rss_size = new_rss_size;
11873
11874		i40e_reset_and_rebuild(pf, true, true);
11875
11876		/* Discard the user configured hash keys and lut, if less
11877		 * queues are enabled.
11878		 */
11879		if (queue_count < vsi->rss_size) {
11880			i40e_clear_rss_config_user(vsi);
11881			dev_dbg(&pf->pdev->dev,
11882				"discard user configured hash keys and lut\n");
11883		}
11884
11885		/* Reset vsi->rss_size, as number of enabled queues changed */
11886		qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
11887		vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
11888
11889		i40e_pf_config_rss(pf);
11890	}
11891	dev_info(&pf->pdev->dev, "User requested queue count/HW max RSS count:  %d/%d\n",
11892		 vsi->req_queue_pairs, pf->rss_size_max);
11893	return pf->alloc_rss_size;
11894}
11895
11896/**
11897 * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition
11898 * @pf: board private structure
11899 **/
11900i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf)
11901{
11902	i40e_status status;
11903	bool min_valid, max_valid;
11904	u32 max_bw, min_bw;
11905
11906	status = i40e_read_bw_from_alt_ram(&pf->hw, &max_bw, &min_bw,
11907					   &min_valid, &max_valid);
11908
11909	if (!status) {
11910		if (min_valid)
11911			pf->min_bw = min_bw;
11912		if (max_valid)
11913			pf->max_bw = max_bw;
11914	}
11915
11916	return status;
11917}
11918
11919/**
11920 * i40e_set_partition_bw_setting - Set BW settings for this PF partition
11921 * @pf: board private structure
11922 **/
11923i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf)
11924{
11925	struct i40e_aqc_configure_partition_bw_data bw_data;
11926	i40e_status status;
11927
11928	memset(&bw_data, 0, sizeof(bw_data));
11929
11930	/* Set the valid bit for this PF */
11931	bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id));
11932	bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK;
11933	bw_data.min_bw[pf->hw.pf_id] = pf->min_bw & I40E_ALT_BW_VALUE_MASK;
11934
11935	/* Set the new bandwidths */
11936	status = i40e_aq_configure_partition_bw(&pf->hw, &bw_data, NULL);
11937
11938	return status;
11939}
11940
11941/**
11942 * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition
11943 * @pf: board private structure
11944 **/
11945i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
11946{
11947	/* Commit temporary BW setting to permanent NVM image */
11948	enum i40e_admin_queue_err last_aq_status;
11949	i40e_status ret;
11950	u16 nvm_word;
11951
11952	if (pf->hw.partition_id != 1) {
11953		dev_info(&pf->pdev->dev,
11954			 "Commit BW only works on partition 1! This is partition %d",
11955			 pf->hw.partition_id);
11956		ret = I40E_NOT_SUPPORTED;
11957		goto bw_commit_out;
11958	}
11959
11960	/* Acquire NVM for read access */
11961	ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ);
11962	last_aq_status = pf->hw.aq.asq_last_status;
11963	if (ret) {
11964		dev_info(&pf->pdev->dev,
11965			 "Cannot acquire NVM for read access, err %s aq_err %s\n",
11966			 i40e_stat_str(&pf->hw, ret),
11967			 i40e_aq_str(&pf->hw, last_aq_status));
11968		goto bw_commit_out;
11969	}
11970
11971	/* Read word 0x10 of NVM - SW compatibility word 1 */
11972	ret = i40e_aq_read_nvm(&pf->hw,
11973			       I40E_SR_NVM_CONTROL_WORD,
11974			       0x10, sizeof(nvm_word), &nvm_word,
11975			       false, NULL);
11976	/* Save off last admin queue command status before releasing
11977	 * the NVM
11978	 */
11979	last_aq_status = pf->hw.aq.asq_last_status;
11980	i40e_release_nvm(&pf->hw);
11981	if (ret) {
11982		dev_info(&pf->pdev->dev, "NVM read error, err %s aq_err %s\n",
11983			 i40e_stat_str(&pf->hw, ret),
11984			 i40e_aq_str(&pf->hw, last_aq_status));
11985		goto bw_commit_out;
11986	}
11987
11988	/* Wait a bit for NVM release to complete */
11989	msleep(50);
11990
11991	/* Acquire NVM for write access */
11992	ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE);
11993	last_aq_status = pf->hw.aq.asq_last_status;
11994	if (ret) {
11995		dev_info(&pf->pdev->dev,
11996			 "Cannot acquire NVM for write access, err %s aq_err %s\n",
11997			 i40e_stat_str(&pf->hw, ret),
11998			 i40e_aq_str(&pf->hw, last_aq_status));
11999		goto bw_commit_out;
12000	}
12001	/* Write it back out unchanged to initiate update NVM,
12002	 * which will force a write of the shadow (alt) RAM to
12003	 * the NVM - thus storing the bandwidth values permanently.
12004	 */
12005	ret = i40e_aq_update_nvm(&pf->hw,
12006				 I40E_SR_NVM_CONTROL_WORD,
12007				 0x10, sizeof(nvm_word),
12008				 &nvm_word, true, 0, NULL);
12009	/* Save off last admin queue command status before releasing
12010	 * the NVM
12011	 */
12012	last_aq_status = pf->hw.aq.asq_last_status;
12013	i40e_release_nvm(&pf->hw);
12014	if (ret)
12015		dev_info(&pf->pdev->dev,
12016			 "BW settings NOT SAVED, err %s aq_err %s\n",
12017			 i40e_stat_str(&pf->hw, ret),
12018			 i40e_aq_str(&pf->hw, last_aq_status));
12019bw_commit_out:
12020
12021	return ret;
12022}
12023
12024/**
12025 * i40e_is_total_port_shutdown_enabled - read NVM and return value
12026 * if total port shutdown feature is enabled for this PF
12027 * @pf: board private structure
12028 **/
12029static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf)
12030{
12031#define I40E_TOTAL_PORT_SHUTDOWN_ENABLED	BIT(4)
12032#define I40E_FEATURES_ENABLE_PTR		0x2A
12033#define I40E_CURRENT_SETTING_PTR		0x2B
12034#define I40E_LINK_BEHAVIOR_WORD_OFFSET		0x2D
12035#define I40E_LINK_BEHAVIOR_WORD_LENGTH		0x1
12036#define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED	BIT(0)
12037#define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH	4
12038	i40e_status read_status = I40E_SUCCESS;
12039	u16 sr_emp_sr_settings_ptr = 0;
12040	u16 features_enable = 0;
12041	u16 link_behavior = 0;
12042	bool ret = false;
12043
12044	read_status = i40e_read_nvm_word(&pf->hw,
12045					 I40E_SR_EMP_SR_SETTINGS_PTR,
12046					 &sr_emp_sr_settings_ptr);
12047	if (read_status)
12048		goto err_nvm;
12049	read_status = i40e_read_nvm_word(&pf->hw,
12050					 sr_emp_sr_settings_ptr +
12051					 I40E_FEATURES_ENABLE_PTR,
12052					 &features_enable);
12053	if (read_status)
12054		goto err_nvm;
12055	if (I40E_TOTAL_PORT_SHUTDOWN_ENABLED & features_enable) {
12056		read_status = i40e_read_nvm_module_data(&pf->hw,
12057							I40E_SR_EMP_SR_SETTINGS_PTR,
12058							I40E_CURRENT_SETTING_PTR,
12059							I40E_LINK_BEHAVIOR_WORD_OFFSET,
12060							I40E_LINK_BEHAVIOR_WORD_LENGTH,
12061							&link_behavior);
12062		if (read_status)
12063			goto err_nvm;
12064		link_behavior >>= (pf->hw.port * I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH);
12065		ret = I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED & link_behavior;
12066	}
12067	return ret;
12068
12069err_nvm:
12070	dev_warn(&pf->pdev->dev,
12071		 "total-port-shutdown feature is off due to read nvm error: %s\n",
12072		 i40e_stat_str(&pf->hw, read_status));
12073	return ret;
12074}
12075
12076/**
12077 * i40e_sw_init - Initialize general software structures (struct i40e_pf)
12078 * @pf: board private structure to initialize
12079 *
12080 * i40e_sw_init initializes the Adapter private data structure.
12081 * Fields are initialized based on PCI device information and
12082 * OS network device settings (MTU size).
12083 **/
12084static int i40e_sw_init(struct i40e_pf *pf)
12085{
12086	int err = 0;
12087	int size;
12088	u16 pow;
12089
12090	/* Set default capability flags */
12091	pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
12092		    I40E_FLAG_MSI_ENABLED     |
12093		    I40E_FLAG_MSIX_ENABLED;
12094
12095	/* Set default ITR */
12096	pf->rx_itr_default = I40E_ITR_RX_DEF;
12097	pf->tx_itr_default = I40E_ITR_TX_DEF;
12098
12099	/* Depending on PF configurations, it is possible that the RSS
12100	 * maximum might end up larger than the available queues
12101	 */
12102	pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width);
12103	pf->alloc_rss_size = 1;
12104	pf->rss_table_size = pf->hw.func_caps.rss_table_size;
12105	pf->rss_size_max = min_t(int, pf->rss_size_max,
12106				 pf->hw.func_caps.num_tx_qp);
12107
12108	/* find the next higher power-of-2 of num cpus */
12109	pow = roundup_pow_of_two(num_online_cpus());
12110	pf->rss_size_max = min_t(int, pf->rss_size_max, pow);
12111
12112	if (pf->hw.func_caps.rss) {
12113		pf->flags |= I40E_FLAG_RSS_ENABLED;
12114		pf->alloc_rss_size = min_t(int, pf->rss_size_max,
12115					   num_online_cpus());
12116	}
12117
12118	/* MFP mode enabled */
12119	if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) {
12120		pf->flags |= I40E_FLAG_MFP_ENABLED;
12121		dev_info(&pf->pdev->dev, "MFP mode Enabled\n");
12122		if (i40e_get_partition_bw_setting(pf)) {
12123			dev_warn(&pf->pdev->dev,
12124				 "Could not get partition bw settings\n");
12125		} else {
12126			dev_info(&pf->pdev->dev,
12127				 "Partition BW Min = %8.8x, Max = %8.8x\n",
12128				 pf->min_bw, pf->max_bw);
12129
12130			/* nudge the Tx scheduler */
12131			i40e_set_partition_bw_setting(pf);
12132		}
12133	}
12134
12135	if ((pf->hw.func_caps.fd_filters_guaranteed > 0) ||
12136	    (pf->hw.func_caps.fd_filters_best_effort > 0)) {
12137		pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
12138		pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
12139		if (pf->flags & I40E_FLAG_MFP_ENABLED &&
12140		    pf->hw.num_partitions > 1)
12141			dev_info(&pf->pdev->dev,
12142				 "Flow Director Sideband mode Disabled in MFP mode\n");
12143		else
12144			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
12145		pf->fdir_pf_filter_count =
12146				 pf->hw.func_caps.fd_filters_guaranteed;
12147		pf->hw.fdir_shared_filter_count =
12148				 pf->hw.func_caps.fd_filters_best_effort;
12149	}
12150
12151	if (pf->hw.mac.type == I40E_MAC_X722) {
12152		pf->hw_features |= (I40E_HW_RSS_AQ_CAPABLE |
12153				    I40E_HW_128_QP_RSS_CAPABLE |
12154				    I40E_HW_ATR_EVICT_CAPABLE |
12155				    I40E_HW_WB_ON_ITR_CAPABLE |
12156				    I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE |
12157				    I40E_HW_NO_PCI_LINK_CHECK |
12158				    I40E_HW_USE_SET_LLDP_MIB |
12159				    I40E_HW_GENEVE_OFFLOAD_CAPABLE |
12160				    I40E_HW_PTP_L4_CAPABLE |
12161				    I40E_HW_WOL_MC_MAGIC_PKT_WAKE |
12162				    I40E_HW_OUTER_UDP_CSUM_CAPABLE);
12163
12164#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03
12165		if (rd32(&pf->hw, I40E_GLQF_FDEVICTENA(1)) !=
12166		    I40E_FDEVICT_PCTYPE_DEFAULT) {
12167			dev_warn(&pf->pdev->dev,
12168				 "FD EVICT PCTYPES are not right, disable FD HW EVICT\n");
12169			pf->hw_features &= ~I40E_HW_ATR_EVICT_CAPABLE;
12170		}
12171	} else if ((pf->hw.aq.api_maj_ver > 1) ||
12172		   ((pf->hw.aq.api_maj_ver == 1) &&
12173		    (pf->hw.aq.api_min_ver > 4))) {
12174		/* Supported in FW API version higher than 1.4 */
12175		pf->hw_features |= I40E_HW_GENEVE_OFFLOAD_CAPABLE;
12176	}
12177
12178	/* Enable HW ATR eviction if possible */
12179	if (pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)
12180		pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
12181
12182	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
12183	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
12184	    (pf->hw.aq.fw_maj_ver < 4))) {
12185		pf->hw_features |= I40E_HW_RESTART_AUTONEG;
12186		/* No DCB support  for FW < v4.33 */
12187		pf->hw_features |= I40E_HW_NO_DCB_SUPPORT;
12188	}
12189
12190	/* Disable FW LLDP if FW < v4.3 */
12191	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
12192	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
12193	    (pf->hw.aq.fw_maj_ver < 4)))
12194		pf->hw_features |= I40E_HW_STOP_FW_LLDP;
12195
12196	/* Use the FW Set LLDP MIB API if FW > v4.40 */
12197	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
12198	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) ||
12199	    (pf->hw.aq.fw_maj_ver >= 5)))
12200		pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
12201
12202	/* Enable PTP L4 if FW > v6.0 */
12203	if (pf->hw.mac.type == I40E_MAC_XL710 &&
12204	    pf->hw.aq.fw_maj_ver >= 6)
12205		pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
12206
12207	if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) {
12208		pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
12209		pf->flags |= I40E_FLAG_VMDQ_ENABLED;
12210		pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
12211	}
12212
12213	if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) {
12214		pf->flags |= I40E_FLAG_IWARP_ENABLED;
12215		/* IWARP needs one extra vector for CQP just like MISC.*/
12216		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
12217	}
12218	/* Stopping FW LLDP engine is supported on XL710 and X722
12219	 * starting from FW versions determined in i40e_init_adminq.
12220	 * Stopping the FW LLDP engine is not supported on XL710
12221	 * if NPAR is functioning so unset this hw flag in this case.
12222	 */
12223	if (pf->hw.mac.type == I40E_MAC_XL710 &&
12224	    pf->hw.func_caps.npar_enable &&
12225	    (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
12226		pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE;
12227
12228#ifdef CONFIG_PCI_IOV
12229	if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
12230		pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
12231		pf->flags |= I40E_FLAG_SRIOV_ENABLED;
12232		pf->num_req_vfs = min_t(int,
12233					pf->hw.func_caps.num_vfs,
12234					I40E_MAX_VF_COUNT);
12235	}
12236#endif /* CONFIG_PCI_IOV */
12237	pf->eeprom_version = 0xDEAD;
12238	pf->lan_veb = I40E_NO_VEB;
12239	pf->lan_vsi = I40E_NO_VSI;
12240
12241	/* By default FW has this off for performance reasons */
12242	pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
12243
12244	/* set up queue assignment tracking */
12245	size = sizeof(struct i40e_lump_tracking)
12246		+ (sizeof(u16) * pf->hw.func_caps.num_tx_qp);
12247	pf->qp_pile = kzalloc(size, GFP_KERNEL);
12248	if (!pf->qp_pile) {
12249		err = -ENOMEM;
12250		goto sw_init_done;
12251	}
12252	pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
12253
12254	pf->tx_timeout_recovery_level = 1;
12255
12256	if (pf->hw.mac.type != I40E_MAC_X722 &&
12257	    i40e_is_total_port_shutdown_enabled(pf)) {
12258		/* Link down on close must be on when total port shutdown
12259		 * is enabled for a given port
12260		 */
12261		pf->flags |= (I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED |
12262			      I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED);
12263		dev_info(&pf->pdev->dev,
12264			 "total-port-shutdown was enabled, link-down-on-close is forced on\n");
12265	}
12266	mutex_init(&pf->switch_mutex);
12267
12268sw_init_done:
12269	return err;
12270}
12271
12272/**
12273 * i40e_set_ntuple - set the ntuple feature flag and take action
12274 * @pf: board private structure to initialize
12275 * @features: the feature set that the stack is suggesting
12276 *
12277 * returns a bool to indicate if reset needs to happen
12278 **/
12279bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
12280{
12281	bool need_reset = false;
12282
12283	/* Check if Flow Director n-tuple support was enabled or disabled.  If
12284	 * the state changed, we need to reset.
12285	 */
12286	if (features & NETIF_F_NTUPLE) {
12287		/* Enable filters and mark for reset */
12288		if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
12289			need_reset = true;
12290		/* enable FD_SB only if there is MSI-X vector and no cloud
12291		 * filters exist
12292		 */
12293		if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
12294			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
12295			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
12296		}
12297	} else {
12298		/* turn off filters, mark for reset and clear SW filter list */
12299		if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
12300			need_reset = true;
12301			i40e_fdir_filter_exit(pf);
12302		}
12303		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
12304		clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state);
12305		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
12306
12307		/* reset fd counters */
12308		pf->fd_add_err = 0;
12309		pf->fd_atr_cnt = 0;
12310		/* if ATR was auto disabled it can be re-enabled. */
12311		if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
12312			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
12313			    (I40E_DEBUG_FD & pf->hw.debug_mask))
12314				dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
12315	}
12316	return need_reset;
12317}
12318
12319/**
12320 * i40e_clear_rss_lut - clear the rx hash lookup table
12321 * @vsi: the VSI being configured
12322 **/
12323static void i40e_clear_rss_lut(struct i40e_vsi *vsi)
12324{
12325	struct i40e_pf *pf = vsi->back;
12326	struct i40e_hw *hw = &pf->hw;
12327	u16 vf_id = vsi->vf_id;
12328	u8 i;
12329
12330	if (vsi->type == I40E_VSI_MAIN) {
12331		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
12332			wr32(hw, I40E_PFQF_HLUT(i), 0);
12333	} else if (vsi->type == I40E_VSI_SRIOV) {
12334		for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
12335			i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0);
12336	} else {
12337		dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
12338	}
12339}
12340
12341/**
12342 * i40e_set_features - set the netdev feature flags
12343 * @netdev: ptr to the netdev being adjusted
12344 * @features: the feature set that the stack is suggesting
12345 * Note: expects to be called while under rtnl_lock()
12346 **/
12347static int i40e_set_features(struct net_device *netdev,
12348			     netdev_features_t features)
12349{
12350	struct i40e_netdev_priv *np = netdev_priv(netdev);
12351	struct i40e_vsi *vsi = np->vsi;
12352	struct i40e_pf *pf = vsi->back;
12353	bool need_reset;
12354
12355	if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
12356		i40e_pf_config_rss(pf);
12357	else if (!(features & NETIF_F_RXHASH) &&
12358		 netdev->features & NETIF_F_RXHASH)
12359		i40e_clear_rss_lut(vsi);
12360
12361	if (features & NETIF_F_HW_VLAN_CTAG_RX)
12362		i40e_vlan_stripping_enable(vsi);
12363	else
12364		i40e_vlan_stripping_disable(vsi);
12365
12366	if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
12367		dev_err(&pf->pdev->dev,
12368			"Offloaded tc filters active, can't turn hw_tc_offload off");
12369		return -EINVAL;
12370	}
12371
12372	if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt)
12373		i40e_del_all_macvlans(vsi);
12374
12375	need_reset = i40e_set_ntuple(pf, features);
12376
12377	if (need_reset)
12378		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
12379
12380	return 0;
12381}
12382
12383static int i40e_udp_tunnel_set_port(struct net_device *netdev,
12384				    unsigned int table, unsigned int idx,
12385				    struct udp_tunnel_info *ti)
12386{
12387	struct i40e_netdev_priv *np = netdev_priv(netdev);
12388	struct i40e_hw *hw = &np->vsi->back->hw;
12389	u8 type, filter_index;
12390	i40e_status ret;
12391
12392	type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN :
12393						   I40E_AQC_TUNNEL_TYPE_NGE;
12394
12395	ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index,
12396				     NULL);
12397	if (ret) {
12398		netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n",
12399			    i40e_stat_str(hw, ret),
12400			    i40e_aq_str(hw, hw->aq.asq_last_status));
12401		return -EIO;
12402	}
12403
12404	udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index);
12405	return 0;
12406}
12407
12408static int i40e_udp_tunnel_unset_port(struct net_device *netdev,
12409				      unsigned int table, unsigned int idx,
12410				      struct udp_tunnel_info *ti)
12411{
12412	struct i40e_netdev_priv *np = netdev_priv(netdev);
12413	struct i40e_hw *hw = &np->vsi->back->hw;
12414	i40e_status ret;
12415
12416	ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL);
12417	if (ret) {
12418		netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n",
12419			    i40e_stat_str(hw, ret),
12420			    i40e_aq_str(hw, hw->aq.asq_last_status));
12421		return -EIO;
12422	}
12423
12424	return 0;
12425}
12426
12427static int i40e_get_phys_port_id(struct net_device *netdev,
12428				 struct netdev_phys_item_id *ppid)
12429{
12430	struct i40e_netdev_priv *np = netdev_priv(netdev);
12431	struct i40e_pf *pf = np->vsi->back;
12432	struct i40e_hw *hw = &pf->hw;
12433
12434	if (!(pf->hw_features & I40E_HW_PORT_ID_VALID))
12435		return -EOPNOTSUPP;
12436
12437	ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id));
12438	memcpy(ppid->id, hw->mac.port_addr, ppid->id_len);
12439
12440	return 0;
12441}
12442
12443/**
12444 * i40e_ndo_fdb_add - add an entry to the hardware database
12445 * @ndm: the input from the stack
12446 * @tb: pointer to array of nladdr (unused)
12447 * @dev: the net device pointer
12448 * @addr: the MAC address entry being added
12449 * @vid: VLAN ID
12450 * @flags: instructions from stack about fdb operation
12451 * @extack: netlink extended ack, unused currently
12452 */
12453static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
12454			    struct net_device *dev,
12455			    const unsigned char *addr, u16 vid,
12456			    u16 flags,
12457			    struct netlink_ext_ack *extack)
12458{
12459	struct i40e_netdev_priv *np = netdev_priv(dev);
12460	struct i40e_pf *pf = np->vsi->back;
12461	int err = 0;
12462
12463	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
12464		return -EOPNOTSUPP;
12465
12466	if (vid) {
12467		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
12468		return -EINVAL;
12469	}
12470
12471	/* Hardware does not support aging addresses so if a
12472	 * ndm_state is given only allow permanent addresses
12473	 */
12474	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
12475		netdev_info(dev, "FDB only supports static addresses\n");
12476		return -EINVAL;
12477	}
12478
12479	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
12480		err = dev_uc_add_excl(dev, addr);
12481	else if (is_multicast_ether_addr(addr))
12482		err = dev_mc_add_excl(dev, addr);
12483	else
12484		err = -EINVAL;
12485
12486	/* Only return duplicate errors if NLM_F_EXCL is set */
12487	if (err == -EEXIST && !(flags & NLM_F_EXCL))
12488		err = 0;
12489
12490	return err;
12491}
12492
12493/**
12494 * i40e_ndo_bridge_setlink - Set the hardware bridge mode
12495 * @dev: the netdev being configured
12496 * @nlh: RTNL message
12497 * @flags: bridge flags
12498 * @extack: netlink extended ack
12499 *
12500 * Inserts a new hardware bridge if not already created and
12501 * enables the bridging mode requested (VEB or VEPA). If the
12502 * hardware bridge has already been inserted and the request
12503 * is to change the mode then that requires a PF reset to
12504 * allow rebuild of the components with required hardware
12505 * bridge mode enabled.
12506 *
12507 * Note: expects to be called while under rtnl_lock()
12508 **/
12509static int i40e_ndo_bridge_setlink(struct net_device *dev,
12510				   struct nlmsghdr *nlh,
12511				   u16 flags,
12512				   struct netlink_ext_ack *extack)
12513{
12514	struct i40e_netdev_priv *np = netdev_priv(dev);
12515	struct i40e_vsi *vsi = np->vsi;
12516	struct i40e_pf *pf = vsi->back;
12517	struct i40e_veb *veb = NULL;
12518	struct nlattr *attr, *br_spec;
12519	int i, rem;
12520
12521	/* Only for PF VSI for now */
12522	if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
12523		return -EOPNOTSUPP;
12524
12525	/* Find the HW bridge for PF VSI */
12526	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
12527		if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
12528			veb = pf->veb[i];
12529	}
12530
12531	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
12532	if (!br_spec)
12533		return -EINVAL;
12534
12535	nla_for_each_nested(attr, br_spec, rem) {
12536		__u16 mode;
12537
12538		if (nla_type(attr) != IFLA_BRIDGE_MODE)
12539			continue;
12540
12541		mode = nla_get_u16(attr);
12542		if ((mode != BRIDGE_MODE_VEPA) &&
12543		    (mode != BRIDGE_MODE_VEB))
12544			return -EINVAL;
12545
12546		/* Insert a new HW bridge */
12547		if (!veb) {
12548			veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid,
12549					     vsi->tc_config.enabled_tc);
12550			if (veb) {
12551				veb->bridge_mode = mode;
12552				i40e_config_bridge_mode(veb);
12553			} else {
12554				/* No Bridge HW offload available */
12555				return -ENOENT;
12556			}
12557			break;
12558		} else if (mode != veb->bridge_mode) {
12559			/* Existing HW bridge but different mode needs reset */
12560			veb->bridge_mode = mode;
12561			/* TODO: If no VFs or VMDq VSIs, disallow VEB mode */
12562			if (mode == BRIDGE_MODE_VEB)
12563				pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
12564			else
12565				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
12566			i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
12567			break;
12568		}
12569	}
12570
12571	return 0;
12572}
12573
12574/**
12575 * i40e_ndo_bridge_getlink - Get the hardware bridge mode
12576 * @skb: skb buff
12577 * @pid: process id
12578 * @seq: RTNL message seq #
12579 * @dev: the netdev being configured
12580 * @filter_mask: unused
12581 * @nlflags: netlink flags passed in
12582 *
12583 * Return the mode in which the hardware bridge is operating in
12584 * i.e VEB or VEPA.
12585 **/
12586static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
12587				   struct net_device *dev,
12588				   u32 __always_unused filter_mask,
12589				   int nlflags)
12590{
12591	struct i40e_netdev_priv *np = netdev_priv(dev);
12592	struct i40e_vsi *vsi = np->vsi;
12593	struct i40e_pf *pf = vsi->back;
12594	struct i40e_veb *veb = NULL;
12595	int i;
12596
12597	/* Only for PF VSI for now */
12598	if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
12599		return -EOPNOTSUPP;
12600
12601	/* Find the HW bridge for the PF VSI */
12602	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
12603		if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
12604			veb = pf->veb[i];
12605	}
12606
12607	if (!veb)
12608		return 0;
12609
12610	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode,
12611				       0, 0, nlflags, filter_mask, NULL);
12612}
12613
12614/**
12615 * i40e_features_check - Validate encapsulated packet conforms to limits
12616 * @skb: skb buff
12617 * @dev: This physical port's netdev
12618 * @features: Offload features that the stack believes apply
12619 **/
12620static netdev_features_t i40e_features_check(struct sk_buff *skb,
12621					     struct net_device *dev,
12622					     netdev_features_t features)
12623{
12624	size_t len;
12625
12626	/* No point in doing any of this if neither checksum nor GSO are
12627	 * being requested for this frame.  We can rule out both by just
12628	 * checking for CHECKSUM_PARTIAL
12629	 */
12630	if (skb->ip_summed != CHECKSUM_PARTIAL)
12631		return features;
12632
12633	/* We cannot support GSO if the MSS is going to be less than
12634	 * 64 bytes.  If it is then we need to drop support for GSO.
12635	 */
12636	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
12637		features &= ~NETIF_F_GSO_MASK;
12638
12639	/* MACLEN can support at most 63 words */
12640	len = skb_network_header(skb) - skb->data;
12641	if (len & ~(63 * 2))
12642		goto out_err;
12643
12644	/* IPLEN and EIPLEN can support at most 127 dwords */
12645	len = skb_transport_header(skb) - skb_network_header(skb);
12646	if (len & ~(127 * 4))
12647		goto out_err;
12648
12649	if (skb->encapsulation) {
12650		/* L4TUNLEN can support 127 words */
12651		len = skb_inner_network_header(skb) - skb_transport_header(skb);
12652		if (len & ~(127 * 2))
12653			goto out_err;
12654
12655		/* IPLEN can support at most 127 dwords */
12656		len = skb_inner_transport_header(skb) -
12657		      skb_inner_network_header(skb);
12658		if (len & ~(127 * 4))
12659			goto out_err;
12660	}
12661
12662	/* No need to validate L4LEN as TCP is the only protocol with a
12663	 * a flexible value and we support all possible values supported
12664	 * by TCP, which is at most 15 dwords
12665	 */
12666
12667	return features;
12668out_err:
12669	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
12670}
12671
12672/**
12673 * i40e_xdp_setup - add/remove an XDP program
12674 * @vsi: VSI to changed
12675 * @prog: XDP program
12676 **/
12677static int i40e_xdp_setup(struct i40e_vsi *vsi,
12678			  struct bpf_prog *prog)
12679{
12680	int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
12681	struct i40e_pf *pf = vsi->back;
12682	struct bpf_prog *old_prog;
12683	bool need_reset;
12684	int i;
12685
12686	/* Don't allow frames that span over multiple buffers */
12687	if (frame_size > vsi->rx_buf_len)
12688		return -EINVAL;
12689
12690	if (!i40e_enabled_xdp_vsi(vsi) && !prog)
12691		return 0;
12692
12693	/* When turning XDP on->off/off->on we reset and rebuild the rings. */
12694	need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
12695
12696	if (need_reset)
12697		i40e_prep_for_reset(pf, true);
12698
12699	old_prog = xchg(&vsi->xdp_prog, prog);
12700
12701	if (need_reset) {
12702		if (!prog)
12703			/* Wait until ndo_xsk_wakeup completes. */
12704			synchronize_rcu();
12705		i40e_reset_and_rebuild(pf, true, true);
12706	}
12707
12708	if (!i40e_enabled_xdp_vsi(vsi) && prog) {
12709		if (i40e_realloc_rx_bi_zc(vsi, true))
12710			return -ENOMEM;
12711	} else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
12712		if (i40e_realloc_rx_bi_zc(vsi, false))
12713			return -ENOMEM;
12714	}
12715
12716	for (i = 0; i < vsi->num_queue_pairs; i++)
12717		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
12718
12719	if (old_prog)
12720		bpf_prog_put(old_prog);
12721
12722	/* Kick start the NAPI context if there is an AF_XDP socket open
12723	 * on that queue id. This so that receiving will start.
12724	 */
12725	if (need_reset && prog)
12726		for (i = 0; i < vsi->num_queue_pairs; i++)
12727			if (vsi->xdp_rings[i]->xsk_pool)
12728				(void)i40e_xsk_wakeup(vsi->netdev, i,
12729						      XDP_WAKEUP_RX);
12730
12731	return 0;
12732}
12733
12734/**
12735 * i40e_enter_busy_conf - Enters busy config state
12736 * @vsi: vsi
12737 *
12738 * Returns 0 on success, <0 for failure.
12739 **/
12740static int i40e_enter_busy_conf(struct i40e_vsi *vsi)
12741{
12742	struct i40e_pf *pf = vsi->back;
12743	int timeout = 50;
12744
12745	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
12746		timeout--;
12747		if (!timeout)
12748			return -EBUSY;
12749		usleep_range(1000, 2000);
12750	}
12751
12752	return 0;
12753}
12754
12755/**
12756 * i40e_exit_busy_conf - Exits busy config state
12757 * @vsi: vsi
12758 **/
12759static void i40e_exit_busy_conf(struct i40e_vsi *vsi)
12760{
12761	struct i40e_pf *pf = vsi->back;
12762
12763	clear_bit(__I40E_CONFIG_BUSY, pf->state);
12764}
12765
12766/**
12767 * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair
12768 * @vsi: vsi
12769 * @queue_pair: queue pair
12770 **/
12771static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
12772{
12773	memset(&vsi->rx_rings[queue_pair]->rx_stats, 0,
12774	       sizeof(vsi->rx_rings[queue_pair]->rx_stats));
12775	memset(&vsi->tx_rings[queue_pair]->stats, 0,
12776	       sizeof(vsi->tx_rings[queue_pair]->stats));
12777	if (i40e_enabled_xdp_vsi(vsi)) {
12778		memset(&vsi->xdp_rings[queue_pair]->stats, 0,
12779		       sizeof(vsi->xdp_rings[queue_pair]->stats));
12780	}
12781}
12782
12783/**
12784 * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
12785 * @vsi: vsi
12786 * @queue_pair: queue pair
12787 **/
12788static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
12789{
12790	i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
12791	if (i40e_enabled_xdp_vsi(vsi)) {
12792		/* Make sure that in-progress ndo_xdp_xmit calls are
12793		 * completed.
12794		 */
12795		synchronize_rcu();
12796		i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
12797	}
12798	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
12799}
12800
12801/**
12802 * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair
12803 * @vsi: vsi
12804 * @queue_pair: queue pair
12805 * @enable: true for enable, false for disable
12806 **/
12807static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair,
12808					bool enable)
12809{
12810	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
12811	struct i40e_q_vector *q_vector = rxr->q_vector;
12812
12813	if (!vsi->netdev)
12814		return;
12815
12816	/* All rings in a qp belong to the same qvector. */
12817	if (q_vector->rx.ring || q_vector->tx.ring) {
12818		if (enable)
12819			napi_enable(&q_vector->napi);
12820		else
12821			napi_disable(&q_vector->napi);
12822	}
12823}
12824
12825/**
12826 * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair
12827 * @vsi: vsi
12828 * @queue_pair: queue pair
12829 * @enable: true for enable, false for disable
12830 *
12831 * Returns 0 on success, <0 on failure.
12832 **/
12833static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair,
12834					bool enable)
12835{
12836	struct i40e_pf *pf = vsi->back;
12837	int pf_q, ret = 0;
12838
12839	pf_q = vsi->base_queue + queue_pair;
12840	ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
12841				     false /*is xdp*/, enable);
12842	if (ret) {
12843		dev_info(&pf->pdev->dev,
12844			 "VSI seid %d Tx ring %d %sable timeout\n",
12845			 vsi->seid, pf_q, (enable ? "en" : "dis"));
12846		return ret;
12847	}
12848
12849	i40e_control_rx_q(pf, pf_q, enable);
12850	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
12851	if (ret) {
12852		dev_info(&pf->pdev->dev,
12853			 "VSI seid %d Rx ring %d %sable timeout\n",
12854			 vsi->seid, pf_q, (enable ? "en" : "dis"));
12855		return ret;
12856	}
12857
12858	/* Due to HW errata, on Rx disable only, the register can
12859	 * indicate done before it really is. Needs 50ms to be sure
12860	 */
12861	if (!enable)
12862		mdelay(50);
12863
12864	if (!i40e_enabled_xdp_vsi(vsi))
12865		return ret;
12866
12867	ret = i40e_control_wait_tx_q(vsi->seid, pf,
12868				     pf_q + vsi->alloc_queue_pairs,
12869				     true /*is xdp*/, enable);
12870	if (ret) {
12871		dev_info(&pf->pdev->dev,
12872			 "VSI seid %d XDP Tx ring %d %sable timeout\n",
12873			 vsi->seid, pf_q, (enable ? "en" : "dis"));
12874	}
12875
12876	return ret;
12877}
12878
12879/**
12880 * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
12881 * @vsi: vsi
12882 * @queue_pair: queue_pair
12883 **/
12884static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
12885{
12886	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
12887	struct i40e_pf *pf = vsi->back;
12888	struct i40e_hw *hw = &pf->hw;
12889
12890	/* All rings in a qp belong to the same qvector. */
12891	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
12892		i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
12893	else
12894		i40e_irq_dynamic_enable_icr0(pf);
12895
12896	i40e_flush(hw);
12897}
12898
12899/**
12900 * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair
12901 * @vsi: vsi
12902 * @queue_pair: queue_pair
12903 **/
12904static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
12905{
12906	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
12907	struct i40e_pf *pf = vsi->back;
12908	struct i40e_hw *hw = &pf->hw;
12909
12910	/* For simplicity, instead of removing the qp interrupt causes
12911	 * from the interrupt linked list, we simply disable the interrupt, and
12912	 * leave the list intact.
12913	 *
12914	 * All rings in a qp belong to the same qvector.
12915	 */
12916	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
12917		u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
12918
12919		wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
12920		i40e_flush(hw);
12921		synchronize_irq(pf->msix_entries[intpf].vector);
12922	} else {
12923		/* Legacy and MSI mode - this stops all interrupt handling */
12924		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
12925		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
12926		i40e_flush(hw);
12927		synchronize_irq(pf->pdev->irq);
12928	}
12929}
12930
12931/**
12932 * i40e_queue_pair_disable - Disables a queue pair
12933 * @vsi: vsi
12934 * @queue_pair: queue pair
12935 *
12936 * Returns 0 on success, <0 on failure.
12937 **/
12938int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
12939{
12940	int err;
12941
12942	err = i40e_enter_busy_conf(vsi);
12943	if (err)
12944		return err;
12945
12946	i40e_queue_pair_disable_irq(vsi, queue_pair);
12947	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
12948	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
12949	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
12950	i40e_queue_pair_clean_rings(vsi, queue_pair);
12951	i40e_queue_pair_reset_stats(vsi, queue_pair);
12952
12953	return err;
12954}
12955
12956/**
12957 * i40e_queue_pair_enable - Enables a queue pair
12958 * @vsi: vsi
12959 * @queue_pair: queue pair
12960 *
12961 * Returns 0 on success, <0 on failure.
12962 **/
12963int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
12964{
12965	int err;
12966
12967	err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]);
12968	if (err)
12969		return err;
12970
12971	if (i40e_enabled_xdp_vsi(vsi)) {
12972		err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]);
12973		if (err)
12974			return err;
12975	}
12976
12977	err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]);
12978	if (err)
12979		return err;
12980
12981	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */);
12982	i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */);
12983	i40e_queue_pair_enable_irq(vsi, queue_pair);
12984
12985	i40e_exit_busy_conf(vsi);
12986
12987	return err;
12988}
12989
12990/**
12991 * i40e_xdp - implements ndo_bpf for i40e
12992 * @dev: netdevice
12993 * @xdp: XDP command
12994 **/
12995static int i40e_xdp(struct net_device *dev,
12996		    struct netdev_bpf *xdp)
12997{
12998	struct i40e_netdev_priv *np = netdev_priv(dev);
12999	struct i40e_vsi *vsi = np->vsi;
13000
13001	if (vsi->type != I40E_VSI_MAIN)
13002		return -EINVAL;
13003
13004	switch (xdp->command) {
13005	case XDP_SETUP_PROG:
13006		return i40e_xdp_setup(vsi, xdp->prog);
13007	case XDP_SETUP_XSK_POOL:
13008		return i40e_xsk_pool_setup(vsi, xdp->xsk.pool,
13009					   xdp->xsk.queue_id);
13010	default:
13011		return -EINVAL;
13012	}
13013}
13014
13015static const struct net_device_ops i40e_netdev_ops = {
13016	.ndo_open		= i40e_open,
13017	.ndo_stop		= i40e_close,
13018	.ndo_start_xmit		= i40e_lan_xmit_frame,
13019	.ndo_get_stats64	= i40e_get_netdev_stats_struct,
13020	.ndo_set_rx_mode	= i40e_set_rx_mode,
13021	.ndo_validate_addr	= eth_validate_addr,
13022	.ndo_set_mac_address	= i40e_set_mac,
13023	.ndo_change_mtu		= i40e_change_mtu,
13024	.ndo_do_ioctl		= i40e_ioctl,
13025	.ndo_tx_timeout		= i40e_tx_timeout,
13026	.ndo_vlan_rx_add_vid	= i40e_vlan_rx_add_vid,
13027	.ndo_vlan_rx_kill_vid	= i40e_vlan_rx_kill_vid,
13028#ifdef CONFIG_NET_POLL_CONTROLLER
13029	.ndo_poll_controller	= i40e_netpoll,
13030#endif
13031	.ndo_setup_tc		= __i40e_setup_tc,
13032	.ndo_select_queue	= i40e_lan_select_queue,
13033	.ndo_set_features	= i40e_set_features,
13034	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
13035	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
13036	.ndo_get_vf_stats	= i40e_get_vf_stats,
13037	.ndo_set_vf_rate	= i40e_ndo_set_vf_bw,
13038	.ndo_get_vf_config	= i40e_ndo_get_vf_config,
13039	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
13040	.ndo_set_vf_spoofchk	= i40e_ndo_set_vf_spoofchk,
13041	.ndo_set_vf_trust	= i40e_ndo_set_vf_trust,
13042	.ndo_udp_tunnel_add	= udp_tunnel_nic_add_port,
13043	.ndo_udp_tunnel_del	= udp_tunnel_nic_del_port,
13044	.ndo_get_phys_port_id	= i40e_get_phys_port_id,
13045	.ndo_fdb_add		= i40e_ndo_fdb_add,
13046	.ndo_features_check	= i40e_features_check,
13047	.ndo_bridge_getlink	= i40e_ndo_bridge_getlink,
13048	.ndo_bridge_setlink	= i40e_ndo_bridge_setlink,
13049	.ndo_bpf		= i40e_xdp,
13050	.ndo_xdp_xmit		= i40e_xdp_xmit,
13051	.ndo_xsk_wakeup	        = i40e_xsk_wakeup,
13052	.ndo_dfwd_add_station	= i40e_fwd_add,
13053	.ndo_dfwd_del_station	= i40e_fwd_del,
13054};
13055
13056/**
13057 * i40e_config_netdev - Setup the netdev flags
13058 * @vsi: the VSI being configured
13059 *
13060 * Returns 0 on success, negative value on failure
13061 **/
13062static int i40e_config_netdev(struct i40e_vsi *vsi)
13063{
13064	struct i40e_pf *pf = vsi->back;
13065	struct i40e_hw *hw = &pf->hw;
13066	struct i40e_netdev_priv *np;
13067	struct net_device *netdev;
13068	u8 broadcast[ETH_ALEN];
13069	u8 mac_addr[ETH_ALEN];
13070	int etherdev_size;
13071	netdev_features_t hw_enc_features;
13072	netdev_features_t hw_features;
13073
13074	etherdev_size = sizeof(struct i40e_netdev_priv);
13075	netdev = alloc_etherdev_mq(etherdev_size, vsi->alloc_queue_pairs);
13076	if (!netdev)
13077		return -ENOMEM;
13078
13079	vsi->netdev = netdev;
13080	np = netdev_priv(netdev);
13081	np->vsi = vsi;
13082
13083	hw_enc_features = NETIF_F_SG			|
13084			  NETIF_F_IP_CSUM		|
13085			  NETIF_F_IPV6_CSUM		|
13086			  NETIF_F_HIGHDMA		|
13087			  NETIF_F_SOFT_FEATURES		|
13088			  NETIF_F_TSO			|
13089			  NETIF_F_TSO_ECN		|
13090			  NETIF_F_TSO6			|
13091			  NETIF_F_GSO_GRE		|
13092			  NETIF_F_GSO_GRE_CSUM		|
13093			  NETIF_F_GSO_PARTIAL		|
13094			  NETIF_F_GSO_IPXIP4		|
13095			  NETIF_F_GSO_IPXIP6		|
13096			  NETIF_F_GSO_UDP_TUNNEL	|
13097			  NETIF_F_GSO_UDP_TUNNEL_CSUM	|
13098			  NETIF_F_GSO_UDP_L4		|
13099			  NETIF_F_SCTP_CRC		|
13100			  NETIF_F_RXHASH		|
13101			  NETIF_F_RXCSUM		|
13102			  0;
13103
13104	if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE))
13105		netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
13106
13107	netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic;
13108
13109	netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
13110
13111	netdev->hw_enc_features |= hw_enc_features;
13112
13113	/* record features VLANs can make use of */
13114	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
13115
13116	/* enable macvlan offloads */
13117	netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
13118
13119	hw_features = hw_enc_features		|
13120		      NETIF_F_HW_VLAN_CTAG_TX	|
13121		      NETIF_F_HW_VLAN_CTAG_RX;
13122
13123	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
13124		hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
13125
13126	netdev->hw_features |= hw_features;
13127
13128	netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
13129	netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
13130
13131	if (vsi->type == I40E_VSI_MAIN) {
13132		SET_NETDEV_DEV(netdev, &pf->pdev->dev);
13133		ether_addr_copy(mac_addr, hw->mac.perm_addr);
13134		/* The following steps are necessary for two reasons. First,
13135		 * some older NVM configurations load a default MAC-VLAN
13136		 * filter that will accept any tagged packet, and we want to
13137		 * replace this with a normal filter. Additionally, it is
13138		 * possible our MAC address was provided by the platform using
13139		 * Open Firmware or similar.
13140		 *
13141		 * Thus, we need to remove the default filter and install one
13142		 * specific to the MAC address.
13143		 */
13144		i40e_rm_default_mac_filter(vsi, mac_addr);
13145		spin_lock_bh(&vsi->mac_filter_hash_lock);
13146		i40e_add_mac_filter(vsi, mac_addr);
13147		spin_unlock_bh(&vsi->mac_filter_hash_lock);
13148	} else {
13149		/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
13150		 * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
13151		 * the end, which is 4 bytes long, so force truncation of the
13152		 * original name by IFNAMSIZ - 4
13153		 */
13154		snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d",
13155			 IFNAMSIZ - 4,
13156			 pf->vsi[pf->lan_vsi]->netdev->name);
13157		eth_random_addr(mac_addr);
13158
13159		spin_lock_bh(&vsi->mac_filter_hash_lock);
13160		i40e_add_mac_filter(vsi, mac_addr);
13161		spin_unlock_bh(&vsi->mac_filter_hash_lock);
13162	}
13163
13164	/* Add the broadcast filter so that we initially will receive
13165	 * broadcast packets. Note that when a new VLAN is first added the
13166	 * driver will convert all filters marked I40E_VLAN_ANY into VLAN
13167	 * specific filters as part of transitioning into "vlan" operation.
13168	 * When more VLANs are added, the driver will copy each existing MAC
13169	 * filter and add it for the new VLAN.
13170	 *
13171	 * Broadcast filters are handled specially by
13172	 * i40e_sync_filters_subtask, as the driver must to set the broadcast
13173	 * promiscuous bit instead of adding this directly as a MAC/VLAN
13174	 * filter. The subtask will update the correct broadcast promiscuous
13175	 * bits as VLANs become active or inactive.
13176	 */
13177	eth_broadcast_addr(broadcast);
13178	spin_lock_bh(&vsi->mac_filter_hash_lock);
13179	i40e_add_mac_filter(vsi, broadcast);
13180	spin_unlock_bh(&vsi->mac_filter_hash_lock);
13181
13182	ether_addr_copy(netdev->dev_addr, mac_addr);
13183	ether_addr_copy(netdev->perm_addr, mac_addr);
13184
13185	/* i40iw_net_event() reads 16 bytes from neigh->primary_key */
13186	netdev->neigh_priv_len = sizeof(u32) * 4;
13187
13188	netdev->priv_flags |= IFF_UNICAST_FLT;
13189	netdev->priv_flags |= IFF_SUPP_NOFCS;
13190	/* Setup netdev TC information */
13191	i40e_vsi_config_netdev_tc(vsi, vsi->tc_config.enabled_tc);
13192
13193	netdev->netdev_ops = &i40e_netdev_ops;
13194	netdev->watchdog_timeo = 5 * HZ;
13195	i40e_set_ethtool_ops(netdev);
13196
13197	/* MTU range: 68 - 9706 */
13198	netdev->min_mtu = ETH_MIN_MTU;
13199	netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD;
13200
13201	return 0;
13202}
13203
13204/**
13205 * i40e_vsi_delete - Delete a VSI from the switch
13206 * @vsi: the VSI being removed
13207 *
13208 * Returns 0 on success, negative value on failure
13209 **/
13210static void i40e_vsi_delete(struct i40e_vsi *vsi)
13211{
13212	/* remove default VSI is not allowed */
13213	if (vsi == vsi->back->vsi[vsi->back->lan_vsi])
13214		return;
13215
13216	i40e_aq_delete_element(&vsi->back->hw, vsi->seid, NULL);
13217}
13218
13219/**
13220 * i40e_is_vsi_uplink_mode_veb - Check if the VSI's uplink bridge mode is VEB
13221 * @vsi: the VSI being queried
13222 *
13223 * Returns 1 if HW bridge mode is VEB and return 0 in case of VEPA mode
13224 **/
13225int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
13226{
13227	struct i40e_veb *veb;
13228	struct i40e_pf *pf = vsi->back;
13229
13230	/* Uplink is not a bridge so default to VEB */
13231	if (vsi->veb_idx >= I40E_MAX_VEB)
13232		return 1;
13233
13234	veb = pf->veb[vsi->veb_idx];
13235	if (!veb) {
13236		dev_info(&pf->pdev->dev,
13237			 "There is no veb associated with the bridge\n");
13238		return -ENOENT;
13239	}
13240
13241	/* Uplink is a bridge in VEPA mode */
13242	if (veb->bridge_mode & BRIDGE_MODE_VEPA) {
13243		return 0;
13244	} else {
13245		/* Uplink is a bridge in VEB mode */
13246		return 1;
13247	}
13248
13249	/* VEPA is now default bridge, so return 0 */
13250	return 0;
13251}
13252
13253/**
13254 * i40e_add_vsi - Add a VSI to the switch
13255 * @vsi: the VSI being configured
13256 *
13257 * This initializes a VSI context depending on the VSI type to be added and
13258 * passes it down to the add_vsi aq command.
13259 **/
13260static int i40e_add_vsi(struct i40e_vsi *vsi)
13261{
13262	int ret = -ENODEV;
13263	struct i40e_pf *pf = vsi->back;
13264	struct i40e_hw *hw = &pf->hw;
13265	struct i40e_vsi_context ctxt;
13266	struct i40e_mac_filter *f;
13267	struct hlist_node *h;
13268	int bkt;
13269
13270	u8 enabled_tc = 0x1; /* TC0 enabled */
13271	int f_count = 0;
13272
13273	memset(&ctxt, 0, sizeof(ctxt));
13274	switch (vsi->type) {
13275	case I40E_VSI_MAIN:
13276		/* The PF's main VSI is already setup as part of the
13277		 * device initialization, so we'll not bother with
13278		 * the add_vsi call, but we will retrieve the current
13279		 * VSI context.
13280		 */
13281		ctxt.seid = pf->main_vsi_seid;
13282		ctxt.pf_num = pf->hw.pf_id;
13283		ctxt.vf_num = 0;
13284		ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
13285		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
13286		if (ret) {
13287			dev_info(&pf->pdev->dev,
13288				 "couldn't get PF vsi config, err %s aq_err %s\n",
13289				 i40e_stat_str(&pf->hw, ret),
13290				 i40e_aq_str(&pf->hw,
13291					     pf->hw.aq.asq_last_status));
13292			return -ENOENT;
13293		}
13294		vsi->info = ctxt.info;
13295		vsi->info.valid_sections = 0;
13296
13297		vsi->seid = ctxt.seid;
13298		vsi->id = ctxt.vsi_number;
13299
13300		enabled_tc = i40e_pf_get_tc_map(pf);
13301
13302		/* Source pruning is enabled by default, so the flag is
13303		 * negative logic - if it's set, we need to fiddle with
13304		 * the VSI to disable source pruning.
13305		 */
13306		if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
13307			memset(&ctxt, 0, sizeof(ctxt));
13308			ctxt.seid = pf->main_vsi_seid;
13309			ctxt.pf_num = pf->hw.pf_id;
13310			ctxt.vf_num = 0;
13311			ctxt.info.valid_sections |=
13312				     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
13313			ctxt.info.switch_id =
13314				   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
13315			ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
13316			if (ret) {
13317				dev_info(&pf->pdev->dev,
13318					 "update vsi failed, err %s aq_err %s\n",
13319					 i40e_stat_str(&pf->hw, ret),
13320					 i40e_aq_str(&pf->hw,
13321						     pf->hw.aq.asq_last_status));
13322				ret = -ENOENT;
13323				goto err;
13324			}
13325		}
13326
13327		/* MFP mode setup queue map and update VSI */
13328		if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
13329		    !(pf->hw.func_caps.iscsi)) { /* NIC type PF */
13330			memset(&ctxt, 0, sizeof(ctxt));
13331			ctxt.seid = pf->main_vsi_seid;
13332			ctxt.pf_num = pf->hw.pf_id;
13333			ctxt.vf_num = 0;
13334			i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
13335			ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
13336			if (ret) {
13337				dev_info(&pf->pdev->dev,
13338					 "update vsi failed, err %s aq_err %s\n",
13339					 i40e_stat_str(&pf->hw, ret),
13340					 i40e_aq_str(&pf->hw,
13341						    pf->hw.aq.asq_last_status));
13342				ret = -ENOENT;
13343				goto err;
13344			}
13345			/* update the local VSI info queue map */
13346			i40e_vsi_update_queue_map(vsi, &ctxt);
13347			vsi->info.valid_sections = 0;
13348		} else {
13349			/* Default/Main VSI is only enabled for TC0
13350			 * reconfigure it to enable all TCs that are
13351			 * available on the port in SFP mode.
13352			 * For MFP case the iSCSI PF would use this
13353			 * flow to enable LAN+iSCSI TC.
13354			 */
13355			ret = i40e_vsi_config_tc(vsi, enabled_tc);
13356			if (ret) {
13357				/* Single TC condition is not fatal,
13358				 * message and continue
13359				 */
13360				dev_info(&pf->pdev->dev,
13361					 "failed to configure TCs for main VSI tc_map 0x%08x, err %s aq_err %s\n",
13362					 enabled_tc,
13363					 i40e_stat_str(&pf->hw, ret),
13364					 i40e_aq_str(&pf->hw,
13365						    pf->hw.aq.asq_last_status));
13366			}
13367		}
13368		break;
13369
13370	case I40E_VSI_FDIR:
13371		ctxt.pf_num = hw->pf_id;
13372		ctxt.vf_num = 0;
13373		ctxt.uplink_seid = vsi->uplink_seid;
13374		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
13375		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
13376		if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) &&
13377		    (i40e_is_vsi_uplink_mode_veb(vsi))) {
13378			ctxt.info.valid_sections |=
13379			     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
13380			ctxt.info.switch_id =
13381			   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
13382		}
13383		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
13384		break;
13385
13386	case I40E_VSI_VMDQ2:
13387		ctxt.pf_num = hw->pf_id;
13388		ctxt.vf_num = 0;
13389		ctxt.uplink_seid = vsi->uplink_seid;
13390		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
13391		ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
13392
13393		/* This VSI is connected to VEB so the switch_id
13394		 * should be set to zero by default.
13395		 */
13396		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
13397			ctxt.info.valid_sections |=
13398				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
13399			ctxt.info.switch_id =
13400				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
13401		}
13402
13403		/* Setup the VSI tx/rx queue map for TC0 only for now */
13404		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
13405		break;
13406
13407	case I40E_VSI_SRIOV:
13408		ctxt.pf_num = hw->pf_id;
13409		ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
13410		ctxt.uplink_seid = vsi->uplink_seid;
13411		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
13412		ctxt.flags = I40E_AQ_VSI_TYPE_VF;
13413
13414		/* This VSI is connected to VEB so the switch_id
13415		 * should be set to zero by default.
13416		 */
13417		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
13418			ctxt.info.valid_sections |=
13419				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
13420			ctxt.info.switch_id =
13421				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
13422		}
13423
13424		if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
13425			ctxt.info.valid_sections |=
13426				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
13427			ctxt.info.queueing_opt_flags |=
13428				(I40E_AQ_VSI_QUE_OPT_TCP_ENA |
13429				 I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI);
13430		}
13431
13432		ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
13433		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
13434		if (pf->vf[vsi->vf_id].spoofchk) {
13435			ctxt.info.valid_sections |=
13436				cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
13437			ctxt.info.sec_flags |=
13438				(I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK |
13439				 I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK);
13440		}
13441		/* Setup the VSI tx/rx queue map for TC0 only for now */
13442		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
13443		break;
13444
13445	case I40E_VSI_IWARP:
13446		/* send down message to iWARP */
13447		break;
13448
13449	default:
13450		return -ENODEV;
13451	}
13452
13453	if (vsi->type != I40E_VSI_MAIN) {
13454		ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
13455		if (ret) {
13456			dev_info(&vsi->back->pdev->dev,
13457				 "add vsi failed, err %s aq_err %s\n",
13458				 i40e_stat_str(&pf->hw, ret),
13459				 i40e_aq_str(&pf->hw,
13460					     pf->hw.aq.asq_last_status));
13461			ret = -ENOENT;
13462			goto err;
13463		}
13464		vsi->info = ctxt.info;
13465		vsi->info.valid_sections = 0;
13466		vsi->seid = ctxt.seid;
13467		vsi->id = ctxt.vsi_number;
13468	}
13469
13470	spin_lock_bh(&vsi->mac_filter_hash_lock);
13471	vsi->active_filters = 0;
13472	/* If macvlan filters already exist, force them to get loaded */
13473	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
13474		f->state = I40E_FILTER_NEW;
13475		f_count++;
13476	}
13477	spin_unlock_bh(&vsi->mac_filter_hash_lock);
13478	clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
13479
13480	if (f_count) {
13481		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
13482		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
13483	}
13484
13485	/* Update VSI BW information */
13486	ret = i40e_vsi_get_bw_info(vsi);
13487	if (ret) {
13488		dev_info(&pf->pdev->dev,
13489			 "couldn't get vsi bw info, err %s aq_err %s\n",
13490			 i40e_stat_str(&pf->hw, ret),
13491			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
13492		/* VSI is already added so not tearing that up */
13493		ret = 0;
13494	}
13495
13496err:
13497	return ret;
13498}
13499
13500/**
13501 * i40e_vsi_release - Delete a VSI and free its resources
13502 * @vsi: the VSI being removed
13503 *
13504 * Returns 0 on success or < 0 on error
13505 **/
13506int i40e_vsi_release(struct i40e_vsi *vsi)
13507{
13508	struct i40e_mac_filter *f;
13509	struct hlist_node *h;
13510	struct i40e_veb *veb = NULL;
13511	struct i40e_pf *pf;
13512	u16 uplink_seid;
13513	int i, n, bkt;
13514
13515	pf = vsi->back;
13516
13517	/* release of a VEB-owner or last VSI is not allowed */
13518	if (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) {
13519		dev_info(&pf->pdev->dev, "VSI %d has existing VEB %d\n",
13520			 vsi->seid, vsi->uplink_seid);
13521		return -ENODEV;
13522	}
13523	if (vsi == pf->vsi[pf->lan_vsi] &&
13524	    !test_bit(__I40E_DOWN, pf->state)) {
13525		dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
13526		return -ENODEV;
13527	}
13528	set_bit(__I40E_VSI_RELEASING, vsi->state);
13529	uplink_seid = vsi->uplink_seid;
13530	if (vsi->type != I40E_VSI_SRIOV) {
13531		if (vsi->netdev_registered) {
13532			vsi->netdev_registered = false;
13533			if (vsi->netdev) {
13534				/* results in a call to i40e_close() */
13535				unregister_netdev(vsi->netdev);
13536			}
13537		} else {
13538			i40e_vsi_close(vsi);
13539		}
13540		i40e_vsi_disable_irq(vsi);
13541	}
13542
13543	spin_lock_bh(&vsi->mac_filter_hash_lock);
13544
13545	/* clear the sync flag on all filters */
13546	if (vsi->netdev) {
13547		__dev_uc_unsync(vsi->netdev, NULL);
13548		__dev_mc_unsync(vsi->netdev, NULL);
13549	}
13550
13551	/* make sure any remaining filters are marked for deletion */
13552	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
13553		__i40e_del_filter(vsi, f);
13554
13555	spin_unlock_bh(&vsi->mac_filter_hash_lock);
13556
13557	i40e_sync_vsi_filters(vsi);
13558
13559	i40e_vsi_delete(vsi);
13560	i40e_vsi_free_q_vectors(vsi);
13561	if (vsi->netdev) {
13562		free_netdev(vsi->netdev);
13563		vsi->netdev = NULL;
13564	}
13565	i40e_vsi_clear_rings(vsi);
13566	i40e_vsi_clear(vsi);
13567
13568	/* If this was the last thing on the VEB, except for the
13569	 * controlling VSI, remove the VEB, which puts the controlling
13570	 * VSI onto the next level down in the switch.
13571	 *
13572	 * Well, okay, there's one more exception here: don't remove
13573	 * the orphan VEBs yet.  We'll wait for an explicit remove request
13574	 * from up the network stack.
13575	 */
13576	for (n = 0, i = 0; i < pf->num_alloc_vsi; i++) {
13577		if (pf->vsi[i] &&
13578		    pf->vsi[i]->uplink_seid == uplink_seid &&
13579		    (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
13580			n++;      /* count the VSIs */
13581		}
13582	}
13583	for (i = 0; i < I40E_MAX_VEB; i++) {
13584		if (!pf->veb[i])
13585			continue;
13586		if (pf->veb[i]->uplink_seid == uplink_seid)
13587			n++;     /* count the VEBs */
13588		if (pf->veb[i]->seid == uplink_seid)
13589			veb = pf->veb[i];
13590	}
13591	if (n == 0 && veb && veb->uplink_seid != 0)
13592		i40e_veb_release(veb);
13593
13594	return 0;
13595}
13596
13597/**
13598 * i40e_vsi_setup_vectors - Set up the q_vectors for the given VSI
13599 * @vsi: ptr to the VSI
13600 *
13601 * This should only be called after i40e_vsi_mem_alloc() which allocates the
13602 * corresponding SW VSI structure and initializes num_queue_pairs for the
13603 * newly allocated VSI.
13604 *
13605 * Returns 0 on success or negative on failure
13606 **/
13607static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
13608{
13609	int ret = -ENOENT;
13610	struct i40e_pf *pf = vsi->back;
13611
13612	if (vsi->q_vectors[0]) {
13613		dev_info(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
13614			 vsi->seid);
13615		return -EEXIST;
13616	}
13617
13618	if (vsi->base_vector) {
13619		dev_info(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
13620			 vsi->seid, vsi->base_vector);
13621		return -EEXIST;
13622	}
13623
13624	ret = i40e_vsi_alloc_q_vectors(vsi);
13625	if (ret) {
13626		dev_info(&pf->pdev->dev,
13627			 "failed to allocate %d q_vector for VSI %d, ret=%d\n",
13628			 vsi->num_q_vectors, vsi->seid, ret);
13629		vsi->num_q_vectors = 0;
13630		goto vector_setup_out;
13631	}
13632
13633	/* In Legacy mode, we do not have to get any other vector since we
13634	 * piggyback on the misc/ICR0 for queue interrupts.
13635	*/
13636	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
13637		return ret;
13638	if (vsi->num_q_vectors)
13639		vsi->base_vector = i40e_get_lump(pf, pf->irq_pile,
13640						 vsi->num_q_vectors, vsi->idx);
13641	if (vsi->base_vector < 0) {
13642		dev_info(&pf->pdev->dev,
13643			 "failed to get tracking for %d vectors for VSI %d, err=%d\n",
13644			 vsi->num_q_vectors, vsi->seid, vsi->base_vector);
13645		i40e_vsi_free_q_vectors(vsi);
13646		ret = -ENOENT;
13647		goto vector_setup_out;
13648	}
13649
13650vector_setup_out:
13651	return ret;
13652}
13653
13654/**
13655 * i40e_vsi_reinit_setup - return and reallocate resources for a VSI
13656 * @vsi: pointer to the vsi.
13657 *
13658 * This re-allocates a vsi's queue resources.
13659 *
13660 * Returns pointer to the successfully allocated and configured VSI sw struct
13661 * on success, otherwise returns NULL on failure.
13662 **/
13663static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
13664{
13665	u16 alloc_queue_pairs;
13666	struct i40e_pf *pf;
13667	u8 enabled_tc;
13668	int ret;
13669
13670	if (!vsi)
13671		return NULL;
13672
13673	pf = vsi->back;
13674
13675	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
13676	i40e_vsi_clear_rings(vsi);
13677
13678	i40e_vsi_free_arrays(vsi, false);
13679	i40e_set_num_rings_in_vsi(vsi);
13680	ret = i40e_vsi_alloc_arrays(vsi, false);
13681	if (ret)
13682		goto err_vsi;
13683
13684	alloc_queue_pairs = vsi->alloc_queue_pairs *
13685			    (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
13686
13687	ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx);
13688	if (ret < 0) {
13689		dev_info(&pf->pdev->dev,
13690			 "failed to get tracking for %d queues for VSI %d err %d\n",
13691			 alloc_queue_pairs, vsi->seid, ret);
13692		goto err_vsi;
13693	}
13694	vsi->base_queue = ret;
13695
13696	/* Update the FW view of the VSI. Force a reset of TC and queue
13697	 * layout configurations.
13698	 */
13699	enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
13700	pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
13701	pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
13702	i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
13703	if (vsi->type == I40E_VSI_MAIN)
13704		i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
13705
13706	/* assign it some queues */
13707	ret = i40e_alloc_rings(vsi);
13708	if (ret)
13709		goto err_rings;
13710
13711	/* map all of the rings to the q_vectors */
13712	i40e_vsi_map_rings_to_vectors(vsi);
13713	return vsi;
13714
13715err_rings:
13716	i40e_vsi_free_q_vectors(vsi);
13717	if (vsi->netdev_registered) {
13718		vsi->netdev_registered = false;
13719		unregister_netdev(vsi->netdev);
13720		free_netdev(vsi->netdev);
13721		vsi->netdev = NULL;
13722	}
13723	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
13724err_vsi:
13725	i40e_vsi_clear(vsi);
13726	return NULL;
13727}
13728
13729/**
13730 * i40e_vsi_setup - Set up a VSI by a given type
13731 * @pf: board private structure
13732 * @type: VSI type
13733 * @uplink_seid: the switch element to link to
13734 * @param1: usage depends upon VSI type. For VF types, indicates VF id
13735 *
13736 * This allocates the sw VSI structure and its queue resources, then add a VSI
13737 * to the identified VEB.
13738 *
13739 * Returns pointer to the successfully allocated and configure VSI sw struct on
13740 * success, otherwise returns NULL on failure.
13741 **/
13742struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
13743				u16 uplink_seid, u32 param1)
13744{
13745	struct i40e_vsi *vsi = NULL;
13746	struct i40e_veb *veb = NULL;
13747	u16 alloc_queue_pairs;
13748	int ret, i;
13749	int v_idx;
13750
13751	/* The requested uplink_seid must be either
13752	 *     - the PF's port seid
13753	 *              no VEB is needed because this is the PF
13754	 *              or this is a Flow Director special case VSI
13755	 *     - seid of an existing VEB
13756	 *     - seid of a VSI that owns an existing VEB
13757	 *     - seid of a VSI that doesn't own a VEB
13758	 *              a new VEB is created and the VSI becomes the owner
13759	 *     - seid of the PF VSI, which is what creates the first VEB
13760	 *              this is a special case of the previous
13761	 *
13762	 * Find which uplink_seid we were given and create a new VEB if needed
13763	 */
13764	for (i = 0; i < I40E_MAX_VEB; i++) {
13765		if (pf->veb[i] && pf->veb[i]->seid == uplink_seid) {
13766			veb = pf->veb[i];
13767			break;
13768		}
13769	}
13770
13771	if (!veb && uplink_seid != pf->mac_seid) {
13772
13773		for (i = 0; i < pf->num_alloc_vsi; i++) {
13774			if (pf->vsi[i] && pf->vsi[i]->seid == uplink_seid) {
13775				vsi = pf->vsi[i];
13776				break;
13777			}
13778		}
13779		if (!vsi) {
13780			dev_info(&pf->pdev->dev, "no such uplink_seid %d\n",
13781				 uplink_seid);
13782			return NULL;
13783		}
13784
13785		if (vsi->uplink_seid == pf->mac_seid)
13786			veb = i40e_veb_setup(pf, 0, pf->mac_seid, vsi->seid,
13787					     vsi->tc_config.enabled_tc);
13788		else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
13789			veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid,
13790					     vsi->tc_config.enabled_tc);
13791		if (veb) {
13792			if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) {
13793				dev_info(&vsi->back->pdev->dev,
13794					 "New VSI creation error, uplink seid of LAN VSI expected.\n");
13795				return NULL;
13796			}
13797			/* We come up by default in VEPA mode if SRIOV is not
13798			 * already enabled, in which case we can't force VEPA
13799			 * mode.
13800			 */
13801			if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
13802				veb->bridge_mode = BRIDGE_MODE_VEPA;
13803				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
13804			}
13805			i40e_config_bridge_mode(veb);
13806		}
13807		for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
13808			if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
13809				veb = pf->veb[i];
13810		}
13811		if (!veb) {
13812			dev_info(&pf->pdev->dev, "couldn't add VEB\n");
13813			return NULL;
13814		}
13815
13816		vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
13817		uplink_seid = veb->seid;
13818	}
13819
13820	/* get vsi sw struct */
13821	v_idx = i40e_vsi_mem_alloc(pf, type);
13822	if (v_idx < 0)
13823		goto err_alloc;
13824	vsi = pf->vsi[v_idx];
13825	if (!vsi)
13826		goto err_alloc;
13827	vsi->type = type;
13828	vsi->veb_idx = (veb ? veb->idx : I40E_NO_VEB);
13829
13830	if (type == I40E_VSI_MAIN)
13831		pf->lan_vsi = v_idx;
13832	else if (type == I40E_VSI_SRIOV)
13833		vsi->vf_id = param1;
13834	/* assign it some queues */
13835	alloc_queue_pairs = vsi->alloc_queue_pairs *
13836			    (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
13837
13838	ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx);
13839	if (ret < 0) {
13840		dev_info(&pf->pdev->dev,
13841			 "failed to get tracking for %d queues for VSI %d err=%d\n",
13842			 alloc_queue_pairs, vsi->seid, ret);
13843		goto err_vsi;
13844	}
13845	vsi->base_queue = ret;
13846
13847	/* get a VSI from the hardware */
13848	vsi->uplink_seid = uplink_seid;
13849	ret = i40e_add_vsi(vsi);
13850	if (ret)
13851		goto err_vsi;
13852
13853	switch (vsi->type) {
13854	/* setup the netdev if needed */
13855	case I40E_VSI_MAIN:
13856	case I40E_VSI_VMDQ2:
13857		ret = i40e_config_netdev(vsi);
13858		if (ret)
13859			goto err_netdev;
13860		ret = i40e_netif_set_realnum_tx_rx_queues(vsi);
13861		if (ret)
13862			goto err_netdev;
13863		ret = register_netdev(vsi->netdev);
13864		if (ret)
13865			goto err_netdev;
13866		vsi->netdev_registered = true;
13867		netif_carrier_off(vsi->netdev);
13868#ifdef CONFIG_I40E_DCB
13869		/* Setup DCB netlink interface */
13870		i40e_dcbnl_setup(vsi);
13871#endif /* CONFIG_I40E_DCB */
13872		fallthrough;
13873	case I40E_VSI_FDIR:
13874		/* set up vectors and rings if needed */
13875		ret = i40e_vsi_setup_vectors(vsi);
13876		if (ret)
13877			goto err_msix;
13878
13879		ret = i40e_alloc_rings(vsi);
13880		if (ret)
13881			goto err_rings;
13882
13883		/* map all of the rings to the q_vectors */
13884		i40e_vsi_map_rings_to_vectors(vsi);
13885
13886		i40e_vsi_reset_stats(vsi);
13887		break;
13888	default:
13889		/* no netdev or rings for the other VSI types */
13890		break;
13891	}
13892
13893	if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
13894	    (vsi->type == I40E_VSI_VMDQ2)) {
13895		ret = i40e_vsi_config_rss(vsi);
13896	}
13897	return vsi;
13898
13899err_rings:
13900	i40e_vsi_free_q_vectors(vsi);
13901err_msix:
13902	if (vsi->netdev_registered) {
13903		vsi->netdev_registered = false;
13904		unregister_netdev(vsi->netdev);
13905		free_netdev(vsi->netdev);
13906		vsi->netdev = NULL;
13907	}
13908err_netdev:
13909	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
13910err_vsi:
13911	i40e_vsi_clear(vsi);
13912err_alloc:
13913	return NULL;
13914}
13915
13916/**
13917 * i40e_veb_get_bw_info - Query VEB BW information
13918 * @veb: the veb to query
13919 *
13920 * Query the Tx scheduler BW configuration data for given VEB
13921 **/
13922static int i40e_veb_get_bw_info(struct i40e_veb *veb)
13923{
13924	struct i40e_aqc_query_switching_comp_ets_config_resp ets_data;
13925	struct i40e_aqc_query_switching_comp_bw_config_resp bw_data;
13926	struct i40e_pf *pf = veb->pf;
13927	struct i40e_hw *hw = &pf->hw;
13928	u32 tc_bw_max;
13929	int ret = 0;
13930	int i;
13931
13932	ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid,
13933						  &bw_data, NULL);
13934	if (ret) {
13935		dev_info(&pf->pdev->dev,
13936			 "query veb bw config failed, err %s aq_err %s\n",
13937			 i40e_stat_str(&pf->hw, ret),
13938			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
13939		goto out;
13940	}
13941
13942	ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid,
13943						   &ets_data, NULL);
13944	if (ret) {
13945		dev_info(&pf->pdev->dev,
13946			 "query veb bw ets config failed, err %s aq_err %s\n",
13947			 i40e_stat_str(&pf->hw, ret),
13948			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
13949		goto out;
13950	}
13951
13952	veb->bw_limit = le16_to_cpu(ets_data.port_bw_limit);
13953	veb->bw_max_quanta = ets_data.tc_bw_max;
13954	veb->is_abs_credits = bw_data.absolute_credits_enable;
13955	veb->enabled_tc = ets_data.tc_valid_bits;
13956	tc_bw_max = le16_to_cpu(bw_data.tc_bw_max[0]) |
13957		    (le16_to_cpu(bw_data.tc_bw_max[1]) << 16);
13958	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
13959		veb->bw_tc_share_credits[i] = bw_data.tc_bw_share_credits[i];
13960		veb->bw_tc_limit_credits[i] =
13961					le16_to_cpu(bw_data.tc_bw_limits[i]);
13962		veb->bw_tc_max_quanta[i] = ((tc_bw_max >> (i*4)) & 0x7);
13963	}
13964
13965out:
13966	return ret;
13967}
13968
13969/**
13970 * i40e_veb_mem_alloc - Allocates the next available struct veb in the PF
13971 * @pf: board private structure
13972 *
13973 * On error: returns error code (negative)
13974 * On success: returns vsi index in PF (positive)
13975 **/
13976static int i40e_veb_mem_alloc(struct i40e_pf *pf)
13977{
13978	int ret = -ENOENT;
13979	struct i40e_veb *veb;
13980	int i;
13981
13982	/* Need to protect the allocation of switch elements at the PF level */
13983	mutex_lock(&pf->switch_mutex);
13984
13985	/* VEB list may be fragmented if VEB creation/destruction has
13986	 * been happening.  We can afford to do a quick scan to look
13987	 * for any free slots in the list.
13988	 *
13989	 * find next empty veb slot, looping back around if necessary
13990	 */
13991	i = 0;
13992	while ((i < I40E_MAX_VEB) && (pf->veb[i] != NULL))
13993		i++;
13994	if (i >= I40E_MAX_VEB) {
13995		ret = -ENOMEM;
13996		goto err_alloc_veb;  /* out of VEB slots! */
13997	}
13998
13999	veb = kzalloc(sizeof(*veb), GFP_KERNEL);
14000	if (!veb) {
14001		ret = -ENOMEM;
14002		goto err_alloc_veb;
14003	}
14004	veb->pf = pf;
14005	veb->idx = i;
14006	veb->enabled_tc = 1;
14007
14008	pf->veb[i] = veb;
14009	ret = i;
14010err_alloc_veb:
14011	mutex_unlock(&pf->switch_mutex);
14012	return ret;
14013}
14014
14015/**
14016 * i40e_switch_branch_release - Delete a branch of the switch tree
14017 * @branch: where to start deleting
14018 *
14019 * This uses recursion to find the tips of the branch to be
14020 * removed, deleting until we get back to and can delete this VEB.
14021 **/
14022static void i40e_switch_branch_release(struct i40e_veb *branch)
14023{
14024	struct i40e_pf *pf = branch->pf;
14025	u16 branch_seid = branch->seid;
14026	u16 veb_idx = branch->idx;
14027	int i;
14028
14029	/* release any VEBs on this VEB - RECURSION */
14030	for (i = 0; i < I40E_MAX_VEB; i++) {
14031		if (!pf->veb[i])
14032			continue;
14033		if (pf->veb[i]->uplink_seid == branch->seid)
14034			i40e_switch_branch_release(pf->veb[i]);
14035	}
14036
14037	/* Release the VSIs on this VEB, but not the owner VSI.
14038	 *
14039	 * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing
14040	 *       the VEB itself, so don't use (*branch) after this loop.
14041	 */
14042	for (i = 0; i < pf->num_alloc_vsi; i++) {
14043		if (!pf->vsi[i])
14044			continue;
14045		if (pf->vsi[i]->uplink_seid == branch_seid &&
14046		   (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
14047			i40e_vsi_release(pf->vsi[i]);
14048		}
14049	}
14050
14051	/* There's one corner case where the VEB might not have been
14052	 * removed, so double check it here and remove it if needed.
14053	 * This case happens if the veb was created from the debugfs
14054	 * commands and no VSIs were added to it.
14055	 */
14056	if (pf->veb[veb_idx])
14057		i40e_veb_release(pf->veb[veb_idx]);
14058}
14059
14060/**
14061 * i40e_veb_clear - remove veb struct
14062 * @veb: the veb to remove
14063 **/
14064static void i40e_veb_clear(struct i40e_veb *veb)
14065{
14066	if (!veb)
14067		return;
14068
14069	if (veb->pf) {
14070		struct i40e_pf *pf = veb->pf;
14071
14072		mutex_lock(&pf->switch_mutex);
14073		if (pf->veb[veb->idx] == veb)
14074			pf->veb[veb->idx] = NULL;
14075		mutex_unlock(&pf->switch_mutex);
14076	}
14077
14078	kfree(veb);
14079}
14080
14081/**
14082 * i40e_veb_release - Delete a VEB and free its resources
14083 * @veb: the VEB being removed
14084 **/
14085void i40e_veb_release(struct i40e_veb *veb)
14086{
14087	struct i40e_vsi *vsi = NULL;
14088	struct i40e_pf *pf;
14089	int i, n = 0;
14090
14091	pf = veb->pf;
14092
14093	/* find the remaining VSI and check for extras */
14094	for (i = 0; i < pf->num_alloc_vsi; i++) {
14095		if (pf->vsi[i] && pf->vsi[i]->uplink_seid == veb->seid) {
14096			n++;
14097			vsi = pf->vsi[i];
14098		}
14099	}
14100	if (n != 1) {
14101		dev_info(&pf->pdev->dev,
14102			 "can't remove VEB %d with %d VSIs left\n",
14103			 veb->seid, n);
14104		return;
14105	}
14106
14107	/* move the remaining VSI to uplink veb */
14108	vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
14109	if (veb->uplink_seid) {
14110		vsi->uplink_seid = veb->uplink_seid;
14111		if (veb->uplink_seid == pf->mac_seid)
14112			vsi->veb_idx = I40E_NO_VEB;
14113		else
14114			vsi->veb_idx = veb->veb_idx;
14115	} else {
14116		/* floating VEB */
14117		vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
14118		vsi->veb_idx = pf->vsi[pf->lan_vsi]->veb_idx;
14119	}
14120
14121	i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
14122	i40e_veb_clear(veb);
14123}
14124
14125/**
14126 * i40e_add_veb - create the VEB in the switch
14127 * @veb: the VEB to be instantiated
14128 * @vsi: the controlling VSI
14129 **/
14130static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
14131{
14132	struct i40e_pf *pf = veb->pf;
14133	bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED);
14134	int ret;
14135
14136	ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
14137			      veb->enabled_tc, false,
14138			      &veb->seid, enable_stats, NULL);
14139
14140	/* get a VEB from the hardware */
14141	if (ret) {
14142		dev_info(&pf->pdev->dev,
14143			 "couldn't add VEB, err %s aq_err %s\n",
14144			 i40e_stat_str(&pf->hw, ret),
14145			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14146		return -EPERM;
14147	}
14148
14149	/* get statistics counter */
14150	ret = i40e_aq_get_veb_parameters(&pf->hw, veb->seid, NULL, NULL,
14151					 &veb->stats_idx, NULL, NULL, NULL);
14152	if (ret) {
14153		dev_info(&pf->pdev->dev,
14154			 "couldn't get VEB statistics idx, err %s aq_err %s\n",
14155			 i40e_stat_str(&pf->hw, ret),
14156			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14157		return -EPERM;
14158	}
14159	ret = i40e_veb_get_bw_info(veb);
14160	if (ret) {
14161		dev_info(&pf->pdev->dev,
14162			 "couldn't get VEB bw info, err %s aq_err %s\n",
14163			 i40e_stat_str(&pf->hw, ret),
14164			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14165		i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
14166		return -ENOENT;
14167	}
14168
14169	vsi->uplink_seid = veb->seid;
14170	vsi->veb_idx = veb->idx;
14171	vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
14172
14173	return 0;
14174}
14175
14176/**
14177 * i40e_veb_setup - Set up a VEB
14178 * @pf: board private structure
14179 * @flags: VEB setup flags
14180 * @uplink_seid: the switch element to link to
14181 * @vsi_seid: the initial VSI seid
14182 * @enabled_tc: Enabled TC bit-map
14183 *
14184 * This allocates the sw VEB structure and links it into the switch
14185 * It is possible and legal for this to be a duplicate of an already
14186 * existing VEB.  It is also possible for both uplink and vsi seids
14187 * to be zero, in order to create a floating VEB.
14188 *
14189 * Returns pointer to the successfully allocated VEB sw struct on
14190 * success, otherwise returns NULL on failure.
14191 **/
14192struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
14193				u16 uplink_seid, u16 vsi_seid,
14194				u8 enabled_tc)
14195{
14196	struct i40e_veb *veb, *uplink_veb = NULL;
14197	int vsi_idx, veb_idx;
14198	int ret;
14199
14200	/* if one seid is 0, the other must be 0 to create a floating relay */
14201	if ((uplink_seid == 0 || vsi_seid == 0) &&
14202	    (uplink_seid + vsi_seid != 0)) {
14203		dev_info(&pf->pdev->dev,
14204			 "one, not both seid's are 0: uplink=%d vsi=%d\n",
14205			 uplink_seid, vsi_seid);
14206		return NULL;
14207	}
14208
14209	/* make sure there is such a vsi and uplink */
14210	for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
14211		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
14212			break;
14213	if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) {
14214		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
14215			 vsi_seid);
14216		return NULL;
14217	}
14218
14219	if (uplink_seid && uplink_seid != pf->mac_seid) {
14220		for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
14221			if (pf->veb[veb_idx] &&
14222			    pf->veb[veb_idx]->seid == uplink_seid) {
14223				uplink_veb = pf->veb[veb_idx];
14224				break;
14225			}
14226		}
14227		if (!uplink_veb) {
14228			dev_info(&pf->pdev->dev,
14229				 "uplink seid %d not found\n", uplink_seid);
14230			return NULL;
14231		}
14232	}
14233
14234	/* get veb sw struct */
14235	veb_idx = i40e_veb_mem_alloc(pf);
14236	if (veb_idx < 0)
14237		goto err_alloc;
14238	veb = pf->veb[veb_idx];
14239	veb->flags = flags;
14240	veb->uplink_seid = uplink_seid;
14241	veb->veb_idx = (uplink_veb ? uplink_veb->idx : I40E_NO_VEB);
14242	veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1);
14243
14244	/* create the VEB in the switch */
14245	ret = i40e_add_veb(veb, pf->vsi[vsi_idx]);
14246	if (ret)
14247		goto err_veb;
14248	if (vsi_idx == pf->lan_vsi)
14249		pf->lan_veb = veb->idx;
14250
14251	return veb;
14252
14253err_veb:
14254	i40e_veb_clear(veb);
14255err_alloc:
14256	return NULL;
14257}
14258
14259/**
14260 * i40e_setup_pf_switch_element - set PF vars based on switch type
14261 * @pf: board private structure
14262 * @ele: element we are building info from
14263 * @num_reported: total number of elements
14264 * @printconfig: should we print the contents
14265 *
14266 * helper function to assist in extracting a few useful SEID values.
14267 **/
14268static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
14269				struct i40e_aqc_switch_config_element_resp *ele,
14270				u16 num_reported, bool printconfig)
14271{
14272	u16 downlink_seid = le16_to_cpu(ele->downlink_seid);
14273	u16 uplink_seid = le16_to_cpu(ele->uplink_seid);
14274	u8 element_type = ele->element_type;
14275	u16 seid = le16_to_cpu(ele->seid);
14276
14277	if (printconfig)
14278		dev_info(&pf->pdev->dev,
14279			 "type=%d seid=%d uplink=%d downlink=%d\n",
14280			 element_type, seid, uplink_seid, downlink_seid);
14281
14282	switch (element_type) {
14283	case I40E_SWITCH_ELEMENT_TYPE_MAC:
14284		pf->mac_seid = seid;
14285		break;
14286	case I40E_SWITCH_ELEMENT_TYPE_VEB:
14287		/* Main VEB? */
14288		if (uplink_seid != pf->mac_seid)
14289			break;
14290		if (pf->lan_veb >= I40E_MAX_VEB) {
14291			int v;
14292
14293			/* find existing or else empty VEB */
14294			for (v = 0; v < I40E_MAX_VEB; v++) {
14295				if (pf->veb[v] && (pf->veb[v]->seid == seid)) {
14296					pf->lan_veb = v;
14297					break;
14298				}
14299			}
14300			if (pf->lan_veb >= I40E_MAX_VEB) {
14301				v = i40e_veb_mem_alloc(pf);
14302				if (v < 0)
14303					break;
14304				pf->lan_veb = v;
14305			}
14306		}
14307		if (pf->lan_veb >= I40E_MAX_VEB)
14308			break;
14309
14310		pf->veb[pf->lan_veb]->seid = seid;
14311		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
14312		pf->veb[pf->lan_veb]->pf = pf;
14313		pf->veb[pf->lan_veb]->veb_idx = I40E_NO_VEB;
14314		break;
14315	case I40E_SWITCH_ELEMENT_TYPE_VSI:
14316		if (num_reported != 1)
14317			break;
14318		/* This is immediately after a reset so we can assume this is
14319		 * the PF's VSI
14320		 */
14321		pf->mac_seid = uplink_seid;
14322		pf->pf_seid = downlink_seid;
14323		pf->main_vsi_seid = seid;
14324		if (printconfig)
14325			dev_info(&pf->pdev->dev,
14326				 "pf_seid=%d main_vsi_seid=%d\n",
14327				 pf->pf_seid, pf->main_vsi_seid);
14328		break;
14329	case I40E_SWITCH_ELEMENT_TYPE_PF:
14330	case I40E_SWITCH_ELEMENT_TYPE_VF:
14331	case I40E_SWITCH_ELEMENT_TYPE_EMP:
14332	case I40E_SWITCH_ELEMENT_TYPE_BMC:
14333	case I40E_SWITCH_ELEMENT_TYPE_PE:
14334	case I40E_SWITCH_ELEMENT_TYPE_PA:
14335		/* ignore these for now */
14336		break;
14337	default:
14338		dev_info(&pf->pdev->dev, "unknown element type=%d seid=%d\n",
14339			 element_type, seid);
14340		break;
14341	}
14342}
14343
14344/**
14345 * i40e_fetch_switch_configuration - Get switch config from firmware
14346 * @pf: board private structure
14347 * @printconfig: should we print the contents
14348 *
14349 * Get the current switch configuration from the device and
14350 * extract a few useful SEID values.
14351 **/
14352int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
14353{
14354	struct i40e_aqc_get_switch_config_resp *sw_config;
14355	u16 next_seid = 0;
14356	int ret = 0;
14357	u8 *aq_buf;
14358	int i;
14359
14360	aq_buf = kzalloc(I40E_AQ_LARGE_BUF, GFP_KERNEL);
14361	if (!aq_buf)
14362		return -ENOMEM;
14363
14364	sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
14365	do {
14366		u16 num_reported, num_total;
14367
14368		ret = i40e_aq_get_switch_config(&pf->hw, sw_config,
14369						I40E_AQ_LARGE_BUF,
14370						&next_seid, NULL);
14371		if (ret) {
14372			dev_info(&pf->pdev->dev,
14373				 "get switch config failed err %s aq_err %s\n",
14374				 i40e_stat_str(&pf->hw, ret),
14375				 i40e_aq_str(&pf->hw,
14376					     pf->hw.aq.asq_last_status));
14377			kfree(aq_buf);
14378			return -ENOENT;
14379		}
14380
14381		num_reported = le16_to_cpu(sw_config->header.num_reported);
14382		num_total = le16_to_cpu(sw_config->header.num_total);
14383
14384		if (printconfig)
14385			dev_info(&pf->pdev->dev,
14386				 "header: %d reported %d total\n",
14387				 num_reported, num_total);
14388
14389		for (i = 0; i < num_reported; i++) {
14390			struct i40e_aqc_switch_config_element_resp *ele =
14391				&sw_config->element[i];
14392
14393			i40e_setup_pf_switch_element(pf, ele, num_reported,
14394						     printconfig);
14395		}
14396	} while (next_seid != 0);
14397
14398	kfree(aq_buf);
14399	return ret;
14400}
14401
14402/**
14403 * i40e_setup_pf_switch - Setup the HW switch on startup or after reset
14404 * @pf: board private structure
14405 * @reinit: if the Main VSI needs to re-initialized.
14406 * @lock_acquired: indicates whether or not the lock has been acquired
14407 *
14408 * Returns 0 on success, negative value on failure
14409 **/
14410static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired)
14411{
14412	u16 flags = 0;
14413	int ret;
14414
14415	/* find out what's out there already */
14416	ret = i40e_fetch_switch_configuration(pf, false);
14417	if (ret) {
14418		dev_info(&pf->pdev->dev,
14419			 "couldn't fetch switch config, err %s aq_err %s\n",
14420			 i40e_stat_str(&pf->hw, ret),
14421			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
14422		return ret;
14423	}
14424	i40e_pf_reset_stats(pf);
14425
14426	/* set the switch config bit for the whole device to
14427	 * support limited promisc or true promisc
14428	 * when user requests promisc. The default is limited
14429	 * promisc.
14430	*/
14431
14432	if ((pf->hw.pf_id == 0) &&
14433	    !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
14434		flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
14435		pf->last_sw_conf_flags = flags;
14436	}
14437
14438	if (pf->hw.pf_id == 0) {
14439		u16 valid_flags;
14440
14441		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
14442		ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
14443						NULL);
14444		if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
14445			dev_info(&pf->pdev->dev,
14446				 "couldn't set switch config bits, err %s aq_err %s\n",
14447				 i40e_stat_str(&pf->hw, ret),
14448				 i40e_aq_str(&pf->hw,
14449					     pf->hw.aq.asq_last_status));
14450			/* not a fatal problem, just keep going */
14451		}
14452		pf->last_sw_conf_valid_flags = valid_flags;
14453	}
14454
14455	/* first time setup */
14456	if (pf->lan_vsi == I40E_NO_VSI || reinit) {
14457		struct i40e_vsi *vsi = NULL;
14458		u16 uplink_seid;
14459
14460		/* Set up the PF VSI associated with the PF's main VSI
14461		 * that is already in the HW switch
14462		 */
14463		if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
14464			uplink_seid = pf->veb[pf->lan_veb]->seid;
14465		else
14466			uplink_seid = pf->mac_seid;
14467		if (pf->lan_vsi == I40E_NO_VSI)
14468			vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0);
14469		else if (reinit)
14470			vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
14471		if (!vsi) {
14472			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
14473			i40e_cloud_filter_exit(pf);
14474			i40e_fdir_teardown(pf);
14475			return -EAGAIN;
14476		}
14477	} else {
14478		/* force a reset of TC and queue layout configurations */
14479		u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
14480
14481		pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
14482		pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
14483		i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
14484	}
14485	i40e_vlan_stripping_disable(pf->vsi[pf->lan_vsi]);
14486
14487	i40e_fdir_sb_setup(pf);
14488
14489	/* Setup static PF queue filter control settings */
14490	ret = i40e_setup_pf_filter_control(pf);
14491	if (ret) {
14492		dev_info(&pf->pdev->dev, "setup_pf_filter_control failed: %d\n",
14493			 ret);
14494		/* Failure here should not stop continuing other steps */
14495	}
14496
14497	/* enable RSS in the HW, even for only one queue, as the stack can use
14498	 * the hash
14499	 */
14500	if ((pf->flags & I40E_FLAG_RSS_ENABLED))
14501		i40e_pf_config_rss(pf);
14502
14503	/* fill in link information and enable LSE reporting */
14504	i40e_link_event(pf);
14505
14506	/* Initialize user-specific link properties */
14507	pf->fc_autoneg_status = ((pf->hw.phy.link_info.an_info &
14508				  I40E_AQ_AN_COMPLETED) ? true : false);
14509
14510	i40e_ptp_init(pf);
14511
14512	if (!lock_acquired)
14513		rtnl_lock();
14514
14515	/* repopulate tunnel port filters */
14516	udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev);
14517
14518	if (!lock_acquired)
14519		rtnl_unlock();
14520
14521	return ret;
14522}
14523
14524/**
14525 * i40e_determine_queue_usage - Work out queue distribution
14526 * @pf: board private structure
14527 **/
14528static void i40e_determine_queue_usage(struct i40e_pf *pf)
14529{
14530	int queues_left;
14531	int q_max;
14532
14533	pf->num_lan_qps = 0;
14534
14535	/* Find the max queues to be put into basic use.  We'll always be
14536	 * using TC0, whether or not DCB is running, and TC0 will get the
14537	 * big RSS set.
14538	 */
14539	queues_left = pf->hw.func_caps.num_tx_qp;
14540
14541	if ((queues_left == 1) ||
14542	    !(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
14543		/* one qp for PF, no queues for anything else */
14544		queues_left = 0;
14545		pf->alloc_rss_size = pf->num_lan_qps = 1;
14546
14547		/* make sure all the fancies are disabled */
14548		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
14549			       I40E_FLAG_IWARP_ENABLED	|
14550			       I40E_FLAG_FD_SB_ENABLED	|
14551			       I40E_FLAG_FD_ATR_ENABLED	|
14552			       I40E_FLAG_DCB_CAPABLE	|
14553			       I40E_FLAG_DCB_ENABLED	|
14554			       I40E_FLAG_SRIOV_ENABLED	|
14555			       I40E_FLAG_VMDQ_ENABLED);
14556		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
14557	} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
14558				  I40E_FLAG_FD_SB_ENABLED |
14559				  I40E_FLAG_FD_ATR_ENABLED |
14560				  I40E_FLAG_DCB_CAPABLE))) {
14561		/* one qp for PF */
14562		pf->alloc_rss_size = pf->num_lan_qps = 1;
14563		queues_left -= pf->num_lan_qps;
14564
14565		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
14566			       I40E_FLAG_IWARP_ENABLED	|
14567			       I40E_FLAG_FD_SB_ENABLED	|
14568			       I40E_FLAG_FD_ATR_ENABLED	|
14569			       I40E_FLAG_DCB_ENABLED	|
14570			       I40E_FLAG_VMDQ_ENABLED);
14571		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
14572	} else {
14573		/* Not enough queues for all TCs */
14574		if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
14575		    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
14576			pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
14577					I40E_FLAG_DCB_ENABLED);
14578			dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
14579		}
14580
14581		/* limit lan qps to the smaller of qps, cpus or msix */
14582		q_max = max_t(int, pf->rss_size_max, num_online_cpus());
14583		q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
14584		q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
14585		pf->num_lan_qps = q_max;
14586
14587		queues_left -= pf->num_lan_qps;
14588	}
14589
14590	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
14591		if (queues_left > 1) {
14592			queues_left -= 1; /* save 1 queue for FD */
14593		} else {
14594			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
14595			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
14596			dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
14597		}
14598	}
14599
14600	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
14601	    pf->num_vf_qps && pf->num_req_vfs && queues_left) {
14602		pf->num_req_vfs = min_t(int, pf->num_req_vfs,
14603					(queues_left / pf->num_vf_qps));
14604		queues_left -= (pf->num_req_vfs * pf->num_vf_qps);
14605	}
14606
14607	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
14608	    pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) {
14609		pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis,
14610					  (queues_left / pf->num_vmdq_qps));
14611		queues_left -= (pf->num_vmdq_vsis * pf->num_vmdq_qps);
14612	}
14613
14614	pf->queues_left = queues_left;
14615	dev_dbg(&pf->pdev->dev,
14616		"qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
14617		pf->hw.func_caps.num_tx_qp,
14618		!!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
14619		pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
14620		pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
14621		queues_left);
14622}
14623
14624/**
14625 * i40e_setup_pf_filter_control - Setup PF static filter control
14626 * @pf: PF to be setup
14627 *
14628 * i40e_setup_pf_filter_control sets up a PF's initial filter control
14629 * settings. If PE/FCoE are enabled then it will also set the per PF
14630 * based filter sizes required for them. It also enables Flow director,
14631 * ethertype and macvlan type filter settings for the pf.
14632 *
14633 * Returns 0 on success, negative on failure
14634 **/
14635static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
14636{
14637	struct i40e_filter_control_settings *settings = &pf->filter_settings;
14638
14639	settings->hash_lut_size = I40E_HASH_LUT_SIZE_128;
14640
14641	/* Flow Director is enabled */
14642	if (pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))
14643		settings->enable_fdir = true;
14644
14645	/* Ethtype and MACVLAN filters enabled for PF */
14646	settings->enable_ethtype = true;
14647	settings->enable_macvlan = true;
14648
14649	if (i40e_set_filter_control(&pf->hw, settings))
14650		return -ENOENT;
14651
14652	return 0;
14653}
14654
14655#define INFO_STRING_LEN 255
14656#define REMAIN(__x) (INFO_STRING_LEN - (__x))
14657static void i40e_print_features(struct i40e_pf *pf)
14658{
14659	struct i40e_hw *hw = &pf->hw;
14660	char *buf;
14661	int i;
14662
14663	buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL);
14664	if (!buf)
14665		return;
14666
14667	i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id);
14668#ifdef CONFIG_PCI_IOV
14669	i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
14670#endif
14671	i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
14672		      pf->hw.func_caps.num_vsis,
14673		      pf->vsi[pf->lan_vsi]->num_queue_pairs);
14674	if (pf->flags & I40E_FLAG_RSS_ENABLED)
14675		i += scnprintf(&buf[i], REMAIN(i), " RSS");
14676	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
14677		i += scnprintf(&buf[i], REMAIN(i), " FD_ATR");
14678	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
14679		i += scnprintf(&buf[i], REMAIN(i), " FD_SB");
14680		i += scnprintf(&buf[i], REMAIN(i), " NTUPLE");
14681	}
14682	if (pf->flags & I40E_FLAG_DCB_CAPABLE)
14683		i += scnprintf(&buf[i], REMAIN(i), " DCB");
14684	i += scnprintf(&buf[i], REMAIN(i), " VxLAN");
14685	i += scnprintf(&buf[i], REMAIN(i), " Geneve");
14686	if (pf->flags & I40E_FLAG_PTP)
14687		i += scnprintf(&buf[i], REMAIN(i), " PTP");
14688	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
14689		i += scnprintf(&buf[i], REMAIN(i), " VEB");
14690	else
14691		i += scnprintf(&buf[i], REMAIN(i), " VEPA");
14692
14693	dev_info(&pf->pdev->dev, "%s\n", buf);
14694	kfree(buf);
14695	WARN_ON(i > INFO_STRING_LEN);
14696}
14697
14698/**
14699 * i40e_get_platform_mac_addr - get platform-specific MAC address
14700 * @pdev: PCI device information struct
14701 * @pf: board private structure
14702 *
14703 * Look up the MAC address for the device. First we'll try
14704 * eth_platform_get_mac_address, which will check Open Firmware, or arch
14705 * specific fallback. Otherwise, we'll default to the stored value in
14706 * firmware.
14707 **/
14708static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
14709{
14710	if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr))
14711		i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr);
14712}
14713
14714/**
14715 * i40e_set_fec_in_flags - helper function for setting FEC options in flags
14716 * @fec_cfg: FEC option to set in flags
14717 * @flags: ptr to flags in which we set FEC option
14718 **/
14719void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags)
14720{
14721	if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
14722		*flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC;
14723	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) ||
14724	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) {
14725		*flags |= I40E_FLAG_RS_FEC;
14726		*flags &= ~I40E_FLAG_BASE_R_FEC;
14727	}
14728	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) ||
14729	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) {
14730		*flags |= I40E_FLAG_BASE_R_FEC;
14731		*flags &= ~I40E_FLAG_RS_FEC;
14732	}
14733	if (fec_cfg == 0)
14734		*flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC);
14735}
14736
14737/**
14738 * i40e_check_recovery_mode - check if we are running transition firmware
14739 * @pf: board private structure
14740 *
14741 * Check registers indicating the firmware runs in recovery mode. Sets the
14742 * appropriate driver state.
14743 *
14744 * Returns true if the recovery mode was detected, false otherwise
14745 **/
14746static bool i40e_check_recovery_mode(struct i40e_pf *pf)
14747{
14748	u32 val = rd32(&pf->hw, I40E_GL_FWSTS);
14749
14750	if (val & I40E_GL_FWSTS_FWS1B_MASK) {
14751		dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
14752		dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
14753		set_bit(__I40E_RECOVERY_MODE, pf->state);
14754
14755		return true;
14756	}
14757	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
14758		dev_info(&pf->pdev->dev, "Please do Power-On Reset to initialize adapter in normal mode with full functionality.\n");
14759
14760	return false;
14761}
14762
14763/**
14764 * i40e_pf_loop_reset - perform reset in a loop.
14765 * @pf: board private structure
14766 *
14767 * This function is useful when a NIC is about to enter recovery mode.
14768 * When a NIC's internal data structures are corrupted the NIC's
14769 * firmware is going to enter recovery mode.
14770 * Right after a POR it takes about 7 minutes for firmware to enter
14771 * recovery mode. Until that time a NIC is in some kind of intermediate
14772 * state. After that time period the NIC almost surely enters
14773 * recovery mode. The only way for a driver to detect intermediate
14774 * state is to issue a series of pf-resets and check a return value.
14775 * If a PF reset returns success then the firmware could be in recovery
14776 * mode so the caller of this code needs to check for recovery mode
14777 * if this function returns success. There is a little chance that
14778 * firmware will hang in intermediate state forever.
14779 * Since waiting 7 minutes is quite a lot of time this function waits
14780 * 10 seconds and then gives up by returning an error.
14781 *
14782 * Return 0 on success, negative on failure.
14783 **/
14784static i40e_status i40e_pf_loop_reset(struct i40e_pf *pf)
14785{
14786	/* wait max 10 seconds for PF reset to succeed */
14787	const unsigned long time_end = jiffies + 10 * HZ;
14788
14789	struct i40e_hw *hw = &pf->hw;
14790	i40e_status ret;
14791
14792	ret = i40e_pf_reset(hw);
14793	while (ret != I40E_SUCCESS && time_before(jiffies, time_end)) {
14794		usleep_range(10000, 20000);
14795		ret = i40e_pf_reset(hw);
14796	}
14797
14798	if (ret == I40E_SUCCESS)
14799		pf->pfr_count++;
14800	else
14801		dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret);
14802
14803	return ret;
14804}
14805
14806/**
14807 * i40e_check_fw_empr - check if FW issued unexpected EMP Reset
14808 * @pf: board private structure
14809 *
14810 * Check FW registers to determine if FW issued unexpected EMP Reset.
14811 * Every time when unexpected EMP Reset occurs the FW increments
14812 * a counter of unexpected EMP Resets. When the counter reaches 10
14813 * the FW should enter the Recovery mode
14814 *
14815 * Returns true if FW issued unexpected EMP Reset
14816 **/
14817static bool i40e_check_fw_empr(struct i40e_pf *pf)
14818{
14819	const u32 fw_sts = rd32(&pf->hw, I40E_GL_FWSTS) &
14820			   I40E_GL_FWSTS_FWS1B_MASK;
14821	return (fw_sts > I40E_GL_FWSTS_FWS1B_EMPR_0) &&
14822	       (fw_sts <= I40E_GL_FWSTS_FWS1B_EMPR_10);
14823}
14824
14825/**
14826 * i40e_handle_resets - handle EMP resets and PF resets
14827 * @pf: board private structure
14828 *
14829 * Handle both EMP resets and PF resets and conclude whether there are
14830 * any issues regarding these resets. If there are any issues then
14831 * generate log entry.
14832 *
14833 * Return 0 if NIC is healthy or negative value when there are issues
14834 * with resets
14835 **/
14836static i40e_status i40e_handle_resets(struct i40e_pf *pf)
14837{
14838	const i40e_status pfr = i40e_pf_loop_reset(pf);
14839	const bool is_empr = i40e_check_fw_empr(pf);
14840
14841	if (is_empr || pfr != I40E_SUCCESS)
14842		dev_crit(&pf->pdev->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n");
14843
14844	return is_empr ? I40E_ERR_RESET_FAILED : pfr;
14845}
14846
14847/**
14848 * i40e_init_recovery_mode - initialize subsystems needed in recovery mode
14849 * @pf: board private structure
14850 * @hw: ptr to the hardware info
14851 *
14852 * This function does a minimal setup of all subsystems needed for running
14853 * recovery mode.
14854 *
14855 * Returns 0 on success, negative on failure
14856 **/
14857static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
14858{
14859	struct i40e_vsi *vsi;
14860	int err;
14861	int v_idx;
14862
14863	pci_set_drvdata(pf->pdev, pf);
14864	pci_save_state(pf->pdev);
14865
14866	/* set up periodic task facility */
14867	timer_setup(&pf->service_timer, i40e_service_timer, 0);
14868	pf->service_timer_period = HZ;
14869
14870	INIT_WORK(&pf->service_task, i40e_service_task);
14871	clear_bit(__I40E_SERVICE_SCHED, pf->state);
14872
14873	err = i40e_init_interrupt_scheme(pf);
14874	if (err)
14875		goto err_switch_setup;
14876
14877	/* The number of VSIs reported by the FW is the minimum guaranteed
14878	 * to us; HW supports far more and we share the remaining pool with
14879	 * the other PFs. We allocate space for more than the guarantee with
14880	 * the understanding that we might not get them all later.
14881	 */
14882	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
14883		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
14884	else
14885		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
14886
14887	/* Set up the vsi struct and our local tracking of the MAIN PF vsi. */
14888	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
14889			  GFP_KERNEL);
14890	if (!pf->vsi) {
14891		err = -ENOMEM;
14892		goto err_switch_setup;
14893	}
14894
14895	/* We allocate one VSI which is needed as absolute minimum
14896	 * in order to register the netdev
14897	 */
14898	v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN);
14899	if (v_idx < 0) {
14900		err = v_idx;
14901		goto err_switch_setup;
14902	}
14903	pf->lan_vsi = v_idx;
14904	vsi = pf->vsi[v_idx];
14905	if (!vsi) {
14906		err = -EFAULT;
14907		goto err_switch_setup;
14908	}
14909	vsi->alloc_queue_pairs = 1;
14910	err = i40e_config_netdev(vsi);
14911	if (err)
14912		goto err_switch_setup;
14913	err = register_netdev(vsi->netdev);
14914	if (err)
14915		goto err_switch_setup;
14916	vsi->netdev_registered = true;
14917	i40e_dbg_pf_init(pf);
14918
14919	err = i40e_setup_misc_vector_for_recovery_mode(pf);
14920	if (err)
14921		goto err_switch_setup;
14922
14923	/* tell the firmware that we're starting */
14924	i40e_send_version(pf);
14925
14926	/* since everything's happy, start the service_task timer */
14927	mod_timer(&pf->service_timer,
14928		  round_jiffies(jiffies + pf->service_timer_period));
14929
14930	return 0;
14931
14932err_switch_setup:
14933	i40e_reset_interrupt_capability(pf);
14934	del_timer_sync(&pf->service_timer);
14935	i40e_shutdown_adminq(hw);
14936	iounmap(hw->hw_addr);
14937	pci_disable_pcie_error_reporting(pf->pdev);
14938	pci_release_mem_regions(pf->pdev);
14939	pci_disable_device(pf->pdev);
14940	kfree(pf);
14941
14942	return err;
14943}
14944
14945/**
14946 * i40e_probe - Device initialization routine
14947 * @pdev: PCI device information struct
14948 * @ent: entry in i40e_pci_tbl
14949 *
14950 * i40e_probe initializes a PF identified by a pci_dev structure.
14951 * The OS initialization, configuring of the PF private structure,
14952 * and a hardware reset occur.
14953 *
14954 * Returns 0 on success, negative on failure
14955 **/
14956static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
14957{
14958	struct i40e_aq_get_phy_abilities_resp abilities;
14959	struct i40e_pf *pf;
14960	struct i40e_hw *hw;
14961	static u16 pfs_found;
14962	u16 wol_nvm_bits;
14963	u16 link_status;
14964	int err;
14965	u32 val;
14966	u32 i;
14967
14968	err = pci_enable_device_mem(pdev);
14969	if (err)
14970		return err;
14971
14972	/* set up for high or low dma */
14973	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
14974	if (err) {
14975		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
14976		if (err) {
14977			dev_err(&pdev->dev,
14978				"DMA configuration failed: 0x%x\n", err);
14979			goto err_dma;
14980		}
14981	}
14982
14983	/* set up pci connections */
14984	err = pci_request_mem_regions(pdev, i40e_driver_name);
14985	if (err) {
14986		dev_info(&pdev->dev,
14987			 "pci_request_selected_regions failed %d\n", err);
14988		goto err_pci_reg;
14989	}
14990
14991	pci_enable_pcie_error_reporting(pdev);
14992	pci_set_master(pdev);
14993
14994	/* Now that we have a PCI connection, we need to do the
14995	 * low level device setup.  This is primarily setting up
14996	 * the Admin Queue structures and then querying for the
14997	 * device's current profile information.
14998	 */
14999	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
15000	if (!pf) {
15001		err = -ENOMEM;
15002		goto err_pf_alloc;
15003	}
15004	pf->next_vsi = 0;
15005	pf->pdev = pdev;
15006	set_bit(__I40E_DOWN, pf->state);
15007
15008	hw = &pf->hw;
15009	hw->back = pf;
15010
15011	pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
15012				I40E_MAX_CSR_SPACE);
15013	/* We believe that the highest register to read is
15014	 * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size
15015	 * is not less than that before mapping to prevent a
15016	 * kernel panic.
15017	 */
15018	if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
15019		dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
15020			pf->ioremap_len);
15021		err = -ENOMEM;
15022		goto err_ioremap;
15023	}
15024	hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
15025	if (!hw->hw_addr) {
15026		err = -EIO;
15027		dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n",
15028			 (unsigned int)pci_resource_start(pdev, 0),
15029			 pf->ioremap_len, err);
15030		goto err_ioremap;
15031	}
15032	hw->vendor_id = pdev->vendor;
15033	hw->device_id = pdev->device;
15034	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
15035	hw->subsystem_vendor_id = pdev->subsystem_vendor;
15036	hw->subsystem_device_id = pdev->subsystem_device;
15037	hw->bus.device = PCI_SLOT(pdev->devfn);
15038	hw->bus.func = PCI_FUNC(pdev->devfn);
15039	hw->bus.bus_id = pdev->bus->number;
15040	pf->instance = pfs_found;
15041
15042	/* Select something other than the 802.1ad ethertype for the
15043	 * switch to use internally and drop on ingress.
15044	 */
15045	hw->switch_tag = 0xffff;
15046	hw->first_tag = ETH_P_8021AD;
15047	hw->second_tag = ETH_P_8021Q;
15048
15049	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
15050	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
15051	INIT_LIST_HEAD(&pf->ddp_old_prof);
15052
15053	/* set up the locks for the AQ, do this only once in probe
15054	 * and destroy them only once in remove
15055	 */
15056	mutex_init(&hw->aq.asq_mutex);
15057	mutex_init(&hw->aq.arq_mutex);
15058
15059	pf->msg_enable = netif_msg_init(debug,
15060					NETIF_MSG_DRV |
15061					NETIF_MSG_PROBE |
15062					NETIF_MSG_LINK);
15063	if (debug < -1)
15064		pf->hw.debug_mask = debug;
15065
15066	/* do a special CORER for clearing PXE mode once at init */
15067	if (hw->revision_id == 0 &&
15068	    (rd32(hw, I40E_GLLAN_RCTL_0) & I40E_GLLAN_RCTL_0_PXE_MODE_MASK)) {
15069		wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_CORER_MASK);
15070		i40e_flush(hw);
15071		msleep(200);
15072		pf->corer_count++;
15073
15074		i40e_clear_pxe_mode(hw);
15075	}
15076
15077	/* Reset here to make sure all is clean and to define PF 'n' */
15078	i40e_clear_hw(hw);
15079
15080	err = i40e_set_mac_type(hw);
15081	if (err) {
15082		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
15083			 err);
15084		goto err_pf_reset;
15085	}
15086
15087	err = i40e_handle_resets(pf);
15088	if (err)
15089		goto err_pf_reset;
15090
15091	i40e_check_recovery_mode(pf);
15092
15093	hw->aq.num_arq_entries = I40E_AQ_LEN;
15094	hw->aq.num_asq_entries = I40E_AQ_LEN;
15095	hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE;
15096	hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE;
15097	pf->adminq_work_limit = I40E_AQ_WORK_LIMIT;
15098
15099	snprintf(pf->int_name, sizeof(pf->int_name) - 1,
15100		 "%s-%s:misc",
15101		 dev_driver_string(&pf->pdev->dev), dev_name(&pdev->dev));
15102
15103	err = i40e_init_shared_code(hw);
15104	if (err) {
15105		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
15106			 err);
15107		goto err_pf_reset;
15108	}
15109
15110	/* set up a default setting for link flow control */
15111	pf->hw.fc.requested_mode = I40E_FC_NONE;
15112
15113	err = i40e_init_adminq(hw);
15114	if (err) {
15115		if (err == I40E_ERR_FIRMWARE_API_VERSION)
15116			dev_info(&pdev->dev,
15117				 "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n",
15118				 hw->aq.api_maj_ver,
15119				 hw->aq.api_min_ver,
15120				 I40E_FW_API_VERSION_MAJOR,
15121				 I40E_FW_MINOR_VERSION(hw));
15122		else
15123			dev_info(&pdev->dev,
15124				 "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
15125
15126		goto err_pf_reset;
15127	}
15128	i40e_get_oem_version(hw);
15129
15130	/* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */
15131	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n",
15132		 hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
15133		 hw->aq.api_maj_ver, hw->aq.api_min_ver,
15134		 i40e_nvm_version_str(hw), hw->vendor_id, hw->device_id,
15135		 hw->subsystem_vendor_id, hw->subsystem_device_id);
15136
15137	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
15138	    hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
15139		dev_dbg(&pdev->dev,
15140			"The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n",
15141			 hw->aq.api_maj_ver,
15142			 hw->aq.api_min_ver,
15143			 I40E_FW_API_VERSION_MAJOR,
15144			 I40E_FW_MINOR_VERSION(hw));
15145	else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
15146		dev_info(&pdev->dev,
15147			 "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
15148			 hw->aq.api_maj_ver,
15149			 hw->aq.api_min_ver,
15150			 I40E_FW_API_VERSION_MAJOR,
15151			 I40E_FW_MINOR_VERSION(hw));
15152
15153	i40e_verify_eeprom(pf);
15154
15155	/* Rev 0 hardware was never productized */
15156	if (hw->revision_id < 1)
15157		dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
15158
15159	i40e_clear_pxe_mode(hw);
15160
15161	err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
15162	if (err)
15163		goto err_adminq_setup;
15164
15165	err = i40e_sw_init(pf);
15166	if (err) {
15167		dev_info(&pdev->dev, "sw_init failed: %d\n", err);
15168		goto err_sw_init;
15169	}
15170
15171	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
15172		return i40e_init_recovery_mode(pf, hw);
15173
15174	err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
15175				hw->func_caps.num_rx_qp, 0, 0);
15176	if (err) {
15177		dev_info(&pdev->dev, "init_lan_hmc failed: %d\n", err);
15178		goto err_init_lan_hmc;
15179	}
15180
15181	err = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
15182	if (err) {
15183		dev_info(&pdev->dev, "configure_lan_hmc failed: %d\n", err);
15184		err = -ENOENT;
15185		goto err_configure_lan_hmc;
15186	}
15187
15188	/* Disable LLDP for NICs that have firmware versions lower than v4.3.
15189	 * Ignore error return codes because if it was already disabled via
15190	 * hardware settings this will fail
15191	 */
15192	if (pf->hw_features & I40E_HW_STOP_FW_LLDP) {
15193		dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
15194		i40e_aq_stop_lldp(hw, true, false, NULL);
15195	}
15196
15197	/* allow a platform config to override the HW addr */
15198	i40e_get_platform_mac_addr(pdev, pf);
15199
15200	if (!is_valid_ether_addr(hw->mac.addr)) {
15201		dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr);
15202		err = -EIO;
15203		goto err_mac_addr;
15204	}
15205	dev_info(&pdev->dev, "MAC address: %pM\n", hw->mac.addr);
15206	ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
15207	i40e_get_port_mac_addr(hw, hw->mac.port_addr);
15208	if (is_valid_ether_addr(hw->mac.port_addr))
15209		pf->hw_features |= I40E_HW_PORT_ID_VALID;
15210
15211	pci_set_drvdata(pdev, pf);
15212	pci_save_state(pdev);
15213
15214	dev_info(&pdev->dev,
15215		 (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ?
15216			"FW LLDP is disabled\n" :
15217			"FW LLDP is enabled\n");
15218
15219	/* Enable FW to write default DCB config on link-up */
15220	i40e_aq_set_dcb_parameters(hw, true, NULL);
15221
15222#ifdef CONFIG_I40E_DCB
15223	err = i40e_init_pf_dcb(pf);
15224	if (err) {
15225		dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
15226		pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED);
15227		/* Continue without DCB enabled */
15228	}
15229#endif /* CONFIG_I40E_DCB */
15230
15231	/* set up periodic task facility */
15232	timer_setup(&pf->service_timer, i40e_service_timer, 0);
15233	pf->service_timer_period = HZ;
15234
15235	INIT_WORK(&pf->service_task, i40e_service_task);
15236	clear_bit(__I40E_SERVICE_SCHED, pf->state);
15237
15238	/* NVM bit on means WoL disabled for the port */
15239	i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
15240	if (BIT (hw->port) & wol_nvm_bits || hw->partition_id != 1)
15241		pf->wol_en = false;
15242	else
15243		pf->wol_en = true;
15244	device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en);
15245
15246	/* set up the main switch operations */
15247	i40e_determine_queue_usage(pf);
15248	err = i40e_init_interrupt_scheme(pf);
15249	if (err)
15250		goto err_switch_setup;
15251
15252	pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
15253	pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
15254	pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
15255	pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
15256	pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS;
15257	pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
15258						    UDP_TUNNEL_TYPE_GENEVE;
15259
15260	/* The number of VSIs reported by the FW is the minimum guaranteed
15261	 * to us; HW supports far more and we share the remaining pool with
15262	 * the other PFs. We allocate space for more than the guarantee with
15263	 * the understanding that we might not get them all later.
15264	 */
15265	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
15266		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
15267	else
15268		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
15269	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
15270		dev_warn(&pf->pdev->dev,
15271			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
15272			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
15273		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
15274	}
15275
15276	/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
15277	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
15278			  GFP_KERNEL);
15279	if (!pf->vsi) {
15280		err = -ENOMEM;
15281		goto err_switch_setup;
15282	}
15283
15284#ifdef CONFIG_PCI_IOV
15285	/* prep for VF support */
15286	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
15287	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
15288	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
15289		if (pci_num_vf(pdev))
15290			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
15291	}
15292#endif
15293	err = i40e_setup_pf_switch(pf, false, false);
15294	if (err) {
15295		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
15296		goto err_vsis;
15297	}
15298	INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
15299
15300	/* if FDIR VSI was set up, start it now */
15301	for (i = 0; i < pf->num_alloc_vsi; i++) {
15302		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
15303			i40e_vsi_open(pf->vsi[i]);
15304			break;
15305		}
15306	}
15307
15308	/* The driver only wants link up/down and module qualification
15309	 * reports from firmware.  Note the negative logic.
15310	 */
15311	err = i40e_aq_set_phy_int_mask(&pf->hw,
15312				       ~(I40E_AQ_EVENT_LINK_UPDOWN |
15313					 I40E_AQ_EVENT_MEDIA_NA |
15314					 I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
15315	if (err)
15316		dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
15317			 i40e_stat_str(&pf->hw, err),
15318			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
15319
15320	/* Reconfigure hardware for allowing smaller MSS in the case
15321	 * of TSO, so that we avoid the MDD being fired and causing
15322	 * a reset in the case of small MSS+TSO.
15323	 */
15324	val = rd32(hw, I40E_REG_MSS);
15325	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
15326		val &= ~I40E_REG_MSS_MIN_MASK;
15327		val |= I40E_64BYTE_MSS;
15328		wr32(hw, I40E_REG_MSS, val);
15329	}
15330
15331	if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
15332		msleep(75);
15333		err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
15334		if (err)
15335			dev_info(&pf->pdev->dev, "link restart failed, err %s aq_err %s\n",
15336				 i40e_stat_str(&pf->hw, err),
15337				 i40e_aq_str(&pf->hw,
15338					     pf->hw.aq.asq_last_status));
15339	}
15340	/* The main driver is (mostly) up and happy. We need to set this state
15341	 * before setting up the misc vector or we get a race and the vector
15342	 * ends up disabled forever.
15343	 */
15344	clear_bit(__I40E_DOWN, pf->state);
15345
15346	/* In case of MSIX we are going to setup the misc vector right here
15347	 * to handle admin queue events etc. In case of legacy and MSI
15348	 * the misc functionality and queue processing is combined in
15349	 * the same vector and that gets setup at open.
15350	 */
15351	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
15352		err = i40e_setup_misc_vector(pf);
15353		if (err) {
15354			dev_info(&pdev->dev,
15355				 "setup of misc vector failed: %d\n", err);
15356			i40e_cloud_filter_exit(pf);
15357			i40e_fdir_teardown(pf);
15358			goto err_vsis;
15359		}
15360	}
15361
15362#ifdef CONFIG_PCI_IOV
15363	/* prep for VF support */
15364	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
15365	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
15366	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
15367		/* disable link interrupts for VFs */
15368		val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
15369		val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK;
15370		wr32(hw, I40E_PFGEN_PORTMDIO_NUM, val);
15371		i40e_flush(hw);
15372
15373		if (pci_num_vf(pdev)) {
15374			dev_info(&pdev->dev,
15375				 "Active VFs found, allocating resources.\n");
15376			err = i40e_alloc_vfs(pf, pci_num_vf(pdev));
15377			if (err)
15378				dev_info(&pdev->dev,
15379					 "Error %d allocating resources for existing VFs\n",
15380					 err);
15381		}
15382	}
15383#endif /* CONFIG_PCI_IOV */
15384
15385	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
15386		pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
15387						      pf->num_iwarp_msix,
15388						      I40E_IWARP_IRQ_PILE_ID);
15389		if (pf->iwarp_base_vector < 0) {
15390			dev_info(&pdev->dev,
15391				 "failed to get tracking for %d vectors for IWARP err=%d\n",
15392				 pf->num_iwarp_msix, pf->iwarp_base_vector);
15393			pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
15394		}
15395	}
15396
15397	i40e_dbg_pf_init(pf);
15398
15399	/* tell the firmware that we're starting */
15400	i40e_send_version(pf);
15401
15402	/* since everything's happy, start the service_task timer */
15403	mod_timer(&pf->service_timer,
15404		  round_jiffies(jiffies + pf->service_timer_period));
15405
15406	/* add this PF to client device list and launch a client service task */
15407	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
15408		err = i40e_lan_add_device(pf);
15409		if (err)
15410			dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
15411				 err);
15412	}
15413
15414#define PCI_SPEED_SIZE 8
15415#define PCI_WIDTH_SIZE 8
15416	/* Devices on the IOSF bus do not have this information
15417	 * and will report PCI Gen 1 x 1 by default so don't bother
15418	 * checking them.
15419	 */
15420	if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) {
15421		char speed[PCI_SPEED_SIZE] = "Unknown";
15422		char width[PCI_WIDTH_SIZE] = "Unknown";
15423
15424		/* Get the negotiated link width and speed from PCI config
15425		 * space
15426		 */
15427		pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA,
15428					  &link_status);
15429
15430		i40e_set_pci_config_data(hw, link_status);
15431
15432		switch (hw->bus.speed) {
15433		case i40e_bus_speed_8000:
15434			strlcpy(speed, "8.0", PCI_SPEED_SIZE); break;
15435		case i40e_bus_speed_5000:
15436			strlcpy(speed, "5.0", PCI_SPEED_SIZE); break;
15437		case i40e_bus_speed_2500:
15438			strlcpy(speed, "2.5", PCI_SPEED_SIZE); break;
15439		default:
15440			break;
15441		}
15442		switch (hw->bus.width) {
15443		case i40e_bus_width_pcie_x8:
15444			strlcpy(width, "8", PCI_WIDTH_SIZE); break;
15445		case i40e_bus_width_pcie_x4:
15446			strlcpy(width, "4", PCI_WIDTH_SIZE); break;
15447		case i40e_bus_width_pcie_x2:
15448			strlcpy(width, "2", PCI_WIDTH_SIZE); break;
15449		case i40e_bus_width_pcie_x1:
15450			strlcpy(width, "1", PCI_WIDTH_SIZE); break;
15451		default:
15452			break;
15453		}
15454
15455		dev_info(&pdev->dev, "PCI-Express: Speed %sGT/s Width x%s\n",
15456			 speed, width);
15457
15458		if (hw->bus.width < i40e_bus_width_pcie_x8 ||
15459		    hw->bus.speed < i40e_bus_speed_8000) {
15460			dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
15461			dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
15462		}
15463	}
15464
15465	/* get the requested speeds from the fw */
15466	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
15467	if (err)
15468		dev_dbg(&pf->pdev->dev, "get requested speeds ret =  %s last_status =  %s\n",
15469			i40e_stat_str(&pf->hw, err),
15470			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
15471	pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
15472
15473	/* set the FEC config due to the board capabilities */
15474	i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags);
15475
15476	/* get the supported phy types from the fw */
15477	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
15478	if (err)
15479		dev_dbg(&pf->pdev->dev, "get supported phy types ret =  %s last_status =  %s\n",
15480			i40e_stat_str(&pf->hw, err),
15481			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
15482
15483	/* make sure the MFS hasn't been set lower than the default */
15484#define MAX_FRAME_SIZE_DEFAULT 0x2600
15485	val = (rd32(&pf->hw, I40E_PRTGL_SAH) &
15486	       I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT;
15487	if (val < MAX_FRAME_SIZE_DEFAULT)
15488		dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n",
15489			 pf->hw.port, val);
15490
15491	/* Add a filter to drop all Flow control frames from any VSI from being
15492	 * transmitted. By doing so we stop a malicious VF from sending out
15493	 * PAUSE or PFC frames and potentially controlling traffic for other
15494	 * PF/VF VSIs.
15495	 * The FW can still send Flow control frames if enabled.
15496	 */
15497	i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
15498						       pf->main_vsi_seid);
15499
15500	if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
15501		(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
15502		pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS;
15503	if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
15504		pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER;
15505	/* print a string summarizing features */
15506	i40e_print_features(pf);
15507
15508	return 0;
15509
15510	/* Unwind what we've done if something failed in the setup */
15511err_vsis:
15512	set_bit(__I40E_DOWN, pf->state);
15513	i40e_clear_interrupt_scheme(pf);
15514	kfree(pf->vsi);
15515err_switch_setup:
15516	i40e_reset_interrupt_capability(pf);
15517	del_timer_sync(&pf->service_timer);
15518err_mac_addr:
15519err_configure_lan_hmc:
15520	(void)i40e_shutdown_lan_hmc(hw);
15521err_init_lan_hmc:
15522	kfree(pf->qp_pile);
15523err_sw_init:
15524err_adminq_setup:
15525err_pf_reset:
15526	iounmap(hw->hw_addr);
15527err_ioremap:
15528	kfree(pf);
15529err_pf_alloc:
15530	pci_disable_pcie_error_reporting(pdev);
15531	pci_release_mem_regions(pdev);
15532err_pci_reg:
15533err_dma:
15534	pci_disable_device(pdev);
15535	return err;
15536}
15537
15538/**
15539 * i40e_remove - Device removal routine
15540 * @pdev: PCI device information struct
15541 *
15542 * i40e_remove is called by the PCI subsystem to alert the driver
15543 * that is should release a PCI device.  This could be caused by a
15544 * Hot-Plug event, or because the driver is going to be removed from
15545 * memory.
15546 **/
15547static void i40e_remove(struct pci_dev *pdev)
15548{
15549	struct i40e_pf *pf = pci_get_drvdata(pdev);
15550	struct i40e_hw *hw = &pf->hw;
15551	i40e_status ret_code;
15552	int i;
15553
15554	i40e_dbg_pf_exit(pf);
15555
15556	i40e_ptp_stop(pf);
15557
15558	/* Disable RSS in hw */
15559	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
15560	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
15561
15562	while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
15563		usleep_range(1000, 2000);
15564
15565	if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
15566		set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
15567		i40e_free_vfs(pf);
15568		pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
15569	}
15570	/* no more scheduling of any task */
15571	set_bit(__I40E_SUSPENDED, pf->state);
15572	set_bit(__I40E_DOWN, pf->state);
15573	if (pf->service_timer.function)
15574		del_timer_sync(&pf->service_timer);
15575	if (pf->service_task.func)
15576		cancel_work_sync(&pf->service_task);
15577
15578	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
15579		struct i40e_vsi *vsi = pf->vsi[0];
15580
15581		/* We know that we have allocated only one vsi for this PF,
15582		 * it was just for registering netdevice, so the interface
15583		 * could be visible in the 'ifconfig' output
15584		 */
15585		unregister_netdev(vsi->netdev);
15586		free_netdev(vsi->netdev);
15587
15588		goto unmap;
15589	}
15590
15591	/* Client close must be called explicitly here because the timer
15592	 * has been stopped.
15593	 */
15594	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
15595
15596	i40e_fdir_teardown(pf);
15597
15598	/* If there is a switch structure or any orphans, remove them.
15599	 * This will leave only the PF's VSI remaining.
15600	 */
15601	for (i = 0; i < I40E_MAX_VEB; i++) {
15602		if (!pf->veb[i])
15603			continue;
15604
15605		if (pf->veb[i]->uplink_seid == pf->mac_seid ||
15606		    pf->veb[i]->uplink_seid == 0)
15607			i40e_switch_branch_release(pf->veb[i]);
15608	}
15609
15610	/* Now we can shutdown the PF's VSIs, just before we kill
15611	 * adminq and hmc.
15612	 */
15613	for (i = pf->num_alloc_vsi; i--;)
15614		if (pf->vsi[i]) {
15615			i40e_vsi_close(pf->vsi[i]);
15616			i40e_vsi_release(pf->vsi[i]);
15617			pf->vsi[i] = NULL;
15618		}
15619
15620	i40e_cloud_filter_exit(pf);
15621
15622	/* remove attached clients */
15623	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
15624		ret_code = i40e_lan_del_device(pf);
15625		if (ret_code)
15626			dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
15627				 ret_code);
15628	}
15629
15630	/* shutdown and destroy the HMC */
15631	if (hw->hmc.hmc_obj) {
15632		ret_code = i40e_shutdown_lan_hmc(hw);
15633		if (ret_code)
15634			dev_warn(&pdev->dev,
15635				 "Failed to destroy the HMC resources: %d\n",
15636				 ret_code);
15637	}
15638
15639unmap:
15640	/* Free MSI/legacy interrupt 0 when in recovery mode. */
15641	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
15642	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
15643		free_irq(pf->pdev->irq, pf);
15644
15645	/* shutdown the adminq */
15646	i40e_shutdown_adminq(hw);
15647
15648	/* destroy the locks only once, here */
15649	mutex_destroy(&hw->aq.arq_mutex);
15650	mutex_destroy(&hw->aq.asq_mutex);
15651
15652	/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
15653	rtnl_lock();
15654	i40e_clear_interrupt_scheme(pf);
15655	for (i = 0; i < pf->num_alloc_vsi; i++) {
15656		if (pf->vsi[i]) {
15657			if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
15658				i40e_vsi_clear_rings(pf->vsi[i]);
15659			i40e_vsi_clear(pf->vsi[i]);
15660			pf->vsi[i] = NULL;
15661		}
15662	}
15663	rtnl_unlock();
15664
15665	for (i = 0; i < I40E_MAX_VEB; i++) {
15666		kfree(pf->veb[i]);
15667		pf->veb[i] = NULL;
15668	}
15669
15670	kfree(pf->qp_pile);
15671	kfree(pf->vsi);
15672
15673	iounmap(hw->hw_addr);
15674	kfree(pf);
15675	pci_release_mem_regions(pdev);
15676
15677	pci_disable_pcie_error_reporting(pdev);
15678	pci_disable_device(pdev);
15679}
15680
15681/**
15682 * i40e_pci_error_detected - warning that something funky happened in PCI land
15683 * @pdev: PCI device information struct
15684 * @error: the type of PCI error
15685 *
15686 * Called to warn that something happened and the error handling steps
15687 * are in progress.  Allows the driver to quiesce things, be ready for
15688 * remediation.
15689 **/
15690static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
15691						pci_channel_state_t error)
15692{
15693	struct i40e_pf *pf = pci_get_drvdata(pdev);
15694
15695	dev_info(&pdev->dev, "%s: error %d\n", __func__, error);
15696
15697	if (!pf) {
15698		dev_info(&pdev->dev,
15699			 "Cannot recover - error happened during device probe\n");
15700		return PCI_ERS_RESULT_DISCONNECT;
15701	}
15702
15703	/* shutdown all operations */
15704	if (!test_bit(__I40E_SUSPENDED, pf->state))
15705		i40e_prep_for_reset(pf, false);
15706
15707	/* Request a slot reset */
15708	return PCI_ERS_RESULT_NEED_RESET;
15709}
15710
15711/**
15712 * i40e_pci_error_slot_reset - a PCI slot reset just happened
15713 * @pdev: PCI device information struct
15714 *
15715 * Called to find if the driver can work with the device now that
15716 * the pci slot has been reset.  If a basic connection seems good
15717 * (registers are readable and have sane content) then return a
15718 * happy little PCI_ERS_RESULT_xxx.
15719 **/
15720static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
15721{
15722	struct i40e_pf *pf = pci_get_drvdata(pdev);
15723	pci_ers_result_t result;
15724	u32 reg;
15725
15726	dev_dbg(&pdev->dev, "%s\n", __func__);
15727	if (pci_enable_device_mem(pdev)) {
15728		dev_info(&pdev->dev,
15729			 "Cannot re-enable PCI device after reset.\n");
15730		result = PCI_ERS_RESULT_DISCONNECT;
15731	} else {
15732		pci_set_master(pdev);
15733		pci_restore_state(pdev);
15734		pci_save_state(pdev);
15735		pci_wake_from_d3(pdev, false);
15736
15737		reg = rd32(&pf->hw, I40E_GLGEN_RTRIG);
15738		if (reg == 0)
15739			result = PCI_ERS_RESULT_RECOVERED;
15740		else
15741			result = PCI_ERS_RESULT_DISCONNECT;
15742	}
15743
15744	return result;
15745}
15746
15747/**
15748 * i40e_pci_error_reset_prepare - prepare device driver for pci reset
15749 * @pdev: PCI device information struct
15750 */
15751static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
15752{
15753	struct i40e_pf *pf = pci_get_drvdata(pdev);
15754
15755	i40e_prep_for_reset(pf, false);
15756}
15757
15758/**
15759 * i40e_pci_error_reset_done - pci reset done, device driver reset can begin
15760 * @pdev: PCI device information struct
15761 */
15762static void i40e_pci_error_reset_done(struct pci_dev *pdev)
15763{
15764	struct i40e_pf *pf = pci_get_drvdata(pdev);
15765
15766	i40e_reset_and_rebuild(pf, false, false);
15767#ifdef CONFIG_PCI_IOV
15768	i40e_restore_all_vfs_msi_state(pdev);
15769#endif /* CONFIG_PCI_IOV */
15770}
15771
15772/**
15773 * i40e_pci_error_resume - restart operations after PCI error recovery
15774 * @pdev: PCI device information struct
15775 *
15776 * Called to allow the driver to bring things back up after PCI error
15777 * and/or reset recovery has finished.
15778 **/
15779static void i40e_pci_error_resume(struct pci_dev *pdev)
15780{
15781	struct i40e_pf *pf = pci_get_drvdata(pdev);
15782
15783	dev_dbg(&pdev->dev, "%s\n", __func__);
15784	if (test_bit(__I40E_SUSPENDED, pf->state))
15785		return;
15786
15787	i40e_handle_reset_warning(pf, false);
15788}
15789
15790/**
15791 * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
15792 * using the mac_address_write admin q function
15793 * @pf: pointer to i40e_pf struct
15794 **/
15795static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
15796{
15797	struct i40e_hw *hw = &pf->hw;
15798	i40e_status ret;
15799	u8 mac_addr[6];
15800	u16 flags = 0;
15801
15802	/* Get current MAC address in case it's an LAA */
15803	if (pf->vsi[pf->lan_vsi] && pf->vsi[pf->lan_vsi]->netdev) {
15804		ether_addr_copy(mac_addr,
15805				pf->vsi[pf->lan_vsi]->netdev->dev_addr);
15806	} else {
15807		dev_err(&pf->pdev->dev,
15808			"Failed to retrieve MAC address; using default\n");
15809		ether_addr_copy(mac_addr, hw->mac.addr);
15810	}
15811
15812	/* The FW expects the mac address write cmd to first be called with
15813	 * one of these flags before calling it again with the multicast
15814	 * enable flags.
15815	 */
15816	flags = I40E_AQC_WRITE_TYPE_LAA_WOL;
15817
15818	if (hw->func_caps.flex10_enable && hw->partition_id != 1)
15819		flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;
15820
15821	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
15822	if (ret) {
15823		dev_err(&pf->pdev->dev,
15824			"Failed to update MAC address registers; cannot enable Multicast Magic packet wake up");
15825		return;
15826	}
15827
15828	flags = I40E_AQC_MC_MAG_EN
15829			| I40E_AQC_WOL_PRESERVE_ON_PFR
15830			| I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
15831	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
15832	if (ret)
15833		dev_err(&pf->pdev->dev,
15834			"Failed to enable Multicast Magic Packet wake up\n");
15835}
15836
15837/**
15838 * i40e_shutdown - PCI callback for shutting down
15839 * @pdev: PCI device information struct
15840 **/
15841static void i40e_shutdown(struct pci_dev *pdev)
15842{
15843	struct i40e_pf *pf = pci_get_drvdata(pdev);
15844	struct i40e_hw *hw = &pf->hw;
15845
15846	set_bit(__I40E_SUSPENDED, pf->state);
15847	set_bit(__I40E_DOWN, pf->state);
15848
15849	del_timer_sync(&pf->service_timer);
15850	cancel_work_sync(&pf->service_task);
15851	i40e_cloud_filter_exit(pf);
15852	i40e_fdir_teardown(pf);
15853
15854	/* Client close must be called explicitly here because the timer
15855	 * has been stopped.
15856	 */
15857	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
15858
15859	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
15860		i40e_enable_mc_magic_wake(pf);
15861
15862	i40e_prep_for_reset(pf, false);
15863
15864	wr32(hw, I40E_PFPM_APM,
15865	     (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
15866	wr32(hw, I40E_PFPM_WUFC,
15867	     (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
15868
15869	/* Free MSI/legacy interrupt 0 when in recovery mode. */
15870	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
15871	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
15872		free_irq(pf->pdev->irq, pf);
15873
15874	/* Since we're going to destroy queues during the
15875	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
15876	 * whole section
15877	 */
15878	rtnl_lock();
15879	i40e_clear_interrupt_scheme(pf);
15880	rtnl_unlock();
15881
15882	if (system_state == SYSTEM_POWER_OFF) {
15883		pci_wake_from_d3(pdev, pf->wol_en);
15884		pci_set_power_state(pdev, PCI_D3hot);
15885	}
15886}
15887
15888/**
15889 * i40e_suspend - PM callback for moving to D3
15890 * @dev: generic device information structure
15891 **/
15892static int __maybe_unused i40e_suspend(struct device *dev)
15893{
15894	struct i40e_pf *pf = dev_get_drvdata(dev);
15895	struct i40e_hw *hw = &pf->hw;
15896
15897	/* If we're already suspended, then there is nothing to do */
15898	if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
15899		return 0;
15900
15901	set_bit(__I40E_DOWN, pf->state);
15902
15903	/* Ensure service task will not be running */
15904	del_timer_sync(&pf->service_timer);
15905	cancel_work_sync(&pf->service_task);
15906
15907	/* Client close must be called explicitly here because the timer
15908	 * has been stopped.
15909	 */
15910	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
15911
15912	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
15913		i40e_enable_mc_magic_wake(pf);
15914
15915	/* Since we're going to destroy queues during the
15916	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
15917	 * whole section
15918	 */
15919	rtnl_lock();
15920
15921	i40e_prep_for_reset(pf, true);
15922
15923	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
15924	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
15925
15926	/* Clear the interrupt scheme and release our IRQs so that the system
15927	 * can safely hibernate even when there are a large number of CPUs.
15928	 * Otherwise hibernation might fail when mapping all the vectors back
15929	 * to CPU0.
15930	 */
15931	i40e_clear_interrupt_scheme(pf);
15932
15933	rtnl_unlock();
15934
15935	return 0;
15936}
15937
15938/**
15939 * i40e_resume - PM callback for waking up from D3
15940 * @dev: generic device information structure
15941 **/
15942static int __maybe_unused i40e_resume(struct device *dev)
15943{
15944	struct i40e_pf *pf = dev_get_drvdata(dev);
15945	int err;
15946
15947	/* If we're not suspended, then there is nothing to do */
15948	if (!test_bit(__I40E_SUSPENDED, pf->state))
15949		return 0;
15950
15951	/* We need to hold the RTNL lock prior to restoring interrupt schemes,
15952	 * since we're going to be restoring queues
15953	 */
15954	rtnl_lock();
15955
15956	/* We cleared the interrupt scheme when we suspended, so we need to
15957	 * restore it now to resume device functionality.
15958	 */
15959	err = i40e_restore_interrupt_scheme(pf);
15960	if (err) {
15961		dev_err(dev, "Cannot restore interrupt scheme: %d\n",
15962			err);
15963	}
15964
15965	clear_bit(__I40E_DOWN, pf->state);
15966	i40e_reset_and_rebuild(pf, false, true);
15967
15968	rtnl_unlock();
15969
15970	/* Clear suspended state last after everything is recovered */
15971	clear_bit(__I40E_SUSPENDED, pf->state);
15972
15973	/* Restart the service task */
15974	mod_timer(&pf->service_timer,
15975		  round_jiffies(jiffies + pf->service_timer_period));
15976
15977	return 0;
15978}
15979
15980static const struct pci_error_handlers i40e_err_handler = {
15981	.error_detected = i40e_pci_error_detected,
15982	.slot_reset = i40e_pci_error_slot_reset,
15983	.reset_prepare = i40e_pci_error_reset_prepare,
15984	.reset_done = i40e_pci_error_reset_done,
15985	.resume = i40e_pci_error_resume,
15986};
15987
15988static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
15989
15990static struct pci_driver i40e_driver = {
15991	.name     = i40e_driver_name,
15992	.id_table = i40e_pci_tbl,
15993	.probe    = i40e_probe,
15994	.remove   = i40e_remove,
15995	.driver   = {
15996		.pm = &i40e_pm_ops,
15997	},
15998	.shutdown = i40e_shutdown,
15999	.err_handler = &i40e_err_handler,
16000	.sriov_configure = i40e_pci_sriov_configure,
16001};
16002
16003/**
16004 * i40e_init_module - Driver registration routine
16005 *
16006 * i40e_init_module is the first routine called when the driver is
16007 * loaded. All it does is register with the PCI subsystem.
16008 **/
16009static int __init i40e_init_module(void)
16010{
16011	int err;
16012
16013	pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string);
16014	pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
16015
16016	/* There is no need to throttle the number of active tasks because
16017	 * each device limits its own task using a state bit for scheduling
16018	 * the service task, and the device tasks do not interfere with each
16019	 * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM
16020	 * since we need to be able to guarantee forward progress even under
16021	 * memory pressure.
16022	 */
16023	i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name);
16024	if (!i40e_wq) {
16025		pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
16026		return -ENOMEM;
16027	}
16028
16029	i40e_dbg_init();
16030	err = pci_register_driver(&i40e_driver);
16031	if (err) {
16032		destroy_workqueue(i40e_wq);
16033		i40e_dbg_exit();
16034		return err;
16035	}
16036
16037	return 0;
16038}
16039module_init(i40e_init_module);
16040
16041/**
16042 * i40e_exit_module - Driver exit cleanup routine
16043 *
16044 * i40e_exit_module is called just before the driver is removed
16045 * from memory.
16046 **/
16047static void __exit i40e_exit_module(void)
16048{
16049	pci_unregister_driver(&i40e_driver);
16050	destroy_workqueue(i40e_wq);
16051	i40e_dbg_exit();
16052}
16053module_exit(i40e_exit_module);
16054