1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2005 - 2016 Broadcom
4 * All rights reserved.
5 *
6 * Contact Information:
7 * linux-drivers@emulex.com
8 *
9 * Emulex
10 * 3333 Susan Street
11 * Costa Mesa, CA 92626
12 */
13
14#include <linux/prefetch.h>
15#include <linux/module.h>
16#include "be.h"
17#include "be_cmds.h"
18#include <asm/div64.h>
19#include <linux/aer.h>
20#include <linux/if_bridge.h>
21#include <net/busy_poll.h>
22#include <net/vxlan.h>
23
24MODULE_DESCRIPTION(DRV_DESC);
25MODULE_AUTHOR("Emulex Corporation");
26MODULE_LICENSE("GPL");
27
28/* num_vfs module param is obsolete.
29 * Use sysfs method to enable/disable VFs.
30 */
31static unsigned int num_vfs;
32module_param(num_vfs, uint, 0444);
33MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
34
35static ushort rx_frag_size = 2048;
36module_param(rx_frag_size, ushort, 0444);
37MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
38
39/* Per-module error detection/recovery workq shared across all functions.
40 * Each function schedules its own work request on this shared workq.
41 */
42static struct workqueue_struct *be_err_recovery_workq;
43
44static const struct pci_device_id be_dev_ids[] = {
45#ifdef CONFIG_BE2NET_BE2
46	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
47	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
48#endif /* CONFIG_BE2NET_BE2 */
49#ifdef CONFIG_BE2NET_BE3
50	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
51	{ PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
52#endif /* CONFIG_BE2NET_BE3 */
53#ifdef CONFIG_BE2NET_LANCER
54	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56#endif /* CONFIG_BE2NET_LANCER */
57#ifdef CONFIG_BE2NET_SKYHAWK
58	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
59	{ PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
60#endif /* CONFIG_BE2NET_SKYHAWK */
61	{ 0 }
62};
63MODULE_DEVICE_TABLE(pci, be_dev_ids);
64
/* Workqueue used by all functions for deferring cmd calls to the adapter */
66static struct workqueue_struct *be_wq;
67
68/* UE Status Low CSR */
69static const char * const ue_status_low_desc[] = {
70	"CEV",
71	"CTX",
72	"DBUF",
73	"ERX",
74	"Host",
75	"MPU",
76	"NDMA",
77	"PTC ",
78	"RDMA ",
79	"RXF ",
80	"RXIPS ",
81	"RXULP0 ",
82	"RXULP1 ",
83	"RXULP2 ",
84	"TIM ",
85	"TPOST ",
86	"TPRE ",
87	"TXIPS ",
88	"TXULP0 ",
89	"TXULP1 ",
90	"UC ",
91	"WDMA ",
92	"TXULP2 ",
93	"HOST1 ",
94	"P0_OB_LINK ",
95	"P1_OB_LINK ",
96	"HOST_GPIO ",
97	"MBOX ",
98	"ERX2 ",
99	"SPARE ",
100	"JTAG ",
101	"MPU_INTPEND "
102};
103
104/* UE Status High CSR */
105static const char * const ue_status_hi_desc[] = {
106	"LPCMEMHOST",
107	"MGMT_MAC",
108	"PCS0ONLINE",
109	"MPU_IRAM",
110	"PCS1ONLINE",
111	"PCTL0",
112	"PCTL1",
113	"PMEM",
114	"RR",
115	"TXPB",
116	"RXPP",
117	"XAUI",
118	"TXP",
119	"ARM",
120	"IPC",
121	"HOST2",
122	"HOST3",
123	"HOST4",
124	"HOST5",
125	"HOST6",
126	"HOST7",
127	"ECRC",
128	"Poison TLP",
129	"NETC",
130	"PERIPH",
131	"LLTXULP",
132	"D2P",
133	"RCON",
134	"LDMA",
135	"LLTXP",
136	"LLTXPB",
137	"Unknown"
138};
139
140#define BE_VF_IF_EN_FLAGS	(BE_IF_FLAGS_UNTAGGED | \
141				 BE_IF_FLAGS_BROADCAST | \
142				 BE_IF_FLAGS_MULTICAST | \
143				 BE_IF_FLAGS_PASS_L3L4_ERRORS)
144
145static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
146{
147	struct be_dma_mem *mem = &q->dma_mem;
148
149	if (mem->va) {
150		dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
151				  mem->dma);
152		mem->va = NULL;
153	}
154}
155
156static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
157			  u16 len, u16 entry_size)
158{
159	struct be_dma_mem *mem = &q->dma_mem;
160
161	memset(q, 0, sizeof(*q));
162	q->len = len;
163	q->entry_size = entry_size;
164	mem->size = len * entry_size;
165	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
166				     &mem->dma, GFP_KERNEL);
167	if (!mem->va)
168		return -ENOMEM;
169	return 0;
170}
171
172static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
173{
174	u32 reg, enabled;
175
176	pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
177			      &reg);
178	enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179
180	if (!enabled && enable)
181		reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
182	else if (enabled && !enable)
183		reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184	else
185		return;
186
187	pci_write_config_dword(adapter->pdev,
188			       PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
189}
190
191static void be_intr_set(struct be_adapter *adapter, bool enable)
192{
193	int status = 0;
194
	/* On Lancer, interrupts can't be controlled via this register */
196	if (lancer_chip(adapter))
197		return;
198
199	if (be_check_error(adapter, BE_ERROR_EEH))
200		return;
201
202	status = be_cmd_intr_set(adapter, enable);
203	if (status)
204		be_reg_intr_set(adapter, enable);
205}
206
207static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
208{
209	u32 val = 0;
210
211	if (be_check_error(adapter, BE_ERROR_HW))
212		return;
213
214	val |= qid & DB_RQ_RING_ID_MASK;
215	val |= posted << DB_RQ_NUM_POSTED_SHIFT;
216
217	wmb();
218	iowrite32(val, adapter->db + DB_RQ_OFFSET);
219}
220
221static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
222			  u16 posted)
223{
224	u32 val = 0;
225
226	if (be_check_error(adapter, BE_ERROR_HW))
227		return;
228
229	val |= txo->q.id & DB_TXULP_RING_ID_MASK;
230	val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
231
232	wmb();
233	iowrite32(val, adapter->db + txo->db_offset);
234}
235
236static void be_eq_notify(struct be_adapter *adapter, u16 qid,
237			 bool arm, bool clear_int, u16 num_popped,
238			 u32 eq_delay_mult_enc)
239{
240	u32 val = 0;
241
242	val |= qid & DB_EQ_RING_ID_MASK;
243	val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
244
245	if (be_check_error(adapter, BE_ERROR_HW))
246		return;
247
248	if (arm)
249		val |= 1 << DB_EQ_REARM_SHIFT;
250	if (clear_int)
251		val |= 1 << DB_EQ_CLR_SHIFT;
252	val |= 1 << DB_EQ_EVNT_SHIFT;
253	val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
254	val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
255	iowrite32(val, adapter->db + DB_EQ_OFFSET);
256}
257
258void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
259{
260	u32 val = 0;
261
262	val |= qid & DB_CQ_RING_ID_MASK;
263	val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
264			DB_CQ_RING_ID_EXT_MASK_SHIFT);
265
266	if (be_check_error(adapter, BE_ERROR_HW))
267		return;
268
269	if (arm)
270		val |= 1 << DB_CQ_REARM_SHIFT;
271	val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
272	iowrite32(val, adapter->db + DB_CQ_OFFSET);
273}
274
275static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
276{
277	int i;
278
279	/* Check if mac has already been added as part of uc-list */
280	for (i = 0; i < adapter->uc_macs; i++) {
281		if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
282			/* mac already added, skip addition */
283			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
284			return 0;
285		}
286	}
287
288	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
289			       &adapter->pmac_id[0], 0);
290}
291
292static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
293{
294	int i;
295
296	/* Skip deletion if the programmed mac is
297	 * being used in uc-list
298	 */
299	for (i = 0; i < adapter->uc_macs; i++) {
300		if (adapter->pmac_id[i + 1] == pmac_id)
301			return;
302	}
303	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
304}
305
306static int be_mac_addr_set(struct net_device *netdev, void *p)
307{
308	struct be_adapter *adapter = netdev_priv(netdev);
309	struct device *dev = &adapter->pdev->dev;
310	struct sockaddr *addr = p;
311	int status;
312	u8 mac[ETH_ALEN];
313	u32 old_pmac_id = adapter->pmac_id[0];
314
315	if (!is_valid_ether_addr(addr->sa_data))
316		return -EADDRNOTAVAIL;
317
	/* Proceed further only if the user-provided MAC is different
	 * from the active MAC
	 */
321	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
322		return 0;
323
	/* BE3 VFs without FILTMGMT privilege are not allowed to set their
	 * MAC address
	 */
327	if (BEx_chip(adapter) && be_virtfn(adapter) &&
328	    !check_privilege(adapter, BE_PRIV_FILTMGMT))
329		return -EPERM;
330
331	/* if device is not running, copy MAC to netdev->dev_addr */
332	if (!netif_running(netdev))
333		goto done;
334
335	/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
336	 * privilege or if PF did not provision the new MAC address.
337	 * On BE3, this cmd will always fail if the VF doesn't have the
	 * FILTMGMT privilege. This failure is OK only if the PF has
	 * programmed the MAC for the VF.
340	 */
341	mutex_lock(&adapter->rx_filter_lock);
342	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
	if (!status) {
345		/* Delete the old programmed MAC. This call may fail if the
346		 * old MAC was already deleted by the PF driver.
347		 */
348		if (adapter->pmac_id[0] != old_pmac_id)
349			be_dev_mac_del(adapter, old_pmac_id);
350	}
351
352	mutex_unlock(&adapter->rx_filter_lock);
353	/* Decide if the new MAC is successfully activated only after
354	 * querying the FW
355	 */
356	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
357				       adapter->if_handle, true, 0);
358	if (status)
359		goto err;
360
361	/* The MAC change did not happen, either due to lack of privilege
362	 * or PF didn't pre-provision.
363	 */
364	if (!ether_addr_equal(addr->sa_data, mac)) {
365		status = -EPERM;
366		goto err;
367	}
368
369	/* Remember currently programmed MAC */
370	ether_addr_copy(adapter->dev_mac, addr->sa_data);
371done:
372	ether_addr_copy(netdev->dev_addr, addr->sa_data);
373	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
374	return 0;
375err:
376	dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
377	return status;
378}
379
380/* BE2 supports only v0 cmd */
381static void *hw_stats_from_cmd(struct be_adapter *adapter)
382{
383	if (BE2_chip(adapter)) {
384		struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
385
386		return &cmd->hw_stats;
387	} else if (BE3_chip(adapter)) {
388		struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
389
390		return &cmd->hw_stats;
391	} else {
392		struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
393
394		return &cmd->hw_stats;
395	}
396}
397
398/* BE2 supports only v0 cmd */
399static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
400{
401	if (BE2_chip(adapter)) {
402		struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
403
404		return &hw_stats->erx;
405	} else if (BE3_chip(adapter)) {
406		struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
407
408		return &hw_stats->erx;
409	} else {
410		struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
411
412		return &hw_stats->erx;
413	}
414}
415
416static void populate_be_v0_stats(struct be_adapter *adapter)
417{
418	struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
419	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
420	struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
421	struct be_port_rxf_stats_v0 *port_stats =
422					&rxf_stats->port[adapter->port_num];
423	struct be_drv_stats *drvs = &adapter->drv_stats;
424
425	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
426	drvs->rx_pause_frames = port_stats->rx_pause_frames;
427	drvs->rx_crc_errors = port_stats->rx_crc_errors;
428	drvs->rx_control_frames = port_stats->rx_control_frames;
429	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
430	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
431	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
432	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
433	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
434	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
435	drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
436	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
437	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
438	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
439	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
440	drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
441	drvs->rx_dropped_header_too_small =
442		port_stats->rx_dropped_header_too_small;
443	drvs->rx_address_filtered =
444					port_stats->rx_address_filtered +
445					port_stats->rx_vlan_filtered;
446	drvs->rx_alignment_symbol_errors =
447		port_stats->rx_alignment_symbol_errors;
448
449	drvs->tx_pauseframes = port_stats->tx_pauseframes;
450	drvs->tx_controlframes = port_stats->tx_controlframes;
451
452	if (adapter->port_num)
453		drvs->jabber_events = rxf_stats->port1_jabber_events;
454	else
455		drvs->jabber_events = rxf_stats->port0_jabber_events;
456	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
457	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
458	drvs->forwarded_packets = rxf_stats->forwarded_packets;
459	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
460	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
461	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
462	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
463}
464
465static void populate_be_v1_stats(struct be_adapter *adapter)
466{
467	struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
468	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
469	struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
470	struct be_port_rxf_stats_v1 *port_stats =
471					&rxf_stats->port[adapter->port_num];
472	struct be_drv_stats *drvs = &adapter->drv_stats;
473
474	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
475	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
476	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
477	drvs->rx_pause_frames = port_stats->rx_pause_frames;
478	drvs->rx_crc_errors = port_stats->rx_crc_errors;
479	drvs->rx_control_frames = port_stats->rx_control_frames;
480	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
481	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
482	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
483	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
484	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
485	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
486	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
487	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
488	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
489	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
490	drvs->rx_dropped_header_too_small =
491		port_stats->rx_dropped_header_too_small;
492	drvs->rx_input_fifo_overflow_drop =
493		port_stats->rx_input_fifo_overflow_drop;
494	drvs->rx_address_filtered = port_stats->rx_address_filtered;
495	drvs->rx_alignment_symbol_errors =
496		port_stats->rx_alignment_symbol_errors;
497	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
498	drvs->tx_pauseframes = port_stats->tx_pauseframes;
499	drvs->tx_controlframes = port_stats->tx_controlframes;
500	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
501	drvs->jabber_events = port_stats->jabber_events;
502	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
503	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
504	drvs->forwarded_packets = rxf_stats->forwarded_packets;
505	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
506	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
507	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
508	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
509}
510
511static void populate_be_v2_stats(struct be_adapter *adapter)
512{
513	struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
514	struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
515	struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
516	struct be_port_rxf_stats_v2 *port_stats =
517					&rxf_stats->port[adapter->port_num];
518	struct be_drv_stats *drvs = &adapter->drv_stats;
519
520	be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
521	drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
522	drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
523	drvs->rx_pause_frames = port_stats->rx_pause_frames;
524	drvs->rx_crc_errors = port_stats->rx_crc_errors;
525	drvs->rx_control_frames = port_stats->rx_control_frames;
526	drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
527	drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
528	drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
529	drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
530	drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
531	drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
532	drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
533	drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
534	drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
535	drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
536	drvs->rx_dropped_header_too_small =
537		port_stats->rx_dropped_header_too_small;
538	drvs->rx_input_fifo_overflow_drop =
539		port_stats->rx_input_fifo_overflow_drop;
540	drvs->rx_address_filtered = port_stats->rx_address_filtered;
541	drvs->rx_alignment_symbol_errors =
542		port_stats->rx_alignment_symbol_errors;
543	drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
544	drvs->tx_pauseframes = port_stats->tx_pauseframes;
545	drvs->tx_controlframes = port_stats->tx_controlframes;
546	drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
547	drvs->jabber_events = port_stats->jabber_events;
548	drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
549	drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
550	drvs->forwarded_packets = rxf_stats->forwarded_packets;
551	drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
552	drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
553	drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
554	adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
555	if (be_roce_supported(adapter)) {
556		drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
557		drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
558		drvs->rx_roce_frames = port_stats->roce_frames_received;
559		drvs->roce_drops_crc = port_stats->roce_drops_crc;
560		drvs->roce_drops_payload_len =
561			port_stats->roce_drops_payload_len;
562	}
563}
564
565static void populate_lancer_stats(struct be_adapter *adapter)
566{
567	struct be_drv_stats *drvs = &adapter->drv_stats;
568	struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
569
570	be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
571	drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
572	drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
573	drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
574	drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
575	drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
576	drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
577	drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
578	drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
579	drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
580	drvs->rx_dropped_tcp_length =
581				pport_stats->rx_dropped_invalid_tcp_length;
582	drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
583	drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
584	drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
585	drvs->rx_dropped_header_too_small =
586				pport_stats->rx_dropped_header_too_small;
587	drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
588	drvs->rx_address_filtered =
589					pport_stats->rx_address_filtered +
590					pport_stats->rx_vlan_filtered;
591	drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
592	drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593	drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
594	drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
595	drvs->jabber_events = pport_stats->rx_jabbers;
596	drvs->forwarded_packets = pport_stats->num_forwards_lo;
597	drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
598	drvs->rx_drops_too_many_frags =
599				pport_stats->rx_drops_too_many_frags_lo;
600}
601
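/* Fold a 16-bit HW counter that wraps at 65535 into a 32-bit accumulator.
 * A wrap is detected when the new reading is smaller than the low 16 bits
 * of the accumulator.
 */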
602static void accumulate_16bit_val(u32 *acc, u16 val)
603{
#define lo(x)			((x) & 0xFFFF)
#define hi(x)			((x) & 0xFFFF0000)
606	bool wrapped = val < lo(*acc);
607	u32 newacc = hi(*acc) + val;
608
609	if (wrapped)
610		newacc += 65536;
611	WRITE_ONCE(*acc, newacc);
612}
613
614static void populate_erx_stats(struct be_adapter *adapter,
615			       struct be_rx_obj *rxo, u32 erx_stat)
616{
617	if (!BEx_chip(adapter))
618		rx_stats(rxo)->rx_drops_no_frags = erx_stat;
619	else
620		/* below erx HW counter can actually wrap around after
621		 * 65535. Driver accumulates a 32-bit value
622		 */
623		accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
624				     (u16)erx_stat);
625}
626
627void be_parse_stats(struct be_adapter *adapter)
628{
629	struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
630	struct be_rx_obj *rxo;
631	int i;
632	u32 erx_stat;
633
634	if (lancer_chip(adapter)) {
635		populate_lancer_stats(adapter);
636	} else {
637		if (BE2_chip(adapter))
638			populate_be_v0_stats(adapter);
639		else if (BE3_chip(adapter))
640			/* for BE3 */
641			populate_be_v1_stats(adapter);
642		else
643			populate_be_v2_stats(adapter);
644
645		/* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
646		for_all_rx_queues(adapter, rxo, i) {
647			erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
648			populate_erx_stats(adapter, rxo, erx_stat);
649		}
650	}
651}
652
653static void be_get_stats64(struct net_device *netdev,
654			   struct rtnl_link_stats64 *stats)
655{
656	struct be_adapter *adapter = netdev_priv(netdev);
657	struct be_drv_stats *drvs = &adapter->drv_stats;
658	struct be_rx_obj *rxo;
659	struct be_tx_obj *txo;
660	u64 pkts, bytes;
661	unsigned int start;
662	int i;
663
664	for_all_rx_queues(adapter, rxo, i) {
665		const struct be_rx_stats *rx_stats = rx_stats(rxo);
666
667		do {
668			start = u64_stats_fetch_begin_irq(&rx_stats->sync);
669			pkts = rx_stats(rxo)->rx_pkts;
670			bytes = rx_stats(rxo)->rx_bytes;
671		} while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
672		stats->rx_packets += pkts;
673		stats->rx_bytes += bytes;
674		stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
675		stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
676					rx_stats(rxo)->rx_drops_no_frags;
677	}
678
679	for_all_tx_queues(adapter, txo, i) {
680		const struct be_tx_stats *tx_stats = tx_stats(txo);
681
682		do {
683			start = u64_stats_fetch_begin_irq(&tx_stats->sync);
684			pkts = tx_stats(txo)->tx_pkts;
685			bytes = tx_stats(txo)->tx_bytes;
686		} while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
687		stats->tx_packets += pkts;
688		stats->tx_bytes += bytes;
689	}
690
691	/* bad pkts received */
692	stats->rx_errors = drvs->rx_crc_errors +
693		drvs->rx_alignment_symbol_errors +
694		drvs->rx_in_range_errors +
695		drvs->rx_out_range_errors +
696		drvs->rx_frame_too_long +
697		drvs->rx_dropped_too_small +
698		drvs->rx_dropped_too_short +
699		drvs->rx_dropped_header_too_small +
700		drvs->rx_dropped_tcp_length +
701		drvs->rx_dropped_runt;
702
703	/* detailed rx errors */
704	stats->rx_length_errors = drvs->rx_in_range_errors +
705		drvs->rx_out_range_errors +
706		drvs->rx_frame_too_long;
707
708	stats->rx_crc_errors = drvs->rx_crc_errors;
709
710	/* frame alignment errors */
711	stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
712
713	/* receiver fifo overrun */
	/* drops_no_pbuf is not per i/f, it's per BE card */
715	stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
716				drvs->rx_input_fifo_overflow_drop +
717				drvs->rx_drops_no_pbuf;
718}
719
720void be_link_status_update(struct be_adapter *adapter, u8 link_status)
721{
722	struct net_device *netdev = adapter->netdev;
723
724	if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
725		netif_carrier_off(netdev);
726		adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
727	}
728
729	if (link_status)
730		netif_carrier_on(netdev);
731	else
732		netif_carrier_off(netdev);
733
734	netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
735}
736
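/* Length of the L2-L4 headers that get duplicated in each TSO segment */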
737static int be_gso_hdr_len(struct sk_buff *skb)
738{
739	if (skb->encapsulation)
740		return skb_inner_transport_offset(skb) +
741		       inner_tcp_hdrlen(skb);
742	return skb_transport_offset(skb) + tcp_hdrlen(skb);
743}
744
745static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
746{
747	struct be_tx_stats *stats = tx_stats(txo);
748	u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
749	/* Account for headers which get duplicated in TSO pkt */
750	u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
751
752	u64_stats_update_begin(&stats->sync);
753	stats->tx_reqs++;
754	stats->tx_bytes += skb->len + dup_hdr_len;
755	stats->tx_pkts += tx_pkts;
756	if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
757		stats->tx_vxlan_offload_pkts += tx_pkts;
758	u64_stats_update_end(&stats->sync);
759}
760
761/* Returns number of WRBs needed for the skb */
762static u32 skb_wrb_cnt(struct sk_buff *skb)
763{
764	/* +1 for the header wrb */
765	return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
766}
767
768static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
769{
770	wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
771	wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
772	wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
773	wrb->rsvd0 = 0;
774}
775
776/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
777 * to avoid the swap and shift/mask operations in wrb_fill().
778 */
779static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
780{
781	wrb->frag_pa_hi = 0;
782	wrb->frag_pa_lo = 0;
783	wrb->frag_len = 0;
784	wrb->rsvd0 = 0;
785}
786
787static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
788				     struct sk_buff *skb)
789{
790	u8 vlan_prio;
791	u16 vlan_tag;
792
793	vlan_tag = skb_vlan_tag_get(skb);
794	vlan_prio = skb_vlan_tag_get_prio(skb);
795	/* If vlan priority provided by OS is NOT in available bmap */
796	if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
797		vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
798				adapter->recommended_prio_bits;
799
800	return vlan_tag;
801}
802
803/* Used only for IP tunnel packets */
804static u16 skb_inner_ip_proto(struct sk_buff *skb)
805{
806	return (inner_ip_hdr(skb)->version == 4) ?
807		inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
808}
809
810static u16 skb_ip_proto(struct sk_buff *skb)
811{
812	return (ip_hdr(skb)->version == 4) ?
813		ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
814}
815
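/* The TXQ is considered full when it can no longer accommodate a
 * maximally-fragmented skb.
 */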
816static inline bool be_is_txq_full(struct be_tx_obj *txo)
817{
818	return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
819}
820
821static inline bool be_can_txq_wake(struct be_tx_obj *txo)
822{
823	return atomic_read(&txo->q.used) < txo->q.len / 2;
824}
825
826static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
827{
828	return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
829}
830
831static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
832				       struct sk_buff *skb,
833				       struct be_wrb_params *wrb_params)
834{
835	u16 proto;
836
837	if (skb_is_gso(skb)) {
838		BE_WRB_F_SET(wrb_params->features, LSO, 1);
839		wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
840		if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
841			BE_WRB_F_SET(wrb_params->features, LSO6, 1);
842	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
843		if (skb->encapsulation) {
844			BE_WRB_F_SET(wrb_params->features, IPCS, 1);
845			proto = skb_inner_ip_proto(skb);
846		} else {
847			proto = skb_ip_proto(skb);
848		}
849		if (proto == IPPROTO_TCP)
850			BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
851		else if (proto == IPPROTO_UDP)
852			BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
853	}
854
855	if (skb_vlan_tag_present(skb)) {
856		BE_WRB_F_SET(wrb_params->features, VLAN, 1);
857		wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
858	}
859
860	BE_WRB_F_SET(wrb_params->features, CRC, 1);
861}
862
863static void wrb_fill_hdr(struct be_adapter *adapter,
864			 struct be_eth_hdr_wrb *hdr,
865			 struct be_wrb_params *wrb_params,
866			 struct sk_buff *skb)
867{
868	memset(hdr, 0, sizeof(*hdr));
869
870	SET_TX_WRB_HDR_BITS(crc, hdr,
871			    BE_WRB_F_GET(wrb_params->features, CRC));
872	SET_TX_WRB_HDR_BITS(ipcs, hdr,
873			    BE_WRB_F_GET(wrb_params->features, IPCS));
874	SET_TX_WRB_HDR_BITS(tcpcs, hdr,
875			    BE_WRB_F_GET(wrb_params->features, TCPCS));
876	SET_TX_WRB_HDR_BITS(udpcs, hdr,
877			    BE_WRB_F_GET(wrb_params->features, UDPCS));
878
879	SET_TX_WRB_HDR_BITS(lso, hdr,
880			    BE_WRB_F_GET(wrb_params->features, LSO));
881	SET_TX_WRB_HDR_BITS(lso6, hdr,
882			    BE_WRB_F_GET(wrb_params->features, LSO6));
883	SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
884
885	/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
886	 * hack is not needed, the evt bit is set while ringing DB.
887	 */
888	SET_TX_WRB_HDR_BITS(event, hdr,
889			    BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
890	SET_TX_WRB_HDR_BITS(vlan, hdr,
891			    BE_WRB_F_GET(wrb_params->features, VLAN));
892	SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
893
894	SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
895	SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
896	SET_TX_WRB_HDR_BITS(mgmt, hdr,
897			    BE_WRB_F_GET(wrb_params->features, OS2BMC));
898}
899
900static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
901			  bool unmap_single)
902{
903	dma_addr_t dma;
904	u32 frag_len = le32_to_cpu(wrb->frag_len);
905
907	dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
908		(u64)le32_to_cpu(wrb->frag_pa_lo);
909	if (frag_len) {
910		if (unmap_single)
911			dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
912		else
913			dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
914	}
915}
916
917/* Grab a WRB header for xmit */
918static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
919{
920	u32 head = txo->q.head;
921
922	queue_head_inc(&txo->q);
923	return head;
924}
925
926/* Set up the WRB header for xmit */
927static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
928				struct be_tx_obj *txo,
929				struct be_wrb_params *wrb_params,
930				struct sk_buff *skb, u16 head)
931{
932	u32 num_frags = skb_wrb_cnt(skb);
933	struct be_queue_info *txq = &txo->q;
934	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
935
936	wrb_fill_hdr(adapter, hdr, wrb_params, skb);
937	be_dws_cpu_to_le(hdr, sizeof(*hdr));
938
939	BUG_ON(txo->sent_skb_list[head]);
940	txo->sent_skb_list[head] = skb;
941	txo->last_req_hdr = head;
942	atomic_add(num_frags, &txq->used);
943	txo->last_req_wrb_cnt = num_frags;
944	txo->pend_wrb_cnt += num_frags;
945}
946
947/* Setup a WRB fragment (buffer descriptor) for xmit */
948static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
949				 int len)
950{
951	struct be_eth_wrb *wrb;
952	struct be_queue_info *txq = &txo->q;
953
954	wrb = queue_head_node(txq);
955	wrb_fill(wrb, busaddr, len);
956	queue_head_inc(txq);
957}
958
959/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
960 * was invoked. The producer index is restored to the previous packet and the
961 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
962 */
963static void be_xmit_restore(struct be_adapter *adapter,
964			    struct be_tx_obj *txo, u32 head, bool map_single,
965			    u32 copied)
966{
967	struct device *dev;
968	struct be_eth_wrb *wrb;
969	struct be_queue_info *txq = &txo->q;
970
971	dev = &adapter->pdev->dev;
972	txq->head = head;
973
974	/* skip the first wrb (hdr); it's not mapped */
975	queue_head_inc(txq);
976	while (copied) {
977		wrb = queue_head_node(txq);
978		unmap_tx_frag(dev, wrb, map_single);
979		map_single = false;
980		copied -= le32_to_cpu(wrb->frag_len);
981		queue_head_inc(txq);
982	}
983
984	txq->head = head;
985}
986
987/* Enqueue the given packet for transmit. This routine allocates WRBs for the
988 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
989 * of WRBs used up by the packet.
990 */
991static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
992			   struct sk_buff *skb,
993			   struct be_wrb_params *wrb_params)
994{
995	u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
996	struct device *dev = &adapter->pdev->dev;
997	bool map_single = false;
998	u32 head;
999	dma_addr_t busaddr;
1000	int len;
1001
1002	head = be_tx_get_wrb_hdr(txo);
1003
1004	if (skb->len > skb->data_len) {
1005		len = skb_headlen(skb);
1006
1007		busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1008		if (dma_mapping_error(dev, busaddr))
1009			goto dma_err;
1010		map_single = true;
1011		be_tx_setup_wrb_frag(txo, busaddr, len);
1012		copied += len;
1013	}
1014
1015	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		len = skb_frag_size(frag);
1018
1019		busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1020		if (dma_mapping_error(dev, busaddr))
1021			goto dma_err;
1022		be_tx_setup_wrb_frag(txo, busaddr, len);
1023		copied += len;
1024	}
1025
1026	be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1027
1028	be_tx_stats_update(txo, skb);
1029	return wrb_cnt;
1030
1031dma_err:
1032	adapter->drv_stats.dma_map_errors++;
1033	be_xmit_restore(adapter, txo, head, map_single, copied);
1034	return 0;
1035}
1036
1037static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1038{
1039	return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1040}
1041
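/* Insert the VLAN tag (and the outer QnQ tag, if any) directly into the
 * packet so that HW VLAN tagging can be skipped.
 */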
1042static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1043					     struct sk_buff *skb,
1044					     struct be_wrb_params
1045					     *wrb_params)
1046{
1047	bool insert_vlan = false;
1048	u16 vlan_tag = 0;
1049
1050	skb = skb_share_check(skb, GFP_ATOMIC);
1051	if (unlikely(!skb))
1052		return skb;
1053
1054	if (skb_vlan_tag_present(skb)) {
1055		vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1056		insert_vlan = true;
1057	}
1058
1059	if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1060		if (!insert_vlan) {
1061			vlan_tag = adapter->pvid;
1062			insert_vlan = true;
1063		}
		/* F/W workaround: setting skip_hw_vlan = 1 informs the F/W to
		 * skip VLAN insertion
		 */
1067		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068	}
1069
1070	if (insert_vlan) {
1071		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072						vlan_tag);
1073		if (unlikely(!skb))
1074			return skb;
1075		__vlan_hwaccel_clear_tag(skb);
1076	}
1077
1078	/* Insert the outer VLAN, if any */
1079	if (adapter->qnq_vid) {
1080		vlan_tag = adapter->qnq_vid;
1081		skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082						vlan_tag);
1083		if (unlikely(!skb))
1084			return skb;
1085		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086	}
1087
1088	return skb;
1089}
1090
1091static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092{
1093	struct ethhdr *eh = (struct ethhdr *)skb->data;
1094	u16 offset = ETH_HLEN;
1095
1096	if (eh->h_proto == htons(ETH_P_IPV6)) {
1097		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099		offset += sizeof(struct ipv6hdr);
1100		if (ip6h->nexthdr != NEXTHDR_TCP &&
1101		    ip6h->nexthdr != NEXTHDR_UDP) {
1102			struct ipv6_opt_hdr *ehdr =
1103				(struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105			/* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106			if (ehdr->hdrlen == 0xff)
1107				return true;
1108		}
1109	}
1110	return false;
1111}
1112
1113static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114{
1115	return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116}
1117
1118static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119{
1120	return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121}
1122
1123static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124						  struct sk_buff *skb,
1125						  struct be_wrb_params
1126						  *wrb_params)
1127{
1128	struct vlan_ethhdr *veh = skb_vlan_eth_hdr(skb);
1129	unsigned int eth_hdr_len;
1130	struct iphdr *ip;
1131
1132	/* For padded packets, BE HW modifies tot_len field in IP header
	 * incorrectly when VLAN tag is inserted by HW.
1134	 * For padded packets, Lancer computes incorrect checksum.
1135	 */
1136	eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137						VLAN_ETH_HLEN : ETH_HLEN;
1138	if (skb->len <= 60 &&
1139	    (lancer_chip(adapter) || BE3_chip(adapter) ||
1140	     skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
1141		ip = (struct iphdr *)ip_hdr(skb);
1142		if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
1143			goto tx_drop;
1144	}
1145
1146	/* If vlan tag is already inlined in the packet, skip HW VLAN
1147	 * tagging in pvid-tagging mode
1148	 */
1149	if (be_pvid_tagging_enabled(adapter) &&
1150	    veh->h_vlan_proto == htons(ETH_P_8021Q))
1151		BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1152
1153	/* HW has a bug wherein it will calculate CSUM for VLAN
1154	 * pkts even though it is disabled.
1155	 * Manually insert VLAN in pkt.
1156	 */
1157	if (skb->ip_summed != CHECKSUM_PARTIAL &&
1158	    skb_vlan_tag_present(skb)) {
1159		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1160		if (unlikely(!skb))
1161			goto err;
1162	}
1163
1164	/* HW may lockup when VLAN HW tagging is requested on
1165	 * certain ipv6 packets. Drop such pkts if the HW workaround to
1166	 * skip HW tagging is not enabled by FW.
1167	 */
1168	if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1169		     (adapter->pvid || adapter->qnq_vid) &&
1170		     !qnq_async_evt_rcvd(adapter)))
1171		goto tx_drop;
1172
1173	/* Manual VLAN tag insertion to prevent:
1174	 * ASIC lockup when the ASIC inserts VLAN tag into
1175	 * certain ipv6 packets. Insert VLAN tags in driver,
1176	 * and set event, completion, vlan bits accordingly
1177	 * in the Tx WRB.
1178	 */
1179	if (be_ipv6_tx_stall_chk(adapter, skb) &&
1180	    be_vlan_tag_tx_chk(adapter, skb)) {
1181		skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1182		if (unlikely(!skb))
1183			goto err;
1184	}
1185
1186	return skb;
1187tx_drop:
1188	dev_kfree_skb_any(skb);
1189err:
1190	return NULL;
1191}
1192
1193static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1194					   struct sk_buff *skb,
1195					   struct be_wrb_params *wrb_params)
1196{
1197	int err;
1198
	/* Lancer, SH and BE3 in SRIOV mode have a bug wherein
	 * packets that are 32 bytes or less may cause a transmit stall
	 * on that port. The workaround is to pad such packets
	 * (len <= 32 bytes) to a minimum length of 36 bytes.
	 */
1204	if (skb->len <= 32) {
1205		if (skb_put_padto(skb, 36))
1206			return NULL;
1207	}
1208
1209	if (BEx_chip(adapter) || lancer_chip(adapter)) {
1210		skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1211		if (!skb)
1212			return NULL;
1213	}
1214
1215	/* The stack can send us skbs with length greater than
1216	 * what the HW can handle. Trim the extra bytes.
1217	 */
1218	WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1219	err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1220	WARN_ON(err);
1221
1222	return skb;
1223}
1224
1225static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1226{
1227	struct be_queue_info *txq = &txo->q;
1228	struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1229
1230	/* Mark the last request eventable if it hasn't been marked already */
1231	if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1232		hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1233
1234	/* compose a dummy wrb if there are odd set of wrbs to notify */
1235	if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1236		wrb_fill_dummy(queue_head_node(txq));
1237		queue_head_inc(txq);
1238		atomic_inc(&txq->used);
1239		txo->pend_wrb_cnt++;
1240		hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1241					   TX_HDR_WRB_NUM_SHIFT);
1242		hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1243					  TX_HDR_WRB_NUM_SHIFT);
1244	}
1245	be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1246	txo->pend_wrb_cnt = 0;
1247}
1248
1249/* OS2BMC related */
1250
1251#define DHCP_CLIENT_PORT	68
1252#define DHCP_SERVER_PORT	67
1253#define NET_BIOS_PORT1		137
1254#define NET_BIOS_PORT2		138
1255#define DHCPV6_RAS_PORT		547
1256
1257#define is_mc_allowed_on_bmc(adapter, eh)	\
1258	(!is_multicast_filt_enabled(adapter) &&	\
1259	 is_multicast_ether_addr(eh->h_dest) &&	\
1260	 !is_broadcast_ether_addr(eh->h_dest))
1261
1262#define is_bc_allowed_on_bmc(adapter, eh)	\
1263	(!is_broadcast_filt_enabled(adapter) &&	\
1264	 is_broadcast_ether_addr(eh->h_dest))
1265
1266#define is_arp_allowed_on_bmc(adapter, skb)	\
1267	(is_arp(skb) && is_arp_filt_enabled(adapter))
1268
1269#define is_arp(skb)	(skb->protocol == htons(ETH_P_ARP))
1270
1271#define is_arp_filt_enabled(adapter)	\
1272		(adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1273
1274#define is_dhcp_client_filt_enabled(adapter)	\
1275		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1276
1277#define is_dhcp_srvr_filt_enabled(adapter)	\
1278		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1279
1280#define is_nbios_filt_enabled(adapter)	\
1281		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1282
1283#define is_ipv6_na_filt_enabled(adapter)	\
1284		(adapter->bmc_filt_mask &	\
1285			BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1286
1287#define is_ipv6_ra_filt_enabled(adapter)	\
1288		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1289
1290#define is_ipv6_ras_filt_enabled(adapter)	\
1291		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1292
1293#define is_broadcast_filt_enabled(adapter)	\
1294		(adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1295
1296#define is_multicast_filt_enabled(adapter)	\
1297		(adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1298
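/* Check the BMC filtering rules to decide whether this packet must also
 * be sent to the BMC over the OS2BMC channel.
 */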
1299static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1300			       struct sk_buff **skb)
1301{
1302	struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1303	bool os2bmc = false;
1304
1305	if (!be_is_os2bmc_enabled(adapter))
1306		goto done;
1307
1308	if (!is_multicast_ether_addr(eh->h_dest))
1309		goto done;
1310
1311	if (is_mc_allowed_on_bmc(adapter, eh) ||
1312	    is_bc_allowed_on_bmc(adapter, eh) ||
1313	    is_arp_allowed_on_bmc(adapter, (*skb))) {
1314		os2bmc = true;
1315		goto done;
1316	}
1317
1318	if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1319		struct ipv6hdr *hdr = ipv6_hdr((*skb));
1320		u8 nexthdr = hdr->nexthdr;
1321
1322		if (nexthdr == IPPROTO_ICMPV6) {
1323			struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1324
1325			switch (icmp6->icmp6_type) {
1326			case NDISC_ROUTER_ADVERTISEMENT:
1327				os2bmc = is_ipv6_ra_filt_enabled(adapter);
1328				goto done;
1329			case NDISC_NEIGHBOUR_ADVERTISEMENT:
1330				os2bmc = is_ipv6_na_filt_enabled(adapter);
1331				goto done;
1332			default:
1333				break;
1334			}
1335		}
1336	}
1337
1338	if (is_udp_pkt((*skb))) {
1339		struct udphdr *udp = udp_hdr((*skb));
1340
1341		switch (ntohs(udp->dest)) {
1342		case DHCP_CLIENT_PORT:
1343			os2bmc = is_dhcp_client_filt_enabled(adapter);
1344			goto done;
1345		case DHCP_SERVER_PORT:
1346			os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1347			goto done;
1348		case NET_BIOS_PORT1:
1349		case NET_BIOS_PORT2:
1350			os2bmc = is_nbios_filt_enabled(adapter);
1351			goto done;
1352		case DHCPV6_RAS_PORT:
1353			os2bmc = is_ipv6_ras_filt_enabled(adapter);
1354			goto done;
1355		default:
1356			break;
1357		}
1358	}
1359done:
	/* For VLAN packets destined to the BMC, the ASIC expects the
	 * VLAN tag to be inline in the packet.
	 */
1363	if (os2bmc)
1364		*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1365
1366	return os2bmc;
1367}
1368
1369static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1370{
1371	struct be_adapter *adapter = netdev_priv(netdev);
1372	u16 q_idx = skb_get_queue_mapping(skb);
1373	struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1374	struct be_wrb_params wrb_params = { 0 };
1375	bool flush = !netdev_xmit_more();
1376	u16 wrb_cnt;
1377
1378	skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1379	if (unlikely(!skb))
1380		goto drop;
1381
1382	be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1383
1384	wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1385	if (unlikely(!wrb_cnt)) {
1386		dev_kfree_skb_any(skb);
1387		goto drop;
1388	}
1389
1390	/* if os2bmc is enabled and if the pkt is destined to bmc,
1391	 * enqueue the pkt a 2nd time with mgmt bit set.
1392	 */
1393	if (be_send_pkt_to_bmc(adapter, &skb)) {
1394		BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1395		wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1396		if (unlikely(!wrb_cnt))
1397			goto drop;
1398		else
1399			skb_get(skb);
1400	}
1401
1402	if (be_is_txq_full(txo)) {
1403		netif_stop_subqueue(netdev, q_idx);
1404		tx_stats(txo)->tx_stops++;
1405	}
1406
1407	if (flush || __netif_subqueue_stopped(netdev, q_idx))
1408		be_xmit_flush(adapter, txo);
1409
1410	return NETDEV_TX_OK;
1411drop:
1412	tx_stats(txo)->tx_drv_drops++;
1413	/* Flush the already enqueued tx requests */
1414	if (flush && txo->pend_wrb_cnt)
1415		be_xmit_flush(adapter, txo);
1416
1417	return NETDEV_TX_OK;
1418}
1419
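/* Dump the TX queue, TX completion queue and pending skb details to help
 * debug a TX timeout; on Lancer, also initiate a FW reset.
 */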
1420static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
1421{
1422	struct be_adapter *adapter = netdev_priv(netdev);
1423	struct device *dev = &adapter->pdev->dev;
1424	struct be_tx_obj *txo;
1425	struct sk_buff *skb;
1426	struct tcphdr *tcphdr;
1427	struct udphdr *udphdr;
1428	u32 *entry;
1429	int status;
1430	int i, j;
1431
1432	for_all_tx_queues(adapter, txo, i) {
1433		dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1434			 i, txo->q.head, txo->q.tail,
1435			 atomic_read(&txo->q.used), txo->q.id);
1436
1437		entry = txo->q.dma_mem.va;
1438		for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1439			if (entry[j] != 0 || entry[j + 1] != 0 ||
1440			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1441				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1442					 j, entry[j], entry[j + 1],
1443					 entry[j + 2], entry[j + 3]);
1444			}
1445		}
1446
1447		entry = txo->cq.dma_mem.va;
1448		dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1449			 i, txo->cq.head, txo->cq.tail,
1450			 atomic_read(&txo->cq.used));
1451		for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1452			if (entry[j] != 0 || entry[j + 1] != 0 ||
1453			    entry[j + 2] != 0 || entry[j + 3] != 0) {
1454				dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1455					 j, entry[j], entry[j + 1],
1456					 entry[j + 2], entry[j + 3]);
1457			}
1458		}
1459
1460		for (j = 0; j < TX_Q_LEN; j++) {
1461			if (txo->sent_skb_list[j]) {
1462				skb = txo->sent_skb_list[j];
1463				if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1464					tcphdr = tcp_hdr(skb);
1465					dev_info(dev, "TCP source port %d\n",
1466						 ntohs(tcphdr->source));
1467					dev_info(dev, "TCP dest port %d\n",
1468						 ntohs(tcphdr->dest));
					dev_info(dev, "TCP sequence num %u\n",
						 ntohl(tcphdr->seq));
					dev_info(dev, "TCP ack_seq %u\n",
						 ntohl(tcphdr->ack_seq));
1473				} else if (ip_hdr(skb)->protocol ==
1474					   IPPROTO_UDP) {
1475					udphdr = udp_hdr(skb);
1476					dev_info(dev, "UDP source port %d\n",
1477						 ntohs(udphdr->source));
1478					dev_info(dev, "UDP dest port %d\n",
1479						 ntohs(udphdr->dest));
1480				}
1481				dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1482					 j, skb, skb->len, skb->protocol);
1483			}
1484		}
1485	}
1486
1487	if (lancer_chip(adapter)) {
1488		dev_info(dev, "Initiating reset due to tx timeout\n");
1489		dev_info(dev, "Resetting adapter\n");
1490		status = lancer_physdev_ctrl(adapter,
1491					     PHYSDEV_CONTROL_FW_RESET_MASK);
1492		if (status)
1493			dev_err(dev, "Reset failed .. Reboot server\n");
1494	}
1495}
1496
1497static inline bool be_in_all_promisc(struct be_adapter *adapter)
1498{
1499	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1500			BE_IF_FLAGS_ALL_PROMISCUOUS;
1501}
1502
1503static int be_set_vlan_promisc(struct be_adapter *adapter)
1504{
1505	struct device *dev = &adapter->pdev->dev;
1506	int status;
1507
1508	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1509		return 0;
1510
1511	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1512	if (!status) {
1513		dev_info(dev, "Enabled VLAN promiscuous mode\n");
1514		adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1515	} else {
1516		dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1517	}
1518	return status;
1519}
1520
1521static int be_clear_vlan_promisc(struct be_adapter *adapter)
1522{
1523	struct device *dev = &adapter->pdev->dev;
1524	int status;
1525
1526	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1527	if (!status) {
		dev_info(dev, "Disabled VLAN promiscuous mode\n");
1529		adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1530	}
1531	return status;
1532}
1533
1534/*
1535 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1536 * If the user configures more, place BE in vlan promiscuous mode.
1537 */
1538static int be_vid_config(struct be_adapter *adapter)
1539{
1540	struct device *dev = &adapter->pdev->dev;
1541	u16 vids[BE_NUM_VLANS_SUPPORTED];
1542	u16 num = 0, i = 0;
1543	int status = 0;
1544
1545	/* No need to change the VLAN state if the I/F is in promiscuous */
1546	if (adapter->netdev->flags & IFF_PROMISC)
1547		return 0;
1548
1549	if (adapter->vlans_added > be_max_vlans(adapter))
1550		return be_set_vlan_promisc(adapter);
1551
1552	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1553		status = be_clear_vlan_promisc(adapter);
1554		if (status)
1555			return status;
1556	}
1557	/* Construct VLAN Table to give to HW */
1558	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1559		vids[num++] = cpu_to_le16(i);
1560
1561	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1562	if (status) {
1563		dev_err(dev, "Setting HW VLAN filtering failed\n");
1564		/* Set to VLAN promisc mode as setting VLAN filter failed */
1565		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1566		    addl_status(status) ==
1567				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1568			return be_set_vlan_promisc(adapter);
1569	}
1570	return status;
1571}
1572
1573static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1574{
1575	struct be_adapter *adapter = netdev_priv(netdev);
1576	int status = 0;
1577
1578	mutex_lock(&adapter->rx_filter_lock);
1579
1580	/* Packets with VID 0 are always received by Lancer by default */
1581	if (lancer_chip(adapter) && vid == 0)
1582		goto done;
1583
1584	if (test_bit(vid, adapter->vids))
1585		goto done;
1586
1587	set_bit(vid, adapter->vids);
1588	adapter->vlans_added++;
1589
1590	status = be_vid_config(adapter);
1591done:
1592	mutex_unlock(&adapter->rx_filter_lock);
1593	return status;
1594}
1595
1596static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1597{
1598	struct be_adapter *adapter = netdev_priv(netdev);
1599	int status = 0;
1600
1601	mutex_lock(&adapter->rx_filter_lock);
1602
1603	/* Packets with VID 0 are always received by Lancer by default */
1604	if (lancer_chip(adapter) && vid == 0)
1605		goto done;
1606
1607	if (!test_bit(vid, adapter->vids))
1608		goto done;
1609
1610	clear_bit(vid, adapter->vids);
1611	adapter->vlans_added--;
1612
1613	status = be_vid_config(adapter);
1614done:
1615	mutex_unlock(&adapter->rx_filter_lock);
1616	return status;
1617}
1618
1619static void be_set_all_promisc(struct be_adapter *adapter)
1620{
1621	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1622	adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1623}
1624
1625static void be_set_mc_promisc(struct be_adapter *adapter)
1626{
1627	int status;
1628
1629	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1630		return;
1631
1632	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1633	if (!status)
1634		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1635}
1636
1637static void be_set_uc_promisc(struct be_adapter *adapter)
1638{
1639	int status;
1640
1641	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1642		return;
1643
1644	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1645	if (!status)
1646		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1647}
1648
1649static void be_clear_uc_promisc(struct be_adapter *adapter)
1650{
1651	int status;
1652
1653	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1654		return;
1655
1656	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1657	if (!status)
1658		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1659}
1660
1661/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1662 * We use a single callback function for both sync and unsync. We really don't
1663 * add/remove addresses through this callback. But, we use it to detect changes
1664 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1665 */
1666static int be_uc_list_update(struct net_device *netdev,
1667			     const unsigned char *addr)
1668{
1669	struct be_adapter *adapter = netdev_priv(netdev);
1670
1671	adapter->update_uc_list = true;
1672	return 0;
1673}
1674
1675static int be_mc_list_update(struct net_device *netdev,
1676			     const unsigned char *addr)
1677{
1678	struct be_adapter *adapter = netdev_priv(netdev);
1679
1680	adapter->update_mc_list = true;
1681	return 0;
1682}
1683
1684static void be_set_mc_list(struct be_adapter *adapter)
1685{
1686	struct net_device *netdev = adapter->netdev;
1687	struct netdev_hw_addr *ha;
1688	bool mc_promisc = false;
1689	int status;
1690
1691	netif_addr_lock_bh(netdev);
1692	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1693
1694	if (netdev->flags & IFF_PROMISC) {
1695		adapter->update_mc_list = false;
1696	} else if (netdev->flags & IFF_ALLMULTI ||
1697		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
1698		/* Enable multicast promisc if num configured exceeds
1699		 * what we support
1700		 */
1701		mc_promisc = true;
1702		adapter->update_mc_list = false;
1703	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1704		/* Update mc-list unconditionally if the iface was previously
1705		 * in mc-promisc mode and now is out of that mode.
1706		 */
1707		adapter->update_mc_list = true;
1708	}
1709
1710	if (adapter->update_mc_list) {
1711		int i = 0;
1712
1713		/* cache the mc-list in adapter */
1714		netdev_for_each_mc_addr(ha, netdev) {
1715			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1716			i++;
1717		}
1718		adapter->mc_count = netdev_mc_count(netdev);
1719	}
1720	netif_addr_unlock_bh(netdev);
1721
1722	if (mc_promisc) {
1723		be_set_mc_promisc(adapter);
1724	} else if (adapter->update_mc_list) {
1725		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1726		if (!status)
1727			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1728		else
1729			be_set_mc_promisc(adapter);
1730
1731		adapter->update_mc_list = false;
1732	}
1733}
1734
1735static void be_clear_mc_list(struct be_adapter *adapter)
1736{
1737	struct net_device *netdev = adapter->netdev;
1738
1739	__dev_mc_unsync(netdev, NULL);
1740	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1741	adapter->mc_count = 0;
1742}
1743
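/* If the uc-list entry matches the MAC already programmed via pmac_id[0],
 * reuse that pmac entry instead of programming a new one.
 */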
1744static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1745{
1746	if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1747		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1748		return 0;
1749	}
1750
1751	return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1752			       adapter->if_handle,
1753			       &adapter->pmac_id[uc_idx + 1], 0);
1754}
1755
1756static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1757{
1758	if (pmac_id == adapter->pmac_id[0])
1759		return;
1760
1761	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1762}
1763
1764static void be_set_uc_list(struct be_adapter *adapter)
1765{
1766	struct net_device *netdev = adapter->netdev;
1767	struct netdev_hw_addr *ha;
1768	bool uc_promisc = false;
1769	int curr_uc_macs = 0, i;
1770
1771	netif_addr_lock_bh(netdev);
1772	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1773
1774	if (netdev->flags & IFF_PROMISC) {
1775		adapter->update_uc_list = false;
1776	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1777		uc_promisc = true;
1778		adapter->update_uc_list = false;
	} else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1780		/* Update uc-list unconditionally if the iface was previously
1781		 * in uc-promisc mode and now is out of that mode.
1782		 */
1783		adapter->update_uc_list = true;
1784	}
1785
1786	if (adapter->update_uc_list) {
1787		/* cache the uc-list in adapter array */
1788		i = 0;
1789		netdev_for_each_uc_addr(ha, netdev) {
1790			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1791			i++;
1792		}
1793		curr_uc_macs = netdev_uc_count(netdev);
1794	}
1795	netif_addr_unlock_bh(netdev);
1796
1797	if (uc_promisc) {
1798		be_set_uc_promisc(adapter);
1799	} else if (adapter->update_uc_list) {
1800		be_clear_uc_promisc(adapter);
1801
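		/* Replace the whole UC list: delete the previously programmed
		 * entries, then program the current list from scratch.
		 */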
1802		for (i = 0; i < adapter->uc_macs; i++)
1803			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1804
1805		for (i = 0; i < curr_uc_macs; i++)
1806			be_uc_mac_add(adapter, i);
1807		adapter->uc_macs = curr_uc_macs;
1808		adapter->update_uc_list = false;
1809	}
1810}
1811
1812static void be_clear_uc_list(struct be_adapter *adapter)
1813{
1814	struct net_device *netdev = adapter->netdev;
1815	int i;
1816
1817	__dev_uc_unsync(netdev, NULL);
1818	for (i = 0; i < adapter->uc_macs; i++)
1819		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1820
1821	adapter->uc_macs = 0;
1822}
1823
1824static void __be_set_rx_mode(struct be_adapter *adapter)
1825{
1826	struct net_device *netdev = adapter->netdev;
1827
1828	mutex_lock(&adapter->rx_filter_lock);
1829
1830	if (netdev->flags & IFF_PROMISC) {
1831		if (!be_in_all_promisc(adapter))
1832			be_set_all_promisc(adapter);
1833	} else if (be_in_all_promisc(adapter)) {
1834		/* We need to re-program the vlan-list or clear
1835		 * vlan-promisc mode (if needed) when the interface
1836		 * comes out of promisc mode.
1837		 */
1838		be_vid_config(adapter);
1839	}
1840
1841	be_set_uc_list(adapter);
1842	be_set_mc_list(adapter);
1843
1844	mutex_unlock(&adapter->rx_filter_lock);
1845}
1846
1847static void be_work_set_rx_mode(struct work_struct *work)
1848{
1849	struct be_cmd_work *cmd_work =
1850				container_of(work, struct be_cmd_work, work);
1851
1852	__be_set_rx_mode(cmd_work->adapter);
1853	kfree(cmd_work);
1854}
1855
1856static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1857{
1858	struct be_adapter *adapter = netdev_priv(netdev);
1859	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1860	int status;
1861
1862	if (!sriov_enabled(adapter))
1863		return -EPERM;
1864
1865	if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1866		return -EINVAL;
1867
	/* Proceed further only if the user-provided MAC is different
	 * from the active MAC
	 */
1871	if (ether_addr_equal(mac, vf_cfg->mac_addr))
1872		return 0;
1873
1874	if (BEx_chip(adapter)) {
1875		be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1876				vf + 1);
1877
1878		status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1879					 &vf_cfg->pmac_id, vf + 1);
1880	} else {
1881		status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1882					vf + 1);
1883	}
1884
1885	if (status) {
		dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
			mac, vf, status);
1888		return be_cmd_status(status);
1889	}
1890
1891	ether_addr_copy(vf_cfg->mac_addr, mac);
1892
1893	return 0;
1894}
1895
1896static int be_get_vf_config(struct net_device *netdev, int vf,
1897			    struct ifla_vf_info *vi)
1898{
1899	struct be_adapter *adapter = netdev_priv(netdev);
1900	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1901
1902	if (!sriov_enabled(adapter))
1903		return -EPERM;
1904
1905	if (vf >= adapter->num_vfs)
1906		return -EINVAL;
1907
1908	vi->vf = vf;
1909	vi->max_tx_rate = vf_cfg->tx_rate;
1910	vi->min_tx_rate = 0;
1911	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1912	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1913	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1914	vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1915	vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1916
1917	return 0;
1918}
1919
1920static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1921{
1922	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1923	u16 vids[BE_NUM_VLANS_SUPPORTED];
1924	int vf_if_id = vf_cfg->if_handle;
1925	int status;
1926
1927	/* Enable Transparent VLAN Tagging */
1928	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1929	if (status)
1930		return status;
1931
	/* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1933	vids[0] = 0;
1934	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1935	if (!status)
		dev_info(&adapter->pdev->dev,
			 "Cleared guest VLANs on VF%d\n", vf);
1938
1939	/* After TVT is enabled, disallow VFs to program VLAN filters */
1940	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1941		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1942						  ~BE_PRIV_FILTMGMT, vf + 1);
1943		if (!status)
1944			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1945	}
1946	return 0;
1947}
1948
1949static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1950{
1951	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1952	struct device *dev = &adapter->pdev->dev;
1953	int status;
1954
1955	/* Reset Transparent VLAN Tagging. */
1956	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1957				       vf_cfg->if_handle, 0, 0);
1958	if (status)
1959		return status;
1960
1961	/* Allow VFs to program VLAN filtering */
1962	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1963		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1964						  BE_PRIV_FILTMGMT, vf + 1);
1965		if (!status) {
1966			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
			dev_info(dev, "VF%d: FILTMGMT priv enabled\n", vf);
1968		}
1969	}
1970
	dev_info(dev,
		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag\n");
1973	return 0;
1974}
1975
1976static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1977			  __be16 vlan_proto)
1978{
1979	struct be_adapter *adapter = netdev_priv(netdev);
1980	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1981	int status;
1982
1983	if (!sriov_enabled(adapter))
1984		return -EPERM;
1985
1986	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1987		return -EINVAL;
1988
1989	if (vlan_proto != htons(ETH_P_8021Q))
1990		return -EPROTONOSUPPORT;
1991
1992	if (vlan || qos) {
1993		vlan |= qos << VLAN_PRIO_SHIFT;
1994		status = be_set_vf_tvt(adapter, vf, vlan);
1995	} else {
1996		status = be_clear_vf_tvt(adapter, vf);
1997	}
1998
1999	if (status) {
2000		dev_err(&adapter->pdev->dev,
2001			"VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2002			status);
2003		return be_cmd_status(status);
2004	}
2005
2006	vf_cfg->vlan_tag = vlan;
2007	return 0;
2008}
2009
2010static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2011			     int min_tx_rate, int max_tx_rate)
2012{
2013	struct be_adapter *adapter = netdev_priv(netdev);
2014	struct device *dev = &adapter->pdev->dev;
2015	int percent_rate, status = 0;
2016	u16 link_speed = 0;
2017	u8 link_status;
2018
2019	if (!sriov_enabled(adapter))
2020		return -EPERM;
2021
2022	if (vf >= adapter->num_vfs)
2023		return -EINVAL;
2024
2025	if (min_tx_rate)
2026		return -EINVAL;
2027
2028	if (!max_tx_rate)
2029		goto config_qos;
2030
2031	status = be_cmd_link_status_query(adapter, &link_speed,
2032					  &link_status, 0);
2033	if (status)
2034		goto err;
2035
2036	if (!link_status) {
2037		dev_err(dev, "TX-rate setting not allowed when link is down\n");
2038		status = -ENETDOWN;
2039		goto err;
2040	}
2041
2042	if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2043		dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2044			link_speed);
2045		status = -EINVAL;
2046		goto err;
2047	}
2048
	/* On Skyhawk the QoS setting must be specified as a % of link speed */
2050	percent_rate = link_speed / 100;
2051	if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2052		dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2053			percent_rate);
2054		status = -EINVAL;
2055		goto err;
2056	}
2057
2058config_qos:
2059	status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2060	if (status)
2061		goto err;
2062
2063	adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2064	return 0;
2065
2066err:
2067	dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2068		max_tx_rate, vf);
2069	return be_cmd_status(status);
2070}
2071
2072static int be_set_vf_link_state(struct net_device *netdev, int vf,
2073				int link_state)
2074{
2075	struct be_adapter *adapter = netdev_priv(netdev);
2076	int status;
2077
2078	if (!sriov_enabled(adapter))
2079		return -EPERM;
2080
2081	if (vf >= adapter->num_vfs)
2082		return -EINVAL;
2083
	status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2085	if (status) {
2086		dev_err(&adapter->pdev->dev,
2087			"Link state change on VF %d failed: %#x\n", vf, status);
2088		return be_cmd_status(status);
2089	}
2090
2091	adapter->vf_cfg[vf].plink_tracking = link_state;
2092
2093	return 0;
2094}
2095
2096static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2097{
2098	struct be_adapter *adapter = netdev_priv(netdev);
2099	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2100	u8 spoofchk;
2101	int status;
2102
2103	if (!sriov_enabled(adapter))
2104		return -EPERM;
2105
2106	if (vf >= adapter->num_vfs)
2107		return -EINVAL;
2108
2109	if (BEx_chip(adapter))
2110		return -EOPNOTSUPP;
2111
2112	if (enable == vf_cfg->spoofchk)
2113		return 0;
2114
2115	spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2116
2117	status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2118				       0, spoofchk);
2119	if (status) {
2120		dev_err(&adapter->pdev->dev,
2121			"Spoofchk change on VF %d failed: %#x\n", vf, status);
2122		return be_cmd_status(status);
2123	}
2124
2125	vf_cfg->spoofchk = enable;
2126	return 0;
2127}
2128
2129static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2130			  ulong now)
2131{
2132	aic->rx_pkts_prev = rx_pkts;
2133	aic->tx_reqs_prev = tx_pkts;
2134	aic->jiffies = now;
2135}
2136
2137static int be_get_new_eqd(struct be_eq_obj *eqo)
2138{
2139	struct be_adapter *adapter = eqo->adapter;
2140	int eqd, start;
2141	struct be_aic_obj *aic;
2142	struct be_rx_obj *rxo;
2143	struct be_tx_obj *txo;
2144	u64 rx_pkts = 0, tx_pkts = 0;
2145	ulong now;
2146	u32 pps, delta;
2147	int i;
2148
2149	aic = &adapter->aic_obj[eqo->idx];
2150	if (!adapter->aic_enabled) {
2151		if (aic->jiffies)
2152			aic->jiffies = 0;
2153		eqd = aic->et_eqd;
2154		return eqd;
2155	}
2156
2157	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2158		do {
2159			start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2160			rx_pkts += rxo->stats.rx_pkts;
2161		} while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2162	}
2163
2164	for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2165		do {
2166			start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2167			tx_pkts += txo->stats.tx_reqs;
2168		} while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2169	}
2170
	/* Skip if the counters wrapped around or this is the first sample */
2172	now = jiffies;
2173	if (!aic->jiffies || time_before(now, aic->jiffies) ||
2174	    rx_pkts < aic->rx_pkts_prev ||
2175	    tx_pkts < aic->tx_reqs_prev) {
2176		be_aic_update(aic, rx_pkts, tx_pkts, now);
2177		return aic->prev_eqd;
2178	}
2179
2180	delta = jiffies_to_msecs(now - aic->jiffies);
2181	if (delta == 0)
2182		return aic->prev_eqd;
2183
2184	pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2185		(((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
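	/* Scale the EQ delay with the combined rx+tx packet rate: roughly 4
	 * delay units per 15K pkts/sec, clamped to the [min_eqd, max_eqd]
	 * range below; rates yielding a delay below 8 are treated as zero.
	 */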
2186	eqd = (pps / 15000) << 2;
2187
2188	if (eqd < 8)
2189		eqd = 0;
2190	eqd = min_t(u32, eqd, aic->max_eqd);
2191	eqd = max_t(u32, eqd, aic->min_eqd);
2192
2193	be_aic_update(aic, rx_pkts, tx_pkts, now);
2194
2195	return eqd;
2196}
2197
2198/* For Skyhawk-R only */
2199static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2200{
2201	struct be_adapter *adapter = eqo->adapter;
2202	struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2203	ulong now = jiffies;
2204	int eqd;
2205	u32 mult_enc;
2206
2207	if (!adapter->aic_enabled)
2208		return 0;
2209
2210	if (jiffies_to_msecs(now - aic->jiffies) < 1)
2211		eqd = aic->prev_eqd;
2212	else
2213		eqd = be_get_new_eqd(eqo);
2214
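	/* Map the computed delay onto one of the four R2I delay encodings
	 * supported by the EQ doorbell on Skyhawk-R.
	 */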
2215	if (eqd > 100)
2216		mult_enc = R2I_DLY_ENC_1;
2217	else if (eqd > 60)
2218		mult_enc = R2I_DLY_ENC_2;
2219	else if (eqd > 20)
2220		mult_enc = R2I_DLY_ENC_3;
2221	else
2222		mult_enc = R2I_DLY_ENC_0;
2223
2224	aic->prev_eqd = eqd;
2225
2226	return mult_enc;
2227}
2228
2229void be_eqd_update(struct be_adapter *adapter, bool force_update)
2230{
2231	struct be_set_eqd set_eqd[MAX_EVT_QS];
2232	struct be_aic_obj *aic;
2233	struct be_eq_obj *eqo;
2234	int i, num = 0, eqd;
2235
2236	for_all_evt_queues(adapter, eqo, i) {
2237		aic = &adapter->aic_obj[eqo->idx];
2238		eqd = be_get_new_eqd(eqo);
2239		if (force_update || eqd != aic->prev_eqd) {
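			/* Convert the delay value into the multiplier format
			 * expected by the modify-EQ-delay FW command.
			 */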
			set_eqd[num].delay_multiplier = (eqd * 65) / 100;
2241			set_eqd[num].eq_id = eqo->q.id;
2242			aic->prev_eqd = eqd;
2243			num++;
2244		}
2245	}
2246
2247	if (num)
2248		be_cmd_modify_eqd(adapter, set_eqd, num);
2249}
2250
2251static void be_rx_stats_update(struct be_rx_obj *rxo,
2252			       struct be_rx_compl_info *rxcp)
2253{
2254	struct be_rx_stats *stats = rx_stats(rxo);
2255
2256	u64_stats_update_begin(&stats->sync);
2257	stats->rx_compl++;
2258	stats->rx_bytes += rxcp->pkt_size;
2259	stats->rx_pkts++;
2260	if (rxcp->tunneled)
2261		stats->rx_vxlan_offload_pkts++;
2262	if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2263		stats->rx_mcast_pkts++;
2264	if (rxcp->err)
2265		stats->rx_compl_err++;
2266	u64_stats_update_end(&stats->sync);
2267}
2268
2269static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2270{
	/* The L4 checksum is not reliable for non-TCP/UDP packets.
	 * Also ignore ipcksm for ipv6 pkts
	 */
2274	return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2275		(rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2276}
2277
2278static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2279{
2280	struct be_adapter *adapter = rxo->adapter;
2281	struct be_rx_page_info *rx_page_info;
2282	struct be_queue_info *rxq = &rxo->q;
2283	u32 frag_idx = rxq->tail;
2284
2285	rx_page_info = &rxo->page_info_tbl[frag_idx];
2286	BUG_ON(!rx_page_info->page);
2287
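	/* The compound page backing this frag is unmapped only when its last
	 * frag is consumed; for earlier frags just sync the frag for the CPU.
	 */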
2288	if (rx_page_info->last_frag) {
2289		dma_unmap_page(&adapter->pdev->dev,
2290			       dma_unmap_addr(rx_page_info, bus),
2291			       adapter->big_page_size, DMA_FROM_DEVICE);
2292		rx_page_info->last_frag = false;
2293	} else {
2294		dma_sync_single_for_cpu(&adapter->pdev->dev,
2295					dma_unmap_addr(rx_page_info, bus),
2296					rx_frag_size, DMA_FROM_DEVICE);
2297	}
2298
2299	queue_tail_inc(rxq);
2300	atomic_dec(&rxq->used);
2301	return rx_page_info;
2302}
2303
/* Throw away the data in the Rx completion */
2305static void be_rx_compl_discard(struct be_rx_obj *rxo,
2306				struct be_rx_compl_info *rxcp)
2307{
2308	struct be_rx_page_info *page_info;
2309	u16 i, num_rcvd = rxcp->num_rcvd;
2310
2311	for (i = 0; i < num_rcvd; i++) {
2312		page_info = get_rx_page_info(rxo);
2313		put_page(page_info->page);
2314		memset(page_info, 0, sizeof(*page_info));
2315	}
2316}
2317
2318/*
2319 * skb_fill_rx_data forms a complete skb for an ether frame
2320 * indicated by rxcp.
2321 */
2322static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2323			     struct be_rx_compl_info *rxcp)
2324{
2325	struct be_rx_page_info *page_info;
2326	u16 i, j;
2327	u16 hdr_len, curr_frag_len, remaining;
2328	u8 *start;
2329
2330	page_info = get_rx_page_info(rxo);
2331	start = page_address(page_info->page) + page_info->page_offset;
2332	prefetch(start);
2333
2334	/* Copy data in the first descriptor of this completion */
2335	curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2336
2337	skb->len = curr_frag_len;
2338	if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2339		memcpy(skb->data, start, curr_frag_len);
2340		/* Complete packet has now been moved to data */
2341		put_page(page_info->page);
2342		skb->data_len = 0;
2343		skb->tail += curr_frag_len;
2344	} else {
2345		hdr_len = ETH_HLEN;
2346		memcpy(skb->data, start, hdr_len);
2347		skb_shinfo(skb)->nr_frags = 1;
2348		skb_frag_set_page(skb, 0, page_info->page);
2349		skb_frag_off_set(&skb_shinfo(skb)->frags[0],
2350				 page_info->page_offset + hdr_len);
2351		skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2352				  curr_frag_len - hdr_len);
2353		skb->data_len = curr_frag_len - hdr_len;
2354		skb->truesize += rx_frag_size;
2355		skb->tail += hdr_len;
2356	}
2357	page_info->page = NULL;
2358
2359	if (rxcp->pkt_size <= rx_frag_size) {
2360		BUG_ON(rxcp->num_rcvd != 1);
2361		return;
2362	}
2363
2364	/* More frags present for this completion */
2365	remaining = rxcp->pkt_size - curr_frag_len;
2366	for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2367		page_info = get_rx_page_info(rxo);
2368		curr_frag_len = min(remaining, rx_frag_size);
2369
2370		/* Coalesce all frags from the same physical page in one slot */
2371		if (page_info->page_offset == 0) {
2372			/* Fresh page */
2373			j++;
2374			skb_frag_set_page(skb, j, page_info->page);
2375			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2376					 page_info->page_offset);
2377			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2378			skb_shinfo(skb)->nr_frags++;
2379		} else {
2380			put_page(page_info->page);
2381		}
2382
2383		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2384		skb->len += curr_frag_len;
2385		skb->data_len += curr_frag_len;
2386		skb->truesize += rx_frag_size;
2387		remaining -= curr_frag_len;
2388		page_info->page = NULL;
2389	}
2390	BUG_ON(j > MAX_SKB_FRAGS);
2391}
2392
2393/* Process the RX completion indicated by rxcp when GRO is disabled */
2394static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2395				struct be_rx_compl_info *rxcp)
2396{
2397	struct be_adapter *adapter = rxo->adapter;
2398	struct net_device *netdev = adapter->netdev;
2399	struct sk_buff *skb;
2400
2401	skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2402	if (unlikely(!skb)) {
2403		rx_stats(rxo)->rx_drops_no_skbs++;
2404		be_rx_compl_discard(rxo, rxcp);
2405		return;
2406	}
2407
2408	skb_fill_rx_data(rxo, skb, rxcp);
2409
2410	if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2411		skb->ip_summed = CHECKSUM_UNNECESSARY;
2412	else
2413		skb_checksum_none_assert(skb);
2414
2415	skb->protocol = eth_type_trans(skb, netdev);
2416	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2417	if (netdev->features & NETIF_F_RXHASH)
2418		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2419
2420	skb->csum_level = rxcp->tunneled;
2421	skb_mark_napi_id(skb, napi);
2422
2423	if (rxcp->vlanf)
2424		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2425
2426	netif_receive_skb(skb);
2427}
2428
2429/* Process the RX completion indicated by rxcp when GRO is enabled */
2430static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2431				    struct napi_struct *napi,
2432				    struct be_rx_compl_info *rxcp)
2433{
2434	struct be_adapter *adapter = rxo->adapter;
2435	struct be_rx_page_info *page_info;
2436	struct sk_buff *skb = NULL;
2437	u16 remaining, curr_frag_len;
2438	u16 i, j;
2439
2440	skb = napi_get_frags(napi);
2441	if (!skb) {
2442		be_rx_compl_discard(rxo, rxcp);
2443		return;
2444	}
2445
2446	remaining = rxcp->pkt_size;
2447	for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2448		page_info = get_rx_page_info(rxo);
2449
2450		curr_frag_len = min(remaining, rx_frag_size);
2451
2452		/* Coalesce all frags from the same physical page in one slot */
2453		if (i == 0 || page_info->page_offset == 0) {
2454			/* First frag or Fresh page */
2455			j++;
2456			skb_frag_set_page(skb, j, page_info->page);
2457			skb_frag_off_set(&skb_shinfo(skb)->frags[j],
2458					 page_info->page_offset);
2459			skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2460		} else {
2461			put_page(page_info->page);
2462		}
2463		skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2464		skb->truesize += rx_frag_size;
2465		remaining -= curr_frag_len;
2466		memset(page_info, 0, sizeof(*page_info));
2467	}
2468	BUG_ON(j > MAX_SKB_FRAGS);
2469
2470	skb_shinfo(skb)->nr_frags = j + 1;
2471	skb->len = rxcp->pkt_size;
2472	skb->data_len = rxcp->pkt_size;
2473	skb->ip_summed = CHECKSUM_UNNECESSARY;
2474	skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2475	if (adapter->netdev->features & NETIF_F_RXHASH)
2476		skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2477
2478	skb->csum_level = rxcp->tunneled;
2479
2480	if (rxcp->vlanf)
2481		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2482
2483	napi_gro_frags(napi);
2484}
2485
2486static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2487				 struct be_rx_compl_info *rxcp)
2488{
2489	rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2490	rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2491	rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2492	rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2493	rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2494	rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2495	rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2496	rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2497	rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2498	rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2499	rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2500	if (rxcp->vlanf) {
2501		rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2502		rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2503	}
2504	rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2505	rxcp->tunneled =
2506		GET_RX_COMPL_V1_BITS(tunneled, compl);
2507}
2508
2509static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2510				 struct be_rx_compl_info *rxcp)
2511{
2512	rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2513	rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2514	rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2515	rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2516	rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2517	rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2518	rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2519	rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2520	rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2521	rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2522	rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2523	if (rxcp->vlanf) {
2524		rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2525		rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2526	}
2527	rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2528	rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2529}
2530
2531static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2532{
2533	struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2534	struct be_rx_compl_info *rxcp = &rxo->rxcp;
2535	struct be_adapter *adapter = rxo->adapter;
2536
	/* For checking the valid bit it is OK to use either definition as the
	 * valid bit is at the same position in both v0 and v1 Rx compl
	 */
2539	if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2540		return NULL;
2541
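	/* Read the rest of the completion only after the valid bit is seen */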
2542	rmb();
2543	be_dws_le_to_cpu(compl, sizeof(*compl));
2544
2545	if (adapter->be3_native)
2546		be_parse_rx_compl_v1(compl, rxcp);
2547	else
2548		be_parse_rx_compl_v0(compl, rxcp);
2549
2550	if (rxcp->ip_frag)
2551		rxcp->l4_csum = 0;
2552
2553	if (rxcp->vlanf) {
2554		/* In QNQ modes, if qnq bit is not set, then the packet was
2555		 * tagged only with the transparent outer vlan-tag and must
2556		 * not be treated as a vlan packet by host
2557		 */
2558		if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2559			rxcp->vlanf = 0;
2560
2561		if (!lancer_chip(adapter))
2562			rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2563
2564		if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2565		    !test_bit(rxcp->vlan_tag, adapter->vids))
2566			rxcp->vlanf = 0;
2567	}
2568
	/* As the compl has been parsed, reset it; we won't touch it again */
2570	compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2571
2572	queue_tail_inc(&rxo->cq);
2573	return rxcp;
2574}
2575
2576static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2577{
2578	u32 order = get_order(size);
2579
2580	if (order > 0)
2581		gfp |= __GFP_COMP;
	return alloc_pages(gfp, order);
2583}
2584
/*
 * Allocate a page, split it into fragments of size rx_frag_size and post
 * them as receive buffers to BE
 */
2589static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2590{
2591	struct be_adapter *adapter = rxo->adapter;
2592	struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2593	struct be_queue_info *rxq = &rxo->q;
2594	struct page *pagep = NULL;
2595	struct device *dev = &adapter->pdev->dev;
2596	struct be_eth_rx_d *rxd;
2597	u64 page_dmaaddr = 0, frag_dmaaddr;
2598	u32 posted, page_offset = 0, notify = 0;
2599
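	/* Post frags until the requested count is reached or the next RXQ
	 * slot still holds a page (i.e. the ring is full).
	 */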
2600	page_info = &rxo->page_info_tbl[rxq->head];
2601	for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2602		if (!pagep) {
2603			pagep = be_alloc_pages(adapter->big_page_size, gfp);
2604			if (unlikely(!pagep)) {
2605				rx_stats(rxo)->rx_post_fail++;
2606				break;
2607			}
2608			page_dmaaddr = dma_map_page(dev, pagep, 0,
2609						    adapter->big_page_size,
2610						    DMA_FROM_DEVICE);
2611			if (dma_mapping_error(dev, page_dmaaddr)) {
2612				put_page(pagep);
2613				pagep = NULL;
2614				adapter->drv_stats.dma_map_errors++;
2615				break;
2616			}
2617			page_offset = 0;
2618		} else {
2619			get_page(pagep);
2620			page_offset += rx_frag_size;
2621		}
2622		page_info->page_offset = page_offset;
2623		page_info->page = pagep;
2624
2625		rxd = queue_head_node(rxq);
2626		frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2627		rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2628		rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2629
2630		/* Any space left in the current big page for another frag? */
2631		if ((page_offset + rx_frag_size + rx_frag_size) >
2632					adapter->big_page_size) {
2633			pagep = NULL;
2634			page_info->last_frag = true;
2635			dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2636		} else {
2637			dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2638		}
2639
2640		prev_page_info = page_info;
2641		queue_head_inc(rxq);
2642		page_info = &rxo->page_info_tbl[rxq->head];
2643	}
2644
2645	/* Mark the last frag of a page when we break out of the above loop
2646	 * with no more slots available in the RXQ
2647	 */
2648	if (pagep) {
2649		prev_page_info->last_frag = true;
2650		dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2651	}
2652
2653	if (posted) {
2654		atomic_add(posted, &rxq->used);
2655		if (rxo->rx_post_starved)
2656			rxo->rx_post_starved = false;
2657		do {
2658			notify = min(MAX_NUM_POST_ERX_DB, posted);
2659			be_rxq_notify(adapter, rxq->id, notify);
2660			posted -= notify;
2661		} while (posted);
2662	} else if (atomic_read(&rxq->used) == 0) {
2663		/* Let be_worker replenish when memory is available */
2664		rxo->rx_post_starved = true;
2665	}
2666}
2667
2668static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2669{
2670	switch (status) {
2671	case BE_TX_COMP_HDR_PARSE_ERR:
2672		tx_stats(txo)->tx_hdr_parse_err++;
2673		break;
2674	case BE_TX_COMP_NDMA_ERR:
2675		tx_stats(txo)->tx_dma_err++;
2676		break;
2677	case BE_TX_COMP_ACL_ERR:
2678		tx_stats(txo)->tx_spoof_check_err++;
2679		break;
2680	}
2681}
2682
2683static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2684{
2685	switch (status) {
2686	case LANCER_TX_COMP_LSO_ERR:
2687		tx_stats(txo)->tx_tso_err++;
2688		break;
2689	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2690	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2691		tx_stats(txo)->tx_spoof_check_err++;
2692		break;
2693	case LANCER_TX_COMP_QINQ_ERR:
2694		tx_stats(txo)->tx_qinq_err++;
2695		break;
2696	case LANCER_TX_COMP_PARITY_ERR:
2697		tx_stats(txo)->tx_internal_parity_err++;
2698		break;
2699	case LANCER_TX_COMP_DMA_ERR:
2700		tx_stats(txo)->tx_dma_err++;
2701		break;
2702	case LANCER_TX_COMP_SGE_ERR:
2703		tx_stats(txo)->tx_sge_err++;
2704		break;
2705	}
2706}
2707
2708static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2709						struct be_tx_obj *txo)
2710{
2711	struct be_queue_info *tx_cq = &txo->cq;
2712	struct be_tx_compl_info *txcp = &txo->txcp;
2713	struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2714
2715	if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2716		return NULL;
2717
2718	/* Ensure load ordering of valid bit dword and other dwords below */
2719	rmb();
2720	be_dws_le_to_cpu(compl, sizeof(*compl));
2721
2722	txcp->status = GET_TX_COMPL_BITS(status, compl);
2723	txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2724
2725	if (txcp->status) {
2726		if (lancer_chip(adapter)) {
2727			lancer_update_tx_err(txo, txcp->status);
			/* Reset the adapter in case of TSO,
			 * SGE or parity errors
			 */
2731			if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2732			    txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2733			    txcp->status == LANCER_TX_COMP_SGE_ERR)
2734				be_set_error(adapter, BE_ERROR_TX);
2735		} else {
2736			be_update_tx_err(txo, txcp->status);
2737		}
2738	}
2739
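	/* Once a fatal TX error is flagged, stop consuming completions;
	 * the error recovery path handles the queue cleanup.
	 */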
2740	if (be_check_error(adapter, BE_ERROR_TX))
2741		return NULL;
2742
2743	compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2744	queue_tail_inc(tx_cq);
2745	return txcp;
2746}
2747
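/* Walk the TX ring from its tail up to last_index, unmapping each WRB and
 * freeing the completed skbs; returns the number of WRBs processed.
 */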
2748static u16 be_tx_compl_process(struct be_adapter *adapter,
2749			       struct be_tx_obj *txo, u16 last_index)
2750{
2751	struct sk_buff **sent_skbs = txo->sent_skb_list;
2752	struct be_queue_info *txq = &txo->q;
2753	struct sk_buff *skb = NULL;
2754	bool unmap_skb_hdr = false;
2755	struct be_eth_wrb *wrb;
2756	u16 num_wrbs = 0;
2757	u32 frag_index;
2758
2759	do {
2760		if (sent_skbs[txq->tail]) {
2761			/* Free skb from prev req */
2762			if (skb)
2763				dev_consume_skb_any(skb);
2764			skb = sent_skbs[txq->tail];
2765			sent_skbs[txq->tail] = NULL;
2766			queue_tail_inc(txq);  /* skip hdr wrb */
2767			num_wrbs++;
2768			unmap_skb_hdr = true;
2769		}
2770		wrb = queue_tail_node(txq);
2771		frag_index = txq->tail;
2772		unmap_tx_frag(&adapter->pdev->dev, wrb,
2773			      (unmap_skb_hdr && skb_headlen(skb)));
2774		unmap_skb_hdr = false;
2775		queue_tail_inc(txq);
2776		num_wrbs++;
2777	} while (frag_index != last_index);
2778	dev_consume_skb_any(skb);
2779
2780	return num_wrbs;
2781}
2782
/* Count the pending events in the EQ, consuming (clearing) each entry */
2784static inline int events_get(struct be_eq_obj *eqo)
2785{
2786	struct be_eq_entry *eqe;
2787	int num = 0;
2788
2789	do {
2790		eqe = queue_tail_node(&eqo->q);
2791		if (eqe->evt == 0)
2792			break;
2793
2794		rmb();
2795		eqe->evt = 0;
2796		num++;
2797		queue_tail_inc(&eqo->q);
2798	} while (true);
2799
2800	return num;
2801}
2802
/* Leaves the EQ in disarmed state */
2804static void be_eq_clean(struct be_eq_obj *eqo)
2805{
2806	int num = events_get(eqo);
2807
2808	be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2809}
2810
2811/* Free posted rx buffers that were not used */
2812static void be_rxq_clean(struct be_rx_obj *rxo)
2813{
2814	struct be_queue_info *rxq = &rxo->q;
2815	struct be_rx_page_info *page_info;
2816
2817	while (atomic_read(&rxq->used) > 0) {
2818		page_info = get_rx_page_info(rxo);
2819		put_page(page_info->page);
2820		memset(page_info, 0, sizeof(*page_info));
2821	}
2822	BUG_ON(atomic_read(&rxq->used));
2823	rxq->tail = 0;
2824	rxq->head = 0;
2825}
2826
2827static void be_rx_cq_clean(struct be_rx_obj *rxo)
2828{
2829	struct be_queue_info *rx_cq = &rxo->cq;
2830	struct be_rx_compl_info *rxcp;
2831	struct be_adapter *adapter = rxo->adapter;
2832	int flush_wait = 0;
2833
2834	/* Consume pending rx completions.
2835	 * Wait for the flush completion (identified by zero num_rcvd)
2836	 * to arrive. Notify CQ even when there are no more CQ entries
2837	 * for HW to flush partially coalesced CQ entries.
2838	 * In Lancer, there is no need to wait for flush compl.
2839	 */
2840	for (;;) {
2841		rxcp = be_rx_compl_get(rxo);
2842		if (!rxcp) {
2843			if (lancer_chip(adapter))
2844				break;
2845
2846			if (flush_wait++ > 50 ||
2847			    be_check_error(adapter,
2848					   BE_ERROR_HW)) {
2849				dev_warn(&adapter->pdev->dev,
2850					 "did not receive flush compl\n");
2851				break;
2852			}
2853			be_cq_notify(adapter, rx_cq->id, true, 0);
2854			mdelay(1);
2855		} else {
2856			be_rx_compl_discard(rxo, rxcp);
2857			be_cq_notify(adapter, rx_cq->id, false, 1);
2858			if (rxcp->num_rcvd == 0)
2859				break;
2860		}
2861	}
2862
2863	/* After cleanup, leave the CQ in unarmed state */
2864	be_cq_notify(adapter, rx_cq->id, false, 0);
2865}
2866
2867static void be_tx_compl_clean(struct be_adapter *adapter)
2868{
2869	struct device *dev = &adapter->pdev->dev;
2870	u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2871	struct be_tx_compl_info *txcp;
2872	struct be_queue_info *txq;
2873	u32 end_idx, notified_idx;
2874	struct be_tx_obj *txo;
2875	int i, pending_txqs;
2876
2877	/* Stop polling for compls when HW has been silent for 10ms */
2878	do {
2879		pending_txqs = adapter->num_tx_qs;
2880
2881		for_all_tx_queues(adapter, txo, i) {
2882			cmpl = 0;
2883			num_wrbs = 0;
2884			txq = &txo->q;
2885			while ((txcp = be_tx_compl_get(adapter, txo))) {
2886				num_wrbs +=
2887					be_tx_compl_process(adapter, txo,
2888							    txcp->end_index);
2889				cmpl++;
2890			}
2891			if (cmpl) {
2892				be_cq_notify(adapter, txo->cq.id, false, cmpl);
2893				atomic_sub(num_wrbs, &txq->used);
2894				timeo = 0;
2895			}
2896			if (!be_is_tx_compl_pending(txo))
2897				pending_txqs--;
2898		}
2899
2900		if (pending_txqs == 0 || ++timeo > 10 ||
2901		    be_check_error(adapter, BE_ERROR_HW))
2902			break;
2903
2904		mdelay(1);
2905	} while (true);
2906
2907	/* Free enqueued TX that was never notified to HW */
2908	for_all_tx_queues(adapter, txo, i) {
2909		txq = &txo->q;
2910
2911		if (atomic_read(&txq->used)) {
2912			dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2913				 i, atomic_read(&txq->used));
2914			notified_idx = txq->tail;
2915			end_idx = txq->tail;
2916			index_adv(&end_idx, atomic_read(&txq->used) - 1,
2917				  txq->len);
2918			/* Use the tx-compl process logic to handle requests
2919			 * that were not sent to the HW.
2920			 */
2921			num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2922			atomic_sub(num_wrbs, &txq->used);
2923			BUG_ON(atomic_read(&txq->used));
2924			txo->pend_wrb_cnt = 0;
2925			/* Since hw was never notified of these requests,
2926			 * reset TXQ indices
2927			 */
2928			txq->head = notified_idx;
2929			txq->tail = notified_idx;
2930		}
2931	}
2932}
2933
2934static void be_evt_queues_destroy(struct be_adapter *adapter)
2935{
2936	struct be_eq_obj *eqo;
2937	int i;
2938
2939	for_all_evt_queues(adapter, eqo, i) {
2940		if (eqo->q.created) {
2941			be_eq_clean(eqo);
2942			be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2943			netif_napi_del(&eqo->napi);
2944			free_cpumask_var(eqo->affinity_mask);
2945		}
2946		be_queue_free(adapter, &eqo->q);
2947	}
2948}
2949
2950static int be_evt_queues_create(struct be_adapter *adapter)
2951{
2952	struct be_queue_info *eq;
2953	struct be_eq_obj *eqo;
2954	struct be_aic_obj *aic;
2955	int i, rc;
2956
2957	/* need enough EQs to service both RX and TX queues */
2958	adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2959				    max(adapter->cfg_num_rx_irqs,
2960					adapter->cfg_num_tx_irqs));
2961
2962	adapter->aic_enabled = true;
2963
2964	for_all_evt_queues(adapter, eqo, i) {
2965		int numa_node = dev_to_node(&adapter->pdev->dev);
2966
2967		aic = &adapter->aic_obj[i];
2968		eqo->adapter = adapter;
2969		eqo->idx = i;
2970		aic->max_eqd = BE_MAX_EQD;
2971
2972		eq = &eqo->q;
2973		rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2974				    sizeof(struct be_eq_entry));
2975		if (rc)
2976			return rc;
2977
2978		rc = be_cmd_eq_create(adapter, eqo);
2979		if (rc)
2980			return rc;
2981
2982		if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2983			return -ENOMEM;
2984		cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2985				eqo->affinity_mask);
2986		netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2987			       BE_NAPI_WEIGHT);
2988	}
2989	return 0;
2990}
2991
2992static void be_mcc_queues_destroy(struct be_adapter *adapter)
2993{
2994	struct be_queue_info *q;
2995
2996	q = &adapter->mcc_obj.q;
2997	if (q->created)
2998		be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2999	be_queue_free(adapter, q);
3000
3001	q = &adapter->mcc_obj.cq;
3002	if (q->created)
3003		be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3004	be_queue_free(adapter, q);
3005}
3006
3007/* Must be called only after TX qs are created as MCC shares TX EQ */
3008static int be_mcc_queues_create(struct be_adapter *adapter)
3009{
3010	struct be_queue_info *q, *cq;
3011
3012	cq = &adapter->mcc_obj.cq;
3013	if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3014			   sizeof(struct be_mcc_compl)))
3015		goto err;
3016
3017	/* Use the default EQ for MCC completions */
3018	if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3019		goto mcc_cq_free;
3020
3021	q = &adapter->mcc_obj.q;
3022	if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3023		goto mcc_cq_destroy;
3024
3025	if (be_cmd_mccq_create(adapter, q, cq))
3026		goto mcc_q_free;
3027
3028	return 0;
3029
3030mcc_q_free:
3031	be_queue_free(adapter, q);
3032mcc_cq_destroy:
3033	be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3034mcc_cq_free:
3035	be_queue_free(adapter, cq);
3036err:
3037	return -1;
3038}
3039
3040static void be_tx_queues_destroy(struct be_adapter *adapter)
3041{
3042	struct be_queue_info *q;
3043	struct be_tx_obj *txo;
3044	u8 i;
3045
3046	for_all_tx_queues(adapter, txo, i) {
3047		q = &txo->q;
3048		if (q->created)
3049			be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3050		be_queue_free(adapter, q);
3051
3052		q = &txo->cq;
3053		if (q->created)
3054			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3055		be_queue_free(adapter, q);
3056	}
3057}
3058
3059static int be_tx_qs_create(struct be_adapter *adapter)
3060{
3061	struct be_queue_info *cq;
3062	struct be_tx_obj *txo;
3063	struct be_eq_obj *eqo;
3064	int status, i;
3065
3066	adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3067
3068	for_all_tx_queues(adapter, txo, i) {
3069		cq = &txo->cq;
3070		status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3071					sizeof(struct be_eth_tx_compl));
3072		if (status)
3073			return status;
3074
3075		u64_stats_init(&txo->stats.sync);
3076		u64_stats_init(&txo->stats.sync_compl);
3077
		/* If num_evt_qs is less than num_tx_qs, then more than
		 * one TXQ shares an EQ
		 */
3081		eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3082		status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3083		if (status)
3084			return status;
3085
3086		status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3087					sizeof(struct be_eth_wrb));
3088		if (status)
3089			return status;
3090
3091		status = be_cmd_txq_create(adapter, txo);
3092		if (status)
3093			return status;
3094
3095		netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3096				    eqo->idx);
3097	}
3098
3099	dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3100		 adapter->num_tx_qs);
3101	return 0;
3102}
3103
3104static void be_rx_cqs_destroy(struct be_adapter *adapter)
3105{
3106	struct be_queue_info *q;
3107	struct be_rx_obj *rxo;
3108	int i;
3109
3110	for_all_rx_queues(adapter, rxo, i) {
3111		q = &rxo->cq;
3112		if (q->created)
3113			be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3114		be_queue_free(adapter, q);
3115	}
3116}
3117
3118static int be_rx_cqs_create(struct be_adapter *adapter)
3119{
3120	struct be_queue_info *eq, *cq;
3121	struct be_rx_obj *rxo;
3122	int rc, i;
3123
3124	adapter->num_rss_qs =
3125			min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3126
	/* We'll use RSS only if at least 2 RSS rings are supported. */
3128	if (adapter->num_rss_qs < 2)
3129		adapter->num_rss_qs = 0;
3130
3131	adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3132
3133	/* When the interface is not capable of RSS rings (and there is no
3134	 * need to create a default RXQ) we'll still need one RXQ
3135	 */
3136	if (adapter->num_rx_qs == 0)
3137		adapter->num_rx_qs = 1;
3138
3139	adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3140	for_all_rx_queues(adapter, rxo, i) {
3141		rxo->adapter = adapter;
3142		cq = &rxo->cq;
3143		rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3144				    sizeof(struct be_eth_rx_compl));
3145		if (rc)
3146			return rc;
3147
3148		u64_stats_init(&rxo->stats.sync);
3149		eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3150		rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3151		if (rc)
3152			return rc;
3153	}
3154
3155	dev_info(&adapter->pdev->dev,
3156		 "created %d RX queue(s)\n", adapter->num_rx_qs);
3157	return 0;
3158}
3159
3160static irqreturn_t be_intx(int irq, void *dev)
3161{
3162	struct be_eq_obj *eqo = dev;
3163	struct be_adapter *adapter = eqo->adapter;
3164	int num_evts = 0;
3165
3166	/* IRQ is not expected when NAPI is scheduled as the EQ
3167	 * will not be armed.
3168	 * But, this can happen on Lancer INTx where it takes
	 * a while to de-assert INTx or in BE2 where occasionally
3170	 * an interrupt may be raised even when EQ is unarmed.
3171	 * If NAPI is already scheduled, then counting & notifying
3172	 * events will orphan them.
3173	 */
3174	if (napi_schedule_prep(&eqo->napi)) {
3175		num_evts = events_get(eqo);
3176		__napi_schedule(&eqo->napi);
3177		if (num_evts)
3178			eqo->spurious_intr = 0;
3179	}
3180	be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3181
	/* Return IRQ_HANDLED only for the first spurious intr
3183	 * after a valid intr to stop the kernel from branding
3184	 * this irq as a bad one!
3185	 */
3186	if (num_evts || eqo->spurious_intr++ == 0)
3187		return IRQ_HANDLED;
3188	else
3189		return IRQ_NONE;
3190}
3191
3192static irqreturn_t be_msix(int irq, void *dev)
3193{
3194	struct be_eq_obj *eqo = dev;
3195
3196	be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3197	napi_schedule(&eqo->napi);
3198	return IRQ_HANDLED;
3199}
3200
3201static inline bool do_gro(struct be_rx_compl_info *rxcp)
3202{
	return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3204}
3205
3206static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3207			 int budget)
3208{
3209	struct be_adapter *adapter = rxo->adapter;
3210	struct be_queue_info *rx_cq = &rxo->cq;
3211	struct be_rx_compl_info *rxcp;
3212	u32 work_done;
3213	u32 frags_consumed = 0;
3214
3215	for (work_done = 0; work_done < budget; work_done++) {
3216		rxcp = be_rx_compl_get(rxo);
3217		if (!rxcp)
3218			break;
3219
3220		/* Is it a flush compl that has no data */
3221		if (unlikely(rxcp->num_rcvd == 0))
3222			goto loop_continue;
3223
3224		/* Discard compl with partial DMA Lancer B0 */
3225		if (unlikely(!rxcp->pkt_size)) {
3226			be_rx_compl_discard(rxo, rxcp);
3227			goto loop_continue;
3228		}
3229
		/* On BE drop pkts that arrive due to imperfect filtering in
		 * promiscuous mode on some SKUs
		 */
3233		if (unlikely(rxcp->port != adapter->port_num &&
3234			     !lancer_chip(adapter))) {
3235			be_rx_compl_discard(rxo, rxcp);
3236			goto loop_continue;
3237		}
3238
3239		if (do_gro(rxcp))
3240			be_rx_compl_process_gro(rxo, napi, rxcp);
3241		else
3242			be_rx_compl_process(rxo, napi, rxcp);
3243
3244loop_continue:
3245		frags_consumed += rxcp->num_rcvd;
3246		be_rx_stats_update(rxo, rxcp);
3247	}
3248
3249	if (work_done) {
3250		be_cq_notify(adapter, rx_cq->id, true, work_done);
3251
3252		/* When an rx-obj gets into post_starved state, just
3253		 * let be_worker do the posting.
3254		 */
3255		if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3256		    !rxo->rx_post_starved)
3257			be_post_rx_frags(rxo, GFP_ATOMIC,
3258					 max_t(u32, MAX_RX_POST,
3259					       frags_consumed));
3260	}
3261
3262	return work_done;
3263}
3264
static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3267			  int idx)
3268{
3269	int num_wrbs = 0, work_done = 0;
3270	struct be_tx_compl_info *txcp;
3271
3272	while ((txcp = be_tx_compl_get(adapter, txo))) {
3273		num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3274		work_done++;
3275	}
3276
3277	if (work_done) {
3278		be_cq_notify(adapter, txo->cq.id, true, work_done);
3279		atomic_sub(num_wrbs, &txo->q.used);
3280
		/* As Tx wrbs have been freed up, wake up netdev queue
		 * if it was stopped due to lack of tx wrbs.
		 */
3283		if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3284		    be_can_txq_wake(txo)) {
3285			netif_wake_subqueue(adapter->netdev, idx);
3286		}
3287
3288		u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3289		tx_stats(txo)->tx_compl += work_done;
3290		u64_stats_update_end(&tx_stats(txo)->sync_compl);
3291	}
3292}
3293
3294int be_poll(struct napi_struct *napi, int budget)
3295{
3296	struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3297	struct be_adapter *adapter = eqo->adapter;
3298	int max_work = 0, work, i, num_evts;
3299	struct be_rx_obj *rxo;
3300	struct be_tx_obj *txo;
3301	u32 mult_enc = 0;
3302
3303	num_evts = events_get(eqo);
3304
3305	for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3306		be_process_tx(adapter, txo, i);
3307
	/* This loop will iterate twice for EQ0 in which
	 * completions of the last RXQ (default one) are also processed.
	 * For other EQs the loop iterates only once.
	 */
3312	for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3313		work = be_process_rx(rxo, napi, budget);
3314		max_work = max(work, max_work);
3315	}
3316
3317	if (is_mcc_eqo(eqo))
3318		be_process_mcc(adapter);
3319
3320	if (max_work < budget) {
3321		napi_complete_done(napi, max_work);
3322
		/* Skyhawk EQ_DB has a provision to set the re-arm to interrupt
		 * delay via a delay multiplier encoding value
		 */
3326		if (skyhawk_chip(adapter))
3327			mult_enc = be_get_eq_delay_mult_enc(eqo);
3328
3329		be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3330			     mult_enc);
3331	} else {
3332		/* As we'll continue in polling mode, count and clear events */
3333		be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3334	}
3335	return max_work;
3336}
3337
3338void be_detect_error(struct be_adapter *adapter)
3339{
3340	u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3341	u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3342	struct device *dev = &adapter->pdev->dev;
3343	u16 val;
3344	u32 i;
3345
3346	if (be_check_error(adapter, BE_ERROR_HW))
3347		return;
3348
3349	if (lancer_chip(adapter)) {
3350		sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3351		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3352			be_set_error(adapter, BE_ERROR_UE);
3353			sliport_err1 = ioread32(adapter->db +
3354						SLIPORT_ERROR1_OFFSET);
3355			sliport_err2 = ioread32(adapter->db +
3356						SLIPORT_ERROR2_OFFSET);
			/* Do not log error messages if it's a FW reset */
3358			if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3359			    sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3360				dev_info(dev, "Reset is in progress\n");
3361			} else {
3362				dev_err(dev, "Error detected in the card\n");
3363				dev_err(dev, "ERR: sliport status 0x%x\n",
3364					sliport_status);
3365				dev_err(dev, "ERR: sliport error1 0x%x\n",
3366					sliport_err1);
3367				dev_err(dev, "ERR: sliport error2 0x%x\n",
3368					sliport_err2);
3369			}
3370		}
3371	} else {
3372		ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3373		ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3374		ue_lo_mask = ioread32(adapter->pcicfg +
3375				      PCICFG_UE_STATUS_LOW_MASK);
3376		ue_hi_mask = ioread32(adapter->pcicfg +
3377				      PCICFG_UE_STATUS_HI_MASK);
3378
3379		ue_lo = (ue_lo & ~ue_lo_mask);
3380		ue_hi = (ue_hi & ~ue_hi_mask);
3381
3382		if (ue_lo || ue_hi) {
3383			/* On certain platforms BE3 hardware can indicate
3384			 * spurious UEs. In case of a UE in the chip,
3385			 * the POST register correctly reports either a
3386			 * FAT_LOG_START state (FW is currently dumping
			 * FAT log data) or an ARMFW_UE state. Check for the
3388			 * above states to ascertain if the UE is valid or not.
3389			 */
3390			if (BE3_chip(adapter)) {
3391				val = be_POST_stage_get(adapter);
3392				if ((val & POST_STAGE_FAT_LOG_START)
3393				     != POST_STAGE_FAT_LOG_START &&
3394				    (val & POST_STAGE_ARMFW_UE)
3395				     != POST_STAGE_ARMFW_UE &&
3396				    (val & POST_STAGE_RECOVERABLE_ERR)
3397				     != POST_STAGE_RECOVERABLE_ERR)
3398					return;
3399			}
3400
			dev_err(dev, "Error detected in the adapter\n");
3402			be_set_error(adapter, BE_ERROR_UE);
3403
3404			for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3405				if (ue_lo & 1)
3406					dev_err(dev, "UE: %s bit set\n",
3407						ue_status_low_desc[i]);
3408			}
3409			for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3410				if (ue_hi & 1)
3411					dev_err(dev, "UE: %s bit set\n",
3412						ue_status_hi_desc[i]);
3413			}
3414		}
3415	}
3416}
3417
3418static void be_msix_disable(struct be_adapter *adapter)
3419{
3420	if (msix_enabled(adapter)) {
3421		pci_disable_msix(adapter->pdev);
3422		adapter->num_msix_vec = 0;
3423		adapter->num_msix_roce_vec = 0;
3424	}
3425}
3426
3427static int be_msix_enable(struct be_adapter *adapter)
3428{
3429	unsigned int i, max_roce_eqs;
3430	struct device *dev = &adapter->pdev->dev;
3431	int num_vec;
3432
3433	/* If RoCE is supported, program the max number of vectors that
3434	 * could be used for NIC and RoCE, else, just program the number
3435	 * we'll use initially.
3436	 */
3437	if (be_roce_supported(adapter)) {
3438		max_roce_eqs =
3439			be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3440		max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3441		num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3442	} else {
3443		num_vec = max(adapter->cfg_num_rx_irqs,
3444			      adapter->cfg_num_tx_irqs);
3445	}
3446
3447	for (i = 0; i < num_vec; i++)
3448		adapter->msix_entries[i].entry = i;
3449
3450	num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3451					MIN_MSIX_VECTORS, num_vec);
3452	if (num_vec < 0)
3453		goto fail;
3454
3455	if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3456		adapter->num_msix_roce_vec = num_vec / 2;
3457		dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3458			 adapter->num_msix_roce_vec);
3459	}
3460
3461	adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3462
3463	dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3464		 adapter->num_msix_vec);
3465	return 0;
3466
3467fail:
3468	dev_warn(dev, "MSIx enable failed\n");
3469
3470	/* INTx is not supported in VFs, so fail probe if enable_msix fails */
3471	if (be_virtfn(adapter))
3472		return num_vec;
3473	return 0;
3474}
3475
3476static inline int be_msix_vec_get(struct be_adapter *adapter,
3477				  struct be_eq_obj *eqo)
3478{
3479	return adapter->msix_entries[eqo->msix_idx].vector;
3480}
3481
3482static int be_msix_register(struct be_adapter *adapter)
3483{
3484	struct net_device *netdev = adapter->netdev;
3485	struct be_eq_obj *eqo;
3486	int status, i, vec;
3487
3488	for_all_evt_queues(adapter, eqo, i) {
3489		sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3490		vec = be_msix_vec_get(adapter, eqo);
3491		status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3492		if (status)
3493			goto err_msix;
3494
3495		irq_set_affinity_hint(vec, eqo->affinity_mask);
3496	}
3497
3498	return 0;
3499err_msix:
3500	for (i--; i >= 0; i--) {
3501		eqo = &adapter->eq_obj[i];
3502		free_irq(be_msix_vec_get(adapter, eqo), eqo);
3503	}
3504	dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3505		 status);
3506	be_msix_disable(adapter);
3507	return status;
3508}
3509
3510static int be_irq_register(struct be_adapter *adapter)
3511{
3512	struct net_device *netdev = adapter->netdev;
3513	int status;
3514
3515	if (msix_enabled(adapter)) {
3516		status = be_msix_register(adapter);
3517		if (status == 0)
3518			goto done;
3519		/* INTx is not supported for VF */
3520		if (be_virtfn(adapter))
3521			return status;
3522	}
3523
3524	/* INTx: only the first EQ is used */
3525	netdev->irq = adapter->pdev->irq;
3526	status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3527			     &adapter->eq_obj[0]);
3528	if (status) {
3529		dev_err(&adapter->pdev->dev,
3530			"INTx request IRQ failed - err %d\n", status);
3531		return status;
3532	}
3533done:
3534	adapter->isr_registered = true;
3535	return 0;
3536}
3537
3538static void be_irq_unregister(struct be_adapter *adapter)
3539{
3540	struct net_device *netdev = adapter->netdev;
3541	struct be_eq_obj *eqo;
3542	int i, vec;
3543
3544	if (!adapter->isr_registered)
3545		return;
3546
3547	/* INTx */
3548	if (!msix_enabled(adapter)) {
3549		free_irq(netdev->irq, &adapter->eq_obj[0]);
3550		goto done;
3551	}
3552
3553	/* MSIx */
3554	for_all_evt_queues(adapter, eqo, i) {
3555		vec = be_msix_vec_get(adapter, eqo);
3556		irq_set_affinity_hint(vec, NULL);
3557		free_irq(vec, eqo);
3558	}
3559
3560done:
3561	adapter->isr_registered = false;
3562}
3563
3564static void be_rx_qs_destroy(struct be_adapter *adapter)
3565{
3566	struct rss_info *rss = &adapter->rss_info;
3567	struct be_queue_info *q;
3568	struct be_rx_obj *rxo;
3569	int i;
3570
3571	for_all_rx_queues(adapter, rxo, i) {
3572		q = &rxo->q;
3573		if (q->created) {
3574			/* If RXQs are destroyed while in an "out of buffer"
3575			 * state, there is a possibility of an HW stall on
3576			 * Lancer. So, post 64 buffers to each queue to relieve
3577			 * the "out of buffer" condition.
3578			 * Make sure there's space in the RXQ before posting.
3579			 */
3580			if (lancer_chip(adapter)) {
3581				be_rx_cq_clean(rxo);
3582				if (atomic_read(&q->used) == 0)
3583					be_post_rx_frags(rxo, GFP_KERNEL,
3584							 MAX_RX_POST);
3585			}
3586
3587			be_cmd_rxq_destroy(adapter, q);
3588			be_rx_cq_clean(rxo);
3589			be_rxq_clean(rxo);
3590		}
3591		be_queue_free(adapter, q);
3592	}
3593
3594	if (rss->rss_flags) {
3595		rss->rss_flags = RSS_ENABLE_NONE;
3596		be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3597				  128, rss->rss_hkey);
3598	}
3599}
3600
3601static void be_disable_if_filters(struct be_adapter *adapter)
3602{
	/* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3604	if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3605	    check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3606		be_dev_mac_del(adapter, adapter->pmac_id[0]);
3607		eth_zero_addr(adapter->dev_mac);
3608	}
3609
3610	be_clear_uc_list(adapter);
3611	be_clear_mc_list(adapter);
3612
3613	/* The IFACE flags are enabled in the open path and cleared
3614	 * in the close path. When a VF gets detached from the host and
3615	 * assigned to a VM the following happens:
3616	 *	- VF's IFACE flags get cleared in the detach path
3617	 *	- IFACE create is issued by the VF in the attach path
3618	 * Due to a bug in the BE3/Skyhawk-R FW
3619	 * (Lancer FW doesn't have the bug), the IFACE capability flags
3620	 * specified along with the IFACE create cmd issued by a VF are not
3621	 * honoured by FW.  As a consequence, if a *new* driver
3622	 * (that enables/disables IFACE flags in open/close)
	 * is loaded in the host and an *old* driver is used by a VM/VF,
3624	 * the IFACE gets created *without* the needed flags.
3625	 * To avoid this, disable RX-filter flags only for Lancer.
3626	 */
3627	if (lancer_chip(adapter)) {
3628		be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3629		adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3630	}
3631}
3632
3633static int be_close(struct net_device *netdev)
3634{
3635	struct be_adapter *adapter = netdev_priv(netdev);
3636	struct be_eq_obj *eqo;
3637	int i;
3638
	/* This protection is needed as be_close() may be called even when the
	 * adapter is in a cleared state (after an EEH permanent failure)
	 */
3642	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3643		return 0;
3644
3645	/* Before attempting cleanup ensure all the pending cmds in the
3646	 * config_wq have finished execution
3647	 */
3648	flush_workqueue(be_wq);
3649
3650	be_disable_if_filters(adapter);
3651
3652	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3653		for_all_evt_queues(adapter, eqo, i) {
3654			napi_disable(&eqo->napi);
3655		}
3656		adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3657	}
3658
3659	be_async_mcc_disable(adapter);
3660
3661	/* Wait for all pending tx completions to arrive so that
3662	 * all tx skbs are freed.
3663	 */
3664	netif_tx_disable(netdev);
3665	be_tx_compl_clean(adapter);
3666
3667	be_rx_qs_destroy(adapter);
3668
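	/* Make sure any in-flight interrupt handlers have finished on each
	 * event queue before flushing it.
	 */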
3669	for_all_evt_queues(adapter, eqo, i) {
3670		if (msix_enabled(adapter))
3671			synchronize_irq(be_msix_vec_get(adapter, eqo));
3672		else
3673			synchronize_irq(netdev->irq);
3674		be_eq_clean(eqo);
3675	}
3676
3677	be_irq_unregister(adapter);
3678
3679	return 0;
3680}
3681
3682static int be_rx_qs_create(struct be_adapter *adapter)
3683{
3684	struct rss_info *rss = &adapter->rss_info;
3685	u8 rss_key[RSS_HASH_KEY_LEN];
3686	struct be_rx_obj *rxo;
3687	int rc, i, j;
3688
3689	for_all_rx_queues(adapter, rxo, i) {
3690		rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3691				    sizeof(struct be_eth_rx_d));
3692		if (rc)
3693			return rc;
3694	}
3695
3696	if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3697		rxo = default_rxo(adapter);
3698		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3699				       rx_frag_size, adapter->if_handle,
3700				       false, &rxo->rss_id);
3701		if (rc)
3702			return rc;
3703	}
3704
3705	for_all_rss_queues(adapter, rxo, i) {
3706		rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3707				       rx_frag_size, adapter->if_handle,
3708				       true, &rxo->rss_id);
3709		if (rc)
3710			return rc;
3711	}
3712
3713	if (be_multi_rxq(adapter)) {
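		/* Populate the RSS indirection table by distributing the
		 * created RSS queue ids round-robin across all table entries.
		 */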
3714		for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3715			for_all_rss_queues(adapter, rxo, i) {
3716				if ((j + i) >= RSS_INDIR_TABLE_LEN)
3717					break;
3718				rss->rsstable[j + i] = rxo->rss_id;
3719				rss->rss_queue[j + i] = i;
3720			}
3721		}
3722		rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3723			RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3724
3725		if (!BEx_chip(adapter))
3726			rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3727				RSS_ENABLE_UDP_IPV6;
3728
3729		netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3730		rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3731				       RSS_INDIR_TABLE_LEN, rss_key);
3732		if (rc) {
3733			rss->rss_flags = RSS_ENABLE_NONE;
3734			return rc;
3735		}
3736
3737		memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3738	} else {
3739		/* Disable RSS, if only default RX Q is created */
3740		rss->rss_flags = RSS_ENABLE_NONE;
3741	}
3742
3744	/* Post 1 less than RXQ-len to avoid head being equal to tail,
3745	 * which is a queue empty condition
3746	 */
3747	for_all_rx_queues(adapter, rxo, i)
3748		be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3749
3750	return 0;
3751}
3752
3753static int be_enable_if_filters(struct be_adapter *adapter)
3754{
3755	int status;
3756
3757	status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3758	if (status)
3759		return status;
3760
3761	/* Normally this condition is true as the ->dev_mac is zeroed.
3762	 * But on BE3 VFs the initial MAC is pre-programmed by PF and
3763	 * subsequent be_dev_mac_add() can fail (after fresh boot)
3764	 */
3765	if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3766		int old_pmac_id = -1;
3767
3768		/* Remember old programmed MAC if any - can happen on BE3 VF */
3769		if (!is_zero_ether_addr(adapter->dev_mac))
3770			old_pmac_id = adapter->pmac_id[0];
3771
3772		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3773		if (status)
3774			return status;
3775
3776		/* Delete the old programmed MAC as we successfully programmed
3777		 * a new MAC
3778		 */
3779		if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3780			be_dev_mac_del(adapter, old_pmac_id);
3781
3782		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3783	}
3784
3785	if (adapter->vlans_added)
3786		be_vid_config(adapter);
3787
3788	__be_set_rx_mode(adapter);
3789
3790	return 0;
3791}
3792
3793static int be_open(struct net_device *netdev)
3794{
3795	struct be_adapter *adapter = netdev_priv(netdev);
3796	struct be_eq_obj *eqo;
3797	struct be_rx_obj *rxo;
3798	struct be_tx_obj *txo;
3799	u8 link_status;
3800	int status, i;
3801
3802	status = be_rx_qs_create(adapter);
3803	if (status)
3804		goto err;
3805
3806	status = be_enable_if_filters(adapter);
3807	if (status)
3808		goto err;
3809
3810	status = be_irq_register(adapter);
3811	if (status)
3812		goto err;
3813
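	/* Arm the RX and TX completion queues so that completions start
	 * generating events once traffic flows.
	 */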
3814	for_all_rx_queues(adapter, rxo, i)
3815		be_cq_notify(adapter, rxo->cq.id, true, 0);
3816
3817	for_all_tx_queues(adapter, txo, i)
3818		be_cq_notify(adapter, txo->cq.id, true, 0);
3819
3820	be_async_mcc_enable(adapter);
3821
3822	for_all_evt_queues(adapter, eqo, i) {
3823		napi_enable(&eqo->napi);
3824		be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3825	}
3826	adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3827
3828	status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3829	if (!status)
3830		be_link_status_update(adapter, link_status);
3831
3832	netif_tx_start_all_queues(netdev);
3833
3834	udp_tunnel_nic_reset_ntf(netdev);
3835
3836	return 0;
3837err:
3838	be_close(adapter->netdev);
3839	return -EIO;
3840}
3841
3842static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3843{
3844	u32 addr;
3845
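	/* The low 3 bytes of the seed MAC are derived from a hash of the
	 * PF's MAC address; the OUI (high 3 bytes) is copied as-is below.
	 */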
3846	addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3847
3848	mac[5] = (u8)(addr & 0xFF);
3849	mac[4] = (u8)((addr >> 8) & 0xFF);
3850	mac[3] = (u8)((addr >> 16) & 0xFF);
3851	/* Use the OUI from the current MAC address */
3852	memcpy(mac, adapter->netdev->dev_addr, 3);
3853}
3854
3855/*
3856 * Generate a seed MAC address from the PF MAC Address using jhash.
3857 * MAC addresses for VFs are assigned incrementally starting from the seed.
3858 * These addresses are programmed in the ASIC by the PF and the VF driver
3859 * queries for the MAC address during its probe.
3860 */
3861static int be_vf_eth_addr_config(struct be_adapter *adapter)
3862{
3863	u32 vf;
3864	int status = 0;
3865	u8 mac[ETH_ALEN];
3866	struct be_vf_cfg *vf_cfg;
3867
3868	be_vf_eth_addr_generate(adapter, mac);
3869
3870	for_all_vfs(adapter, vf_cfg, vf) {
3871		if (BEx_chip(adapter))
3872			status = be_cmd_pmac_add(adapter, mac,
3873						 vf_cfg->if_handle,
3874						 &vf_cfg->pmac_id, vf + 1);
3875		else
3876			status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3877						vf + 1);
3878
3879		if (status)
3880			dev_err(&adapter->pdev->dev,
3881				"Mac address assignment failed for VF %d\n",
3882				vf);
3883		else
3884			memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3885
3886		mac[5] += 1;
3887	}
3888	return status;
3889}
3890
3891static int be_vfs_mac_query(struct be_adapter *adapter)
3892{
3893	int status, vf;
3894	u8 mac[ETH_ALEN];
3895	struct be_vf_cfg *vf_cfg;
3896
3897	for_all_vfs(adapter, vf_cfg, vf) {
3898		status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3899					       mac, vf_cfg->if_handle,
3900					       false, vf+1);
3901		if (status)
3902			return status;
3903		memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3904	}
3905	return 0;
3906}
3907
3908static void be_vf_clear(struct be_adapter *adapter)
3909{
3910	struct be_vf_cfg *vf_cfg;
3911	u32 vf;
3912
3913	if (pci_vfs_assigned(adapter->pdev)) {
3914		dev_warn(&adapter->pdev->dev,
3915			 "VFs are assigned to VMs: not disabling VFs\n");
3916		goto done;
3917	}
3918
3919	pci_disable_sriov(adapter->pdev);
3920
3921	for_all_vfs(adapter, vf_cfg, vf) {
3922		if (BEx_chip(adapter))
3923			be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3924					vf_cfg->pmac_id, vf + 1);
3925		else
3926			be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3927				       vf + 1);
3928
3929		be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3930	}
3931
3932	if (BE3_chip(adapter))
3933		be_cmd_set_hsw_config(adapter, 0, 0,
3934				      adapter->if_handle,
3935				      PORT_FWD_TYPE_PASSTHRU, 0);
3936done:
3937	kfree(adapter->vf_cfg);
3938	adapter->num_vfs = 0;
3939	adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3940}
3941
3942static void be_clear_queues(struct be_adapter *adapter)
3943{
3944	be_mcc_queues_destroy(adapter);
3945	be_rx_cqs_destroy(adapter);
3946	be_tx_queues_destroy(adapter);
3947	be_evt_queues_destroy(adapter);
3948}
3949
3950static void be_cancel_worker(struct be_adapter *adapter)
3951{
3952	if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3953		cancel_delayed_work_sync(&adapter->work);
3954		adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3955	}
3956}
3957
3958static void be_cancel_err_detection(struct be_adapter *adapter)
3959{
3960	struct be_error_recovery *err_rec = &adapter->error_recovery;
3961
3962	if (!be_err_recovery_workq)
3963		return;
3964
3965	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3966		cancel_delayed_work_sync(&err_rec->err_detection_work);
3967		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3968	}
3969}
3970
3971/* VxLAN offload Notes:
3972 *
3973 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
3974 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
3975 * is expected to work across all types of IP tunnels once exported. Skyhawk
3976 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
3977 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
3978 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
3979 * those other tunnels are unexported on the fly through ndo_features_check().
3980 */
3981static int be_vxlan_set_port(struct net_device *netdev, unsigned int table,
3982			     unsigned int entry, struct udp_tunnel_info *ti)
3983{
3984	struct be_adapter *adapter = netdev_priv(netdev);
3985	struct device *dev = &adapter->pdev->dev;
3986	int status;
3987
3988	status = be_cmd_manage_iface(adapter, adapter->if_handle,
3989				     OP_CONVERT_NORMAL_TO_TUNNEL);
3990	if (status) {
3991		dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3992		return status;
3993	}
3994	adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3995
3996	status = be_cmd_set_vxlan_port(adapter, ti->port);
3997	if (status) {
3998		dev_warn(dev, "Failed to add VxLAN port\n");
3999		return status;
4000	}
4001	adapter->vxlan_port = ti->port;
4002
4003	netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4004				   NETIF_F_TSO | NETIF_F_TSO6 |
4005				   NETIF_F_GSO_UDP_TUNNEL;
4006
4007	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4008		 be16_to_cpu(ti->port));
4009	return 0;
4010}
4011
4012static int be_vxlan_unset_port(struct net_device *netdev, unsigned int table,
4013			       unsigned int entry, struct udp_tunnel_info *ti)
4014{
4015	struct be_adapter *adapter = netdev_priv(netdev);
4016
4017	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4018		be_cmd_manage_iface(adapter, adapter->if_handle,
4019				    OP_CONVERT_TUNNEL_TO_NORMAL);
4020
4021	if (adapter->vxlan_port)
4022		be_cmd_set_vxlan_port(adapter, 0);
4023
4024	adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4025	adapter->vxlan_port = 0;
4026
4027	netdev->hw_enc_features = 0;
4028	return 0;
4029}
4030
4031static const struct udp_tunnel_nic_info be_udp_tunnels = {
4032	.set_port	= be_vxlan_set_port,
4033	.unset_port	= be_vxlan_unset_port,
4034	.flags		= UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
4035			  UDP_TUNNEL_NIC_INFO_OPEN_ONLY,
4036	.tables		= {
4037		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, },
4038	},
4039};
4040
4041static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4042				struct be_resources *vft_res)
4043{
4044	struct be_resources res = adapter->pool_res;
4045	u32 vf_if_cap_flags = res.vf_if_cap_flags;
4046	struct be_resources res_mod = {0};
4047	u16 num_vf_qs = 1;
4048
4049	/* Distribute the queue resources among the PF and its VFs */
4050	if (num_vfs) {
4051		/* Divide the rx queues evenly among the VFs and the PF, capped
4052		 * at VF-EQ-count. Any remainder queues belong to the PF.
4053		 */
4054		num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4055				res.max_rss_qs / (num_vfs + 1));
4056
4057		/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4058		 * RSS Tables per port. Provide RSS on VFs, only if number of
4059		 * VFs requested is less than its PF Pool's RSS Tables limit.
4060		 */
4061		if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4062			num_vf_qs = 1;
4063	}
4064
4065	/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4066	 * which are modifiable using SET_PROFILE_CONFIG cmd.
4067	 */
4068	be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4069				  RESOURCE_MODIFIABLE, 0);
4070
4071	/* If RSS IFACE capability flags are modifiable for a VF, set the
4072	 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4073	 * more than 1 RSSQ is available for a VF.
4074	 * Otherwise, provision only 1 queue pair for VF.
4075	 */
4076	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4077		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4078		if (num_vf_qs > 1) {
4079			vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4080			if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4081				vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4082		} else {
4083			vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4084					     BE_IF_FLAGS_DEFQ_RSS);
4085		}
4086	} else {
4087		num_vf_qs = 1;
4088	}
4089
4090	if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4091		vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4092		vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4093	}
4094
4095	vft_res->vf_if_cap_flags = vf_if_cap_flags;
4096	vft_res->max_rx_qs = num_vf_qs;
4097	vft_res->max_rss_qs = num_vf_qs;
4098	vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4099	vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4100
4101	/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4102	 * among the PF and its VFs, if the fields are changeable
4103	 */
4104	if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4105		vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4106
4107	if (res_mod.max_vlans == FIELD_MODIFIABLE)
4108		vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4109
4110	if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4111		vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4112
4113	if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4114		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4115}
4116
4117static void be_if_destroy(struct be_adapter *adapter)
4118{
4119	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4120
4121	kfree(adapter->pmac_id);
4122	adapter->pmac_id = NULL;
4123
4124	kfree(adapter->mc_list);
4125	adapter->mc_list = NULL;
4126
4127	kfree(adapter->uc_list);
4128	adapter->uc_list = NULL;
4129}
4130
4131static int be_clear(struct be_adapter *adapter)
4132{
4133	struct pci_dev *pdev = adapter->pdev;
4134	struct  be_resources vft_res = {0};
4135
4136	be_cancel_worker(adapter);
4137
4138	flush_workqueue(be_wq);
4139
4140	if (sriov_enabled(adapter))
4141		be_vf_clear(adapter);
4142
4143	/* Re-configure FW to distribute resources evenly across max-supported
4144	 * number of VFs, only when VFs are not already enabled.
4145	 */
4146	if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4147	    !pci_vfs_assigned(pdev)) {
4148		be_calculate_vf_res(adapter,
4149				    pci_sriov_get_totalvfs(pdev),
4150				    &vft_res);
4151		be_cmd_set_sriov_config(adapter, adapter->pool_res,
4152					pci_sriov_get_totalvfs(pdev),
4153					&vft_res);
4154	}
4155
4156	be_vxlan_unset_port(adapter->netdev, 0, 0, NULL);
4157
4158	be_if_destroy(adapter);
4159
4160	be_clear_queues(adapter);
4161
4162	be_msix_disable(adapter);
4163	adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4164	return 0;
4165}
4166
4167static int be_vfs_if_create(struct be_adapter *adapter)
4168{
4169	struct be_resources res = {0};
4170	u32 cap_flags, en_flags, vf;
4171	struct be_vf_cfg *vf_cfg;
4172	int status;
4173
4174	/* If a FW profile exists, then cap_flags are updated */
4175	cap_flags = BE_VF_IF_EN_FLAGS;
4176
4177	for_all_vfs(adapter, vf_cfg, vf) {
4178		if (!BE3_chip(adapter)) {
4179			status = be_cmd_get_profile_config(adapter, &res, NULL,
4180							   ACTIVE_PROFILE_TYPE,
4181							   RESOURCE_LIMITS,
4182							   vf + 1);
4183			if (!status) {
4184				cap_flags = res.if_cap_flags;
4185				/* Prevent VFs from enabling VLAN promiscuous
4186				 * mode
4187				 */
4188				cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4189			}
4190		}
4191
4192		/* PF should enable IF flags during proxy if_create call */
4193		en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4194		status = be_cmd_if_create(adapter, cap_flags, en_flags,
4195					  &vf_cfg->if_handle, vf + 1);
4196		if (status)
4197			return status;
4198	}
4199
4200	return 0;
4201}
4202
4203static int be_vf_setup_init(struct be_adapter *adapter)
4204{
4205	struct be_vf_cfg *vf_cfg;
4206	int vf;
4207
4208	adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4209				  GFP_KERNEL);
4210	if (!adapter->vf_cfg)
4211		return -ENOMEM;
4212
4213	for_all_vfs(adapter, vf_cfg, vf) {
4214		vf_cfg->if_handle = -1;
4215		vf_cfg->pmac_id = -1;
4216	}
4217	return 0;
4218}
4219
4220static int be_vf_setup(struct be_adapter *adapter)
4221{
4222	struct device *dev = &adapter->pdev->dev;
4223	struct be_vf_cfg *vf_cfg;
4224	int status, old_vfs, vf;
4225	bool spoofchk;
4226
4227	old_vfs = pci_num_vf(adapter->pdev);
4228
4229	status = be_vf_setup_init(adapter);
4230	if (status)
4231		goto err;
4232
4233	if (old_vfs) {
4234		for_all_vfs(adapter, vf_cfg, vf) {
4235			status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4236			if (status)
4237				goto err;
4238		}
4239
4240		status = be_vfs_mac_query(adapter);
4241		if (status)
4242			goto err;
4243	} else {
4244		status = be_vfs_if_create(adapter);
4245		if (status)
4246			goto err;
4247
4248		status = be_vf_eth_addr_config(adapter);
4249		if (status)
4250			goto err;
4251	}
4252
4253	for_all_vfs(adapter, vf_cfg, vf) {
4254		/* Allow VFs to program MAC/VLAN filters */
4255		status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4256						  vf + 1);
4257		if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4258			status = be_cmd_set_fn_privileges(adapter,
4259							  vf_cfg->privileges |
4260							  BE_PRIV_FILTMGMT,
4261							  vf + 1);
4262			if (!status) {
4263				vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4264				dev_info(dev, "VF%d has FILTMGMT privilege\n",
4265					 vf);
4266			}
4267		}
4268
4269		/* Allow full available bandwidth */
4270		if (!old_vfs)
4271			be_cmd_config_qos(adapter, 0, 0, vf + 1);
4272
4273		status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4274					       vf_cfg->if_handle, NULL,
4275					       &spoofchk);
4276		if (!status)
4277			vf_cfg->spoofchk = spoofchk;
4278
4279		if (!old_vfs) {
4280			be_cmd_enable_vf(adapter, vf + 1);
4281			be_cmd_set_logical_link_config(adapter,
4282						       IFLA_VF_LINK_STATE_AUTO,
4283						       vf+1);
4284		}
4285	}
4286
4287	if (!old_vfs) {
4288		status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4289		if (status) {
4290			dev_err(dev, "SRIOV enable failed\n");
4291			adapter->num_vfs = 0;
4292			goto err;
4293		}
4294	}
4295
4296	if (BE3_chip(adapter)) {
4297		/* On BE3, enable VEB only when SRIOV is enabled */
4298		status = be_cmd_set_hsw_config(adapter, 0, 0,
4299					       adapter->if_handle,
4300					       PORT_FWD_TYPE_VEB, 0);
4301		if (status)
4302			goto err;
4303	}
4304
4305	adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4306	return 0;
4307err:
4308	dev_err(dev, "VF setup failed\n");
4309	be_vf_clear(adapter);
4310	return status;
4311}
4312
4313/* Converting function_mode bits on BE3 to SH mc_type enums */
4314
4315static u8 be_convert_mc_type(u32 function_mode)
4316{
4317	if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4318		return vNIC1;
4319	else if (function_mode & QNQ_MODE)
4320		return FLEX10;
4321	else if (function_mode & VNIC_MODE)
4322		return vNIC2;
4323	else if (function_mode & UMC_ENABLED)
4324		return UMC;
4325	else
4326		return MC_NONE;
4327}
4328
4329/* On BE2/BE3 FW does not suggest the supported limits */
4330static void BEx_get_resources(struct be_adapter *adapter,
4331			      struct be_resources *res)
4332{
4333	bool use_sriov = adapter->num_vfs ? 1 : 0;
4334
4335	if (be_physfn(adapter))
4336		res->max_uc_mac = BE_UC_PMAC_COUNT;
4337	else
4338		res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4339
4340	adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4341
4342	if (be_is_mc(adapter)) {
4343		/* Assuming that there are 4 channels per port,
4344		 * when multi-channel is enabled
4345		 */
4346		if (be_is_qnq_mode(adapter))
4347			res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4348		else
4349			/* In a non-qnq multichannel mode, the pvid
4350			 * takes up one vlan entry
4351			 */
4352			res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4353	} else {
4354		res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4355	}
4356
4357	res->max_mcast_mac = BE_MAX_MC;
4358
4359	/* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4360	 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4361	 *    *only* if it is RSS-capable.
4362	 */
4363	if (BE2_chip(adapter) || use_sriov ||  (adapter->port_num > 1) ||
4364	    be_virtfn(adapter) ||
4365	    (be_is_mc(adapter) &&
4366	     !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4367		res->max_tx_qs = 1;
4368	} else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4369		struct be_resources super_nic_res = {0};
4370
4371		/* On a SuperNIC profile, the driver needs to use the
4372		 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4373		 */
4374		be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4375					  ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4376					  0);
4377		/* Some old versions of BE3 FW don't report max_tx_qs value */
4378		res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4379	} else {
4380		res->max_tx_qs = BE3_MAX_TX_QS;
4381	}
4382
4383	if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4384	    !use_sriov && be_physfn(adapter))
4385		res->max_rss_qs = (adapter->be3_native) ?
4386					   BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
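	/* One additional RXQ is needed for the default (non-RSS) queue */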
4387	res->max_rx_qs = res->max_rss_qs + 1;
4388
4389	if (be_physfn(adapter))
4390		res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4391					BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4392	else
4393		res->max_evt_qs = 1;
4394
4395	res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4396	res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4397	if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4398		res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4399}
4400
4401static void be_setup_init(struct be_adapter *adapter)
4402{
4403	adapter->vlan_prio_bmap = 0xff;
4404	adapter->phy.link_speed = -1;
4405	adapter->if_handle = -1;
4406	adapter->be3_native = false;
4407	adapter->if_flags = 0;
4408	adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4409	if (be_physfn(adapter))
4410		adapter->cmd_privileges = MAX_PRIVILEGES;
4411	else
4412		adapter->cmd_privileges = MIN_PRIVILEGES;
4413}
4414
4415/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4416 * However, this HW limitation is not exposed to the host via any SLI cmd.
4417 * As a result, in the case of SRIOV and in particular multi-partition configs
4418 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4419 * for distribution between the VFs. This self-imposed limit will determine the
4420 * number of VFs for which RSS can be enabled.
4421 */
4422static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4423{
4424	struct be_port_resources port_res = {0};
4425	u8 rss_tables_on_port;
4426	u16 max_vfs = be_max_vfs(adapter);
4427
4428	be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4429				  RESOURCE_LIMITS, 0);
4430
4431	rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4432
4433	/* Each PF Pool's RSS Tables limit =
4434	 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4435	 */
4436	adapter->pool_res.max_rss_tables =
4437		max_vfs * rss_tables_on_port / port_res.max_vfs;
4438}
4439
4440static int be_get_sriov_config(struct be_adapter *adapter)
4441{
4442	struct be_resources res = {0};
4443	int max_vfs, old_vfs;
4444
4445	be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4446				  RESOURCE_LIMITS, 0);
4447
4448	/* Some old versions of BE3 FW don't report max_vfs value */
4449	if (BE3_chip(adapter) && !res.max_vfs) {
4450		max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4451		res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4452	}
4453
4454	adapter->pool_res = res;
4455
4456	/* If during previous unload of the driver, the VFs were not disabled,
4457	 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4458	 * Instead use the TotalVFs value stored in the pci-dev struct.
4459	 */
4460	old_vfs = pci_num_vf(adapter->pdev);
4461	if (old_vfs) {
4462		dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4463			 old_vfs);
4464
4465		adapter->pool_res.max_vfs =
4466			pci_sriov_get_totalvfs(adapter->pdev);
4467		adapter->num_vfs = old_vfs;
4468	}
4469
4470	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4471		be_calculate_pf_pool_rss_tables(adapter);
4472		dev_info(&adapter->pdev->dev,
4473			 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4474			 be_max_pf_pool_rss_tables(adapter));
4475	}
4476	return 0;
4477}
4478
4479static void be_alloc_sriov_res(struct be_adapter *adapter)
4480{
4481	int old_vfs = pci_num_vf(adapter->pdev);
4482	struct  be_resources vft_res = {0};
4483	int status;
4484
4485	be_get_sriov_config(adapter);
4486
4487	if (!old_vfs)
4488		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4489
4490	/* When the HW is in SRIOV capable configuration, the PF-pool
4491	 * resources are given to PF during driver load, if there are no
4492	 * old VFs. This facility is not available in BE3 FW.
4493	 * Also, this is done by FW in Lancer chip.
4494	 */
4495	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4496		be_calculate_vf_res(adapter, 0, &vft_res);
4497		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4498						 &vft_res);
4499		if (status)
4500			dev_err(&adapter->pdev->dev,
4501				"Failed to optimize SRIOV resources\n");
4502	}
4503}
4504
4505static int be_get_resources(struct be_adapter *adapter)
4506{
4507	struct device *dev = &adapter->pdev->dev;
4508	struct be_resources res = {0};
4509	int status;
4510
4511	/* For Lancer, SH, etc. read per-function resource limits from FW.
4512	 * GET_FUNC_CONFIG returns per-function guaranteed limits.
4513	 * GET_PROFILE_CONFIG returns PCI-E related PF-pool limits.
4514	 */
4515	if (BEx_chip(adapter)) {
4516		BEx_get_resources(adapter, &res);
4517	} else {
4518		status = be_cmd_get_func_config(adapter, &res);
4519		if (status)
4520			return status;
4521
4522		/* If a default RXQ must be created, we'll use up one RSSQ */
4523		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4524		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4525			res.max_rss_qs -= 1;
4526	}
4527
4528	/* If RoCE is supported stash away half the EQs for RoCE */
4529	res.max_nic_evt_qs = be_roce_supported(adapter) ?
4530				res.max_evt_qs / 2 : res.max_evt_qs;
4531	adapter->res = res;
4532
4533	/* If FW supports RSS default queue, then skip creating non-RSS
4534	 * queue for non-IP traffic.
4535	 */
4536	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4537				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4538
4539	dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4540		 be_max_txqs(adapter), be_max_rxqs(adapter),
4541		 be_max_rss(adapter), be_max_nic_eqs(adapter),
4542		 be_max_vfs(adapter));
4543	dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4544		 be_max_uc(adapter), be_max_mc(adapter),
4545		 be_max_vlans(adapter));
4546
4547	/* Ensure RX and TX queues are created in pairs at init time */
4548	adapter->cfg_num_rx_irqs =
4549				min_t(u16, netif_get_num_default_rss_queues(),
4550				      be_max_qp_irqs(adapter));
4551	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4552	return 0;
4553}
4554
4555static int be_get_config(struct be_adapter *adapter)
4556{
4557	int status, level;
4558	u16 profile_id;
4559
4560	status = be_cmd_get_cntl_attributes(adapter);
4561	if (status)
4562		return status;
4563
4564	status = be_cmd_query_fw_cfg(adapter);
4565	if (status)
4566		return status;
4567
4568	if (!lancer_chip(adapter) && be_physfn(adapter))
4569		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4570
4571	if (BEx_chip(adapter)) {
4572		level = be_cmd_get_fw_log_level(adapter);
4573		adapter->msg_enable =
4574			level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4575	}
4576
4577	be_cmd_get_acpi_wol_cap(adapter);
4578	pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4579	pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4580
4581	be_cmd_query_port_name(adapter);
4582
4583	if (be_physfn(adapter)) {
4584		status = be_cmd_get_active_profile(adapter, &profile_id);
4585		if (!status)
4586			dev_info(&adapter->pdev->dev,
4587				 "Using profile 0x%x\n", profile_id);
4588	}
4589
4590	return 0;
4591}
4592
4593static int be_mac_setup(struct be_adapter *adapter)
4594{
4595	u8 mac[ETH_ALEN];
4596	int status;
4597
4598	if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4599		status = be_cmd_get_perm_mac(adapter, mac);
4600		if (status)
4601			return status;
4602
4603		memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4604		memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4605
4606		/* Initial MAC for BE3 VFs is already programmed by PF */
4607		if (BEx_chip(adapter) && be_virtfn(adapter))
4608			memcpy(adapter->dev_mac, mac, ETH_ALEN);
4609	}
4610
4611	return 0;
4612}
4613
4614static void be_schedule_worker(struct be_adapter *adapter)
4615{
4616	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4617	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4618}
4619
4620static void be_destroy_err_recovery_workq(void)
4621{
4622	if (!be_err_recovery_workq)
4623		return;
4624
4625	flush_workqueue(be_err_recovery_workq);
4626	destroy_workqueue(be_err_recovery_workq);
4627	be_err_recovery_workq = NULL;
4628}
4629
4630static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4631{
4632	struct be_error_recovery *err_rec = &adapter->error_recovery;
4633
4634	if (!be_err_recovery_workq)
4635		return;
4636
4637	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4638			   msecs_to_jiffies(delay));
4639	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4640}
4641
4642static int be_setup_queues(struct be_adapter *adapter)
4643{
4644	struct net_device *netdev = adapter->netdev;
4645	int status;
4646
4647	status = be_evt_queues_create(adapter);
4648	if (status)
4649		goto err;
4650
4651	status = be_tx_qs_create(adapter);
4652	if (status)
4653		goto err;
4654
4655	status = be_rx_cqs_create(adapter);
4656	if (status)
4657		goto err;
4658
4659	status = be_mcc_queues_create(adapter);
4660	if (status)
4661		goto err;
4662
4663	status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4664	if (status)
4665		goto err;
4666
4667	status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4668	if (status)
4669		goto err;
4670
4671	return 0;
4672err:
4673	dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4674	return status;
4675}
4676
4677static int be_if_create(struct be_adapter *adapter)
4678{
4679	u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4680	u32 cap_flags = be_if_cap_flags(adapter);
4681	int status;
4682
4683	/* alloc required memory for other filtering fields */
4684	adapter->pmac_id = kcalloc(be_max_uc(adapter),
4685				   sizeof(*adapter->pmac_id), GFP_KERNEL);
4686	if (!adapter->pmac_id)
4687		return -ENOMEM;
4688
4689	adapter->mc_list = kcalloc(be_max_mc(adapter),
4690				   sizeof(*adapter->mc_list), GFP_KERNEL);
4691	if (!adapter->mc_list)
4692		return -ENOMEM;
4693
4694	adapter->uc_list = kcalloc(be_max_uc(adapter),
4695				   sizeof(*adapter->uc_list), GFP_KERNEL);
4696	if (!adapter->uc_list)
4697		return -ENOMEM;
4698
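	/* With a single RX irq there is only one RXQ, so RSS is of no use */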
4699	if (adapter->cfg_num_rx_irqs == 1)
4700		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4701
4702	en_flags &= cap_flags;
4703	/* will enable all the needed filter flags in be_open() */
4704	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4705				  &adapter->if_handle, 0);
4706
4707	if (status)
4708		return status;
4709
4710	return 0;
4711}
4712
4713int be_update_queues(struct be_adapter *adapter)
4714{
4715	struct net_device *netdev = adapter->netdev;
4716	int status;
4717
4718	if (netif_running(netdev)) {
4719		/* be_tx_timeout() must not run concurrently with this
4720		 * function, synchronize with an already-running dev_watchdog
4721		 */
4722		netif_tx_lock_bh(netdev);
4723		/* device cannot transmit now, avoid dev_watchdog timeouts */
4724		netif_carrier_off(netdev);
4725		netif_tx_unlock_bh(netdev);
4726
4727		be_close(netdev);
4728	}
4729
4730	be_cancel_worker(adapter);
4731
4732	/* If any vectors have been shared with RoCE we cannot re-program
4733	 * the MSIx table.
4734	 */
4735	if (!adapter->num_msix_roce_vec)
4736		be_msix_disable(adapter);
4737
4738	be_clear_queues(adapter);
4739	status = be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4740	if (status)
4741		return status;
4742
4743	if (!msix_enabled(adapter)) {
4744		status = be_msix_enable(adapter);
4745		if (status)
4746			return status;
4747	}
4748
4749	status = be_if_create(adapter);
4750	if (status)
4751		return status;
4752
4753	status = be_setup_queues(adapter);
4754	if (status)
4755		return status;
4756
4757	be_schedule_worker(adapter);
4758
4759	/* The IF was destroyed and re-created. We need to clear
4760	 * all promiscuous flags valid for the destroyed IF.
4761	 * Without this promisc mode is not restored during
4762	 * be_open() because the driver thinks that it is
4763	 * already enabled in HW.
4764	 */
4765	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4766
4767	if (netif_running(netdev))
4768		status = be_open(netdev);
4769
4770	return status;
4771}
4772
4773static inline int fw_major_num(const char *fw_ver)
4774{
4775	int fw_major = 0, i;
4776
4777	i = sscanf(fw_ver, "%d.", &fw_major);
4778	if (i != 1)
4779		return 0;
4780
4781	return fw_major;
4782}
4783
4784/* During error recovery, always FLR the PF.
4785 * Otherwise, FLR the PF only if no VFs are already enabled.
4786 */
4787static bool be_reset_required(struct be_adapter *adapter)
4788{
4789	if (be_error_recovering(adapter))
4790		return true;
4791	else
4792		return pci_num_vf(adapter->pdev) == 0;
4793}
4794
4795/* Wait for the FW to be ready and perform the required initialization */
4796static int be_func_init(struct be_adapter *adapter)
4797{
4798	int status;
4799
4800	status = be_fw_wait_ready(adapter);
4801	if (status)
4802		return status;
4803
4804	/* FW is now ready; clear errors to allow cmds/doorbell */
4805	be_clear_error(adapter, BE_CLEAR_ALL);
4806
4807	if (be_reset_required(adapter)) {
4808		status = be_cmd_reset_function(adapter);
4809		if (status)
4810			return status;
4811
4812		/* Wait for interrupts to quiesce after an FLR */
4813		msleep(100);
4814	}
4815
4816	/* Tell FW we're ready to fire cmds */
4817	status = be_cmd_fw_init(adapter);
4818	if (status)
4819		return status;
4820
4821	/* Allow interrupts for other ULPs running on NIC function */
4822	be_intr_set(adapter, true);
4823
4824	return 0;
4825}
4826
4827static int be_setup(struct be_adapter *adapter)
4828{
4829	struct device *dev = &adapter->pdev->dev;
4830	int status;
4831
4832	status = be_func_init(adapter);
4833	if (status)
4834		return status;
4835
4836	be_setup_init(adapter);
4837
4838	if (!lancer_chip(adapter))
4839		be_cmd_req_native_mode(adapter);
4840
4841	/* invoke this cmd first to get pf_num and vf_num which are needed
4842	 * for issuing profile related cmds
4843	 */
4844	if (!BEx_chip(adapter)) {
4845		status = be_cmd_get_func_config(adapter, NULL);
4846		if (status)
4847			return status;
4848	}
4849
4850	status = be_get_config(adapter);
4851	if (status)
4852		goto err;
4853
4854	if (!BE2_chip(adapter) && be_physfn(adapter))
4855		be_alloc_sriov_res(adapter);
4856
4857	status = be_get_resources(adapter);
4858	if (status)
4859		goto err;
4860
4861	status = be_msix_enable(adapter);
4862	if (status)
4863		goto err;
4864
4865	/* will enable all the needed filter flags in be_open() */
4866	status = be_if_create(adapter);
4867	if (status)
4868		goto err;
4869
4870	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4871	rtnl_lock();
4872	status = be_setup_queues(adapter);
4873	rtnl_unlock();
4874	if (status)
4875		goto err;
4876
4877	be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4878
4879	status = be_mac_setup(adapter);
4880	if (status)
4881		goto err;
4882
4883	be_cmd_get_fw_ver(adapter);
4884	dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4885
4886	if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4887		dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4888			adapter->fw_ver);
4889		dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4890	}
4891
4892	status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4893					 adapter->rx_fc);
4894	if (status)
4895		be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4896					&adapter->rx_fc);
4897
4898	dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4899		 adapter->tx_fc, adapter->rx_fc);
4900
4901	if (be_physfn(adapter))
4902		be_cmd_set_logical_link_config(adapter,
4903					       IFLA_VF_LINK_STATE_AUTO, 0);
4904
4905	/* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4906	 * confusing a Linux bridge or OVS that it might be connected to.
4907	 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4908	 * when SRIOV is not enabled.
4909	 */
4910	if (BE3_chip(adapter))
4911		be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4912				      PORT_FWD_TYPE_PASSTHRU, 0);
4913
4914	if (adapter->num_vfs)
4915		be_vf_setup(adapter);
4916
4917	status = be_cmd_get_phy_info(adapter);
4918	if (!status && be_pause_supported(adapter))
4919		adapter->phy.fc_autoneg = 1;
4920
4921	if (be_physfn(adapter) && !lancer_chip(adapter))
4922		be_cmd_set_features(adapter);
4923
4924	be_schedule_worker(adapter);
4925	adapter->flags |= BE_FLAGS_SETUP_DONE;
4926	return 0;
4927err:
4928	be_clear(adapter);
4929	return status;
4930}
4931
4932#ifdef CONFIG_NET_POLL_CONTROLLER
4933static void be_netpoll(struct net_device *netdev)
4934{
4935	struct be_adapter *adapter = netdev_priv(netdev);
4936	struct be_eq_obj *eqo;
4937	int i;
4938
4939	for_all_evt_queues(adapter, eqo, i) {
4940		be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4941		napi_schedule(&eqo->napi);
4942	}
4943}
4944#endif
4945
4946int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4947{
4948	const struct firmware *fw;
4949	int status;
4950
4951	if (!netif_running(adapter->netdev)) {
4952		dev_err(&adapter->pdev->dev,
4953			"Firmware load not allowed (interface is down)\n");
4954		return -ENETDOWN;
4955	}
4956
4957	status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4958	if (status)
4959		goto fw_exit;
4960
4961	dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4962
4963	if (lancer_chip(adapter))
4964		status = lancer_fw_download(adapter, fw);
4965	else
4966		status = be_fw_download(adapter, fw);
4967
4968	if (!status)
4969		be_cmd_get_fw_ver(adapter);
4970
4971fw_exit:
4972	release_firmware(fw);
4973	return status;
4974}
4975
4976static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4977				 u16 flags, struct netlink_ext_ack *extack)
4978{
4979	struct be_adapter *adapter = netdev_priv(dev);
4980	struct nlattr *attr, *br_spec;
4981	int rem;
4982	int status = 0;
4983	u16 mode = 0;
4984
4985	if (!sriov_enabled(adapter))
4986		return -EOPNOTSUPP;
4987
4988	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4989	if (!br_spec)
4990		return -EINVAL;
4991
4992	nla_for_each_nested(attr, br_spec, rem) {
4993		if (nla_type(attr) != IFLA_BRIDGE_MODE)
4994			continue;
4995
4996		if (nla_len(attr) < sizeof(mode))
4997			return -EINVAL;
4998
4999		mode = nla_get_u16(attr);
5000		if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
5001			return -EOPNOTSUPP;
5002
5003		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
5004			return -EINVAL;
5005
5006		status = be_cmd_set_hsw_config(adapter, 0, 0,
5007					       adapter->if_handle,
5008					       mode == BRIDGE_MODE_VEPA ?
5009					       PORT_FWD_TYPE_VEPA :
5010					       PORT_FWD_TYPE_VEB, 0);
5011		if (status)
5012			goto err;
5013
5014		dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
5015			 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5016
5017		return status;
5018	}
5019err:
5020	dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5021		mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5022
5023	return status;
5024}
5025
5026static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5027				 struct net_device *dev, u32 filter_mask,
5028				 int nlflags)
5029{
5030	struct be_adapter *adapter = netdev_priv(dev);
5031	int status = 0;
5032	u8 hsw_mode;
5033
5034	/* BE and Lancer chips support VEB mode only */
5035	if (BEx_chip(adapter) || lancer_chip(adapter)) {
5036		/* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5037		if (!pci_sriov_get_totalvfs(adapter->pdev))
5038			return 0;
5039		hsw_mode = PORT_FWD_TYPE_VEB;
5040	} else {
5041		status = be_cmd_get_hsw_config(adapter, NULL, 0,
5042					       adapter->if_handle, &hsw_mode,
5043					       NULL);
5044		if (status)
5045			return 0;
5046
5047		if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5048			return 0;
5049	}
5050
5051	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5052				       hsw_mode == PORT_FWD_TYPE_VEPA ?
5053				       BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5054				       0, 0, nlflags, filter_mask, NULL);
5055}
5056
5057static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5058					 void (*func)(struct work_struct *))
5059{
5060	struct be_cmd_work *work;
5061
5062	work = kzalloc(sizeof(*work), GFP_ATOMIC);
5063	if (!work) {
5064		dev_err(&adapter->pdev->dev,
5065			"be_work memory allocation failed\n");
5066		return NULL;
5067	}
5068
5069	INIT_WORK(&work->work, func);
5070	work->adapter = adapter;
5071	return work;
5072}
5073
5074static netdev_features_t be_features_check(struct sk_buff *skb,
5075					   struct net_device *dev,
5076					   netdev_features_t features)
5077{
5078	struct be_adapter *adapter = netdev_priv(dev);
5079	u8 l4_hdr = 0;
5080
5081	if (skb_is_gso(skb)) {
5082		/* IPv6 TSO requests with extension hdrs are a problem
5083		 * for Lancer and BE3 HW. Disable the TSO6 feature.
5084		 */
5085		if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5086			features &= ~NETIF_F_TSO6;
5087
5088		/* Lancer cannot handle a packet with an MSS less than 256.
5089		 * It also can't handle a TSO packet with a single segment.
5090		 * Disable GSO support in such cases.
5091		 */
5092		if (lancer_chip(adapter) &&
5093		    (skb_shinfo(skb)->gso_size < 256 ||
5094		     skb_shinfo(skb)->gso_segs == 1))
5095			features &= ~NETIF_F_GSO_MASK;
5096	}
5097
5098	/* The code below restricts offload features for some tunneled and
5099	 * Q-in-Q packets.
5100	 * Offload features for normal (non tunnel) packets are unchanged.
5101	 */
5102	features = vlan_features_check(skb, features);
5103	if (!skb->encapsulation ||
5104	    !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5105		return features;
5106
5107	/* It's an encapsulated packet and VxLAN offloads are enabled. We
5108	 * should disable tunnel offload features if it's not a VxLAN packet,
5109	 * as tunnel offloads have been enabled only for VxLAN. This is done to
5110	 * allow other tunneled traffic like GRE to work fine while VxLAN
5111	 * offloads are configured in Skyhawk-R.
5112	 */
5113	switch (vlan_get_protocol(skb)) {
5114	case htons(ETH_P_IP):
5115		l4_hdr = ip_hdr(skb)->protocol;
5116		break;
5117	case htons(ETH_P_IPV6):
5118		l4_hdr = ipv6_hdr(skb)->nexthdr;
5119		break;
5120	default:
5121		return features;
5122	}
5123
5124	if (l4_hdr != IPPROTO_UDP ||
5125	    skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5126	    skb->inner_protocol != htons(ETH_P_TEB) ||
5127	    skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5128		sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5129	    !adapter->vxlan_port ||
5130	    udp_hdr(skb)->dest != adapter->vxlan_port)
5131		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5132
5133	return features;
5134}
5135
5136static int be_get_phys_port_id(struct net_device *dev,
5137			       struct netdev_phys_item_id *ppid)
5138{
5139	int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5140	struct be_adapter *adapter = netdev_priv(dev);
5141	u8 *id;
5142
5143	if (MAX_PHYS_ITEM_ID_LEN < id_len)
5144		return -ENOSPC;
5145
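	/* Compose the port id from the 1-based HBA port number followed by
	 * the controller serial number, copied word by word in reverse order.
	 */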
5146	ppid->id[0] = adapter->hba_port_num + 1;
5147	id = &ppid->id[1];
5148	for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5149	     i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5150		memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5151
5152	ppid->id_len = id_len;
5153
5154	return 0;
5155}
5156
5157static void be_set_rx_mode(struct net_device *dev)
5158{
5159	struct be_adapter *adapter = netdev_priv(dev);
5160	struct be_cmd_work *work;
5161
5162	work = be_alloc_work(adapter, be_work_set_rx_mode);
5163	if (work)
5164		queue_work(be_wq, &work->work);
5165}
5166
5167static const struct net_device_ops be_netdev_ops = {
5168	.ndo_open		= be_open,
5169	.ndo_stop		= be_close,
5170	.ndo_start_xmit		= be_xmit,
5171	.ndo_set_rx_mode	= be_set_rx_mode,
5172	.ndo_set_mac_address	= be_mac_addr_set,
5173	.ndo_get_stats64	= be_get_stats64,
5174	.ndo_validate_addr	= eth_validate_addr,
5175	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
5176	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
5177	.ndo_set_vf_mac		= be_set_vf_mac,
5178	.ndo_set_vf_vlan	= be_set_vf_vlan,
5179	.ndo_set_vf_rate	= be_set_vf_tx_rate,
5180	.ndo_get_vf_config	= be_get_vf_config,
5181	.ndo_set_vf_link_state  = be_set_vf_link_state,
5182	.ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5183	.ndo_tx_timeout		= be_tx_timeout,
5184#ifdef CONFIG_NET_POLL_CONTROLLER
5185	.ndo_poll_controller	= be_netpoll,
5186#endif
5187	.ndo_bridge_setlink	= be_ndo_bridge_setlink,
5188	.ndo_bridge_getlink	= be_ndo_bridge_getlink,
5189	.ndo_udp_tunnel_add	= udp_tunnel_nic_add_port,
5190	.ndo_udp_tunnel_del	= udp_tunnel_nic_del_port,
5191	.ndo_features_check	= be_features_check,
5192	.ndo_get_phys_port_id   = be_get_phys_port_id,
5193};
5194
5195static void be_netdev_init(struct net_device *netdev)
5196{
5197	struct be_adapter *adapter = netdev_priv(netdev);
5198
5199	netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5200		NETIF_F_GSO_UDP_TUNNEL |
5201		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5202		NETIF_F_HW_VLAN_CTAG_TX;
5203	if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5204		netdev->hw_features |= NETIF_F_RXHASH;
5205
5206	netdev->features |= netdev->hw_features |
5207		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5208
5209	netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5210		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5211
5212	netdev->priv_flags |= IFF_UNICAST_FLT;
5213
5214	netdev->flags |= IFF_MULTICAST;
5215
5216	netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5217
5218	netdev->netdev_ops = &be_netdev_ops;
5219
5220	netdev->ethtool_ops = &be_ethtool_ops;
5221
5222	if (!lancer_chip(adapter) && !BEx_chip(adapter) && !be_is_mc(adapter))
5223		netdev->udp_tunnel_nic_info = &be_udp_tunnels;
5224
5225	/* MTU range: 256 - 9000 */
5226	netdev->min_mtu = BE_MIN_MTU;
5227	netdev->max_mtu = BE_MAX_MTU;
5228}
5229
5230static void be_cleanup(struct be_adapter *adapter)
5231{
5232	struct net_device *netdev = adapter->netdev;
5233
5234	rtnl_lock();
5235	netif_device_detach(netdev);
5236	if (netif_running(netdev))
5237		be_close(netdev);
5238	rtnl_unlock();
5239
5240	be_clear(adapter);
5241}
5242
5243static int be_resume(struct be_adapter *adapter)
5244{
5245	struct net_device *netdev = adapter->netdev;
5246	int status;
5247
5248	status = be_setup(adapter);
5249	if (status)
5250		return status;
5251
5252	rtnl_lock();
5253	if (netif_running(netdev))
5254		status = be_open(netdev);
5255	rtnl_unlock();
5256
5257	if (status)
5258		return status;
5259
5260	netif_device_attach(netdev);
5261
5262	return 0;
5263}
5264
5265static void be_soft_reset(struct be_adapter *adapter)
5266{
5267	u32 val;
5268
5269	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5270	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5271	val |= SLIPORT_SOFTRESET_SR_MASK;
5272	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5273}
5274
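/* A TPE error is treated as recoverable only if the POST stage reports a
 * recoverable error code, enough time has passed since driver load and since
 * the last recovery attempt, and the error code differs from the previous one.
 */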
5275static bool be_err_is_recoverable(struct be_adapter *adapter)
5276{
5277	struct be_error_recovery *err_rec = &adapter->error_recovery;
5278	unsigned long initial_idle_time =
5279		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5280	unsigned long recovery_interval =
5281		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5282	u16 ue_err_code;
5283	u32 val;
5284
5285	val = be_POST_stage_get(adapter);
5286	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5287		return false;
5288	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5289	if (ue_err_code == 0)
5290		return false;
5291
5292	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5293		ue_err_code);
5294
5295	if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5296		dev_err(&adapter->pdev->dev,
5297			"Cannot recover within %lu sec from driver load\n",
5298			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5299		return false;
5300	}
5301
5302	if (err_rec->last_recovery_time && time_before_eq(
5303		jiffies - err_rec->last_recovery_time, recovery_interval)) {
5304		dev_err(&adapter->pdev->dev,
5305			"Cannot recover within %lu sec from last recovery\n",
5306			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5307		return false;
5308	}
5309
5310	if (ue_err_code == err_rec->last_err_code) {
5311		dev_err(&adapter->pdev->dev,
5312			"Cannot recover from a consecutive TPE error\n");
5313		return false;
5314	}
5315
5316	err_rec->last_recovery_time = jiffies;
5317	err_rec->last_err_code = ue_err_code;
5318	return true;
5319}
5320
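/* TPE recovery state machine: NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL
 * -> REINIT. Returns -EAGAIN while recovery is still in progress, 0 once the
 * adapter is ready to be re-initialized, and a negative error on failure.
 */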
5321static int be_tpe_recover(struct be_adapter *adapter)
5322{
5323	struct be_error_recovery *err_rec = &adapter->error_recovery;
5324	int status = -EAGAIN;
5325	u32 val;
5326
5327	switch (err_rec->recovery_state) {
5328	case ERR_RECOVERY_ST_NONE:
5329		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5330		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5331		break;
5332
5333	case ERR_RECOVERY_ST_DETECT:
5334		val = be_POST_stage_get(adapter);
5335		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5336		    POST_STAGE_RECOVERABLE_ERR) {
5337			dev_err(&adapter->pdev->dev,
5338				"Unrecoverable HW error detected: 0x%x\n", val);
5339			status = -EINVAL;
5340			err_rec->resched_delay = 0;
5341			break;
5342		}
5343
5344		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5345
5346		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5347		 * milliseconds before it checks for final error status in
5348		 * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5349		 * If it does, then PF0 initiates a Soft Reset.
5350		 */
5351		if (adapter->pf_num == 0) {
5352			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5353			err_rec->resched_delay = err_rec->ue_to_reset_time -
5354					ERR_RECOVERY_UE_DETECT_DURATION;
5355			break;
5356		}
5357
5358		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5359		err_rec->resched_delay = err_rec->ue_to_poll_time -
5360					ERR_RECOVERY_UE_DETECT_DURATION;
5361		break;
5362
5363	case ERR_RECOVERY_ST_RESET:
5364		if (!be_err_is_recoverable(adapter)) {
5365			dev_err(&adapter->pdev->dev,
5366				"Failed to meet recovery criteria\n");
5367			status = -EIO;
5368			err_rec->resched_delay = 0;
5369			break;
5370		}
5371		be_soft_reset(adapter);
5372		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5373		err_rec->resched_delay = err_rec->ue_to_poll_time -
5374					err_rec->ue_to_reset_time;
5375		break;
5376
5377	case ERR_RECOVERY_ST_PRE_POLL:
5378		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5379		err_rec->resched_delay = 0;
5380		status = 0;			/* done */
5381		break;
5382
5383	default:
5384		status = -EINVAL;
5385		err_rec->resched_delay = 0;
5386		break;
5387	}
5388
5389	return status;
5390}
5391
5392static int be_err_recover(struct be_adapter *adapter)
5393{
5394	int status;
5395
5396	if (!lancer_chip(adapter)) {
5397		if (!adapter->error_recovery.recovery_supported ||
5398		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5399			return -EIO;
5400		status = be_tpe_recover(adapter);
5401		if (status)
5402			goto err;
5403	}
5404
5405	/* Wait for adapter to reach quiescent state before
5406	 * destroying queues
5407	 */
5408	status = be_fw_wait_ready(adapter);
5409	if (status)
5410		goto err;
5411
5412	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5413
5414	be_cleanup(adapter);
5415
5416	status = be_resume(adapter);
5417	if (status)
5418		goto err;
5419
5420	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5421
5422err:
5423	return status;
5424}
5425
5426static void be_err_detection_task(struct work_struct *work)
5427{
5428	struct be_error_recovery *err_rec =
5429			container_of(work, struct be_error_recovery,
5430				     err_detection_work.work);
5431	struct be_adapter *adapter =
5432			container_of(err_rec, struct be_adapter,
5433				     error_recovery);
5434	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5435	struct device *dev = &adapter->pdev->dev;
5436	int recovery_status;
5437
5438	be_detect_error(adapter);
5439	if (!be_check_error(adapter, BE_ERROR_HW))
5440		goto reschedule_task;
5441
5442	recovery_status = be_err_recover(adapter);
5443	if (!recovery_status) {
5444		err_rec->recovery_retries = 0;
5445		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5446		dev_info(dev, "Adapter recovery successful\n");
5447		goto reschedule_task;
5448	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5449		/* BEx/SH recovery state machine */
5450		if (adapter->pf_num == 0 &&
5451		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5452			dev_err(&adapter->pdev->dev,
5453				"Adapter recovery in progress\n");
5454		resched_delay = err_rec->resched_delay;
5455		goto reschedule_task;
5456	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5457		/* For VFs, check every second whether the PF has
5458		 * allocated resources.
5459		 */
5460		dev_err(dev, "Re-trying adapter recovery\n");
5461		goto reschedule_task;
5462	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5463		   ERR_RECOVERY_MAX_RETRY_COUNT) {
5464		/* In case of another error during recovery, it takes 30 sec
5465		 * for the adapter to come out of error. Retry error recovery after
5466		 * this time interval.
5467		 */
5468		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5469		resched_delay = ERR_RECOVERY_RETRY_DELAY;
5470		goto reschedule_task;
5471	} else {
5472		dev_err(dev, "Adapter recovery failed\n");
5473		dev_err(dev, "Please reboot server to recover\n");
5474	}
5475
5476	return;
5477
5478reschedule_task:
5479	be_schedule_err_detection(adapter, resched_delay);
5480}
5481
5482static void be_log_sfp_info(struct be_adapter *adapter)
5483{
5484	int status;
5485
5486	status = be_cmd_query_sfp_info(adapter);
5487	if (!status) {
5488		dev_err(&adapter->pdev->dev,
5489			"Port %c: %s Vendor: %s part no: %s\n",
5490			adapter->port_name,
5491			be_misconfig_evt_port_state[adapter->phy_state],
5492			adapter->phy.vendor_name,
5493			adapter->phy.vendor_pn);
5494	}
5495	adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5496}
5497
5498static void be_worker(struct work_struct *work)
5499{
5500	struct be_adapter *adapter =
5501		container_of(work, struct be_adapter, work.work);
5502	struct be_rx_obj *rxo;
5503	int i;
5504
5505	if (be_physfn(adapter) &&
5506	    MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5507		be_cmd_get_die_temperature(adapter);
5508
5509	/* when interrupts are not yet enabled, just reap any pending
5510	 * mcc completions
5511	 */
5512	if (!netif_running(adapter->netdev)) {
5513		local_bh_disable();
5514		be_process_mcc(adapter);
5515		local_bh_enable();
5516		goto reschedule;
5517	}
5518
5519	if (!adapter->stats_cmd_sent) {
5520		if (lancer_chip(adapter))
5521			lancer_cmd_get_pport_stats(adapter,
5522						   &adapter->stats_cmd);
5523		else
5524			be_cmd_get_stats(adapter, &adapter->stats_cmd);
5525	}
5526
5527	for_all_rx_queues(adapter, rxo, i) {
5528		/* Replenish RX-queues starved due to memory
5529		 * allocation failures.
5530		 */
5531		if (rxo->rx_post_starved)
5532			be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5533	}
5534
5535	/* EQ-delay update for Skyhawk is done while notifying EQ */
5536	if (!skyhawk_chip(adapter))
5537		be_eqd_update(adapter, false);
5538
5539	if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5540		be_log_sfp_info(adapter);
5541
5542reschedule:
5543	adapter->work_counter++;
5544	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5545}
5546
5547static void be_unmap_pci_bars(struct be_adapter *adapter)
5548{
5549	if (adapter->csr)
5550		pci_iounmap(adapter->pdev, adapter->csr);
5551	if (adapter->db)
5552		pci_iounmap(adapter->pdev, adapter->db);
5553	if (adapter->pcicfg && adapter->pcicfg_mapped)
5554		pci_iounmap(adapter->pdev, adapter->pcicfg);
5555}
5556
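/* The doorbell BAR is BAR 0 on Lancer and on VFs, and BAR 4 otherwise */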
5557static int db_bar(struct be_adapter *adapter)
5558{
5559	if (lancer_chip(adapter) || be_virtfn(adapter))
5560		return 0;
5561	else
5562		return 4;
5563}
5564
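/* On Skyhawk, record the location and size of the RoCE doorbell region
 * (located in the same BAR as the NIC doorbells).
 */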
5565static int be_roce_map_pci_bars(struct be_adapter *adapter)
5566{
5567	if (skyhawk_chip(adapter)) {
5568		adapter->roce_db.size = 4096;
5569		adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5570							      db_bar(adapter));
5571		adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5572							       db_bar(adapter));
5573	}
5574	return 0;
5575}
5576
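/* Map the PCI BARs used by the driver: the CSR BAR (BAR 2) on BEx PFs,
 * the doorbell BAR, and the PCICFG BAR on Skyhawk/BEx PFs (VFs reach
 * PCICFG through a fixed offset within the doorbell BAR instead).
 */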
5577static int be_map_pci_bars(struct be_adapter *adapter)
5578{
5579	struct pci_dev *pdev = adapter->pdev;
5580	u8 __iomem *addr;
5581	u32 sli_intf;
5582
5583	pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5584	adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5585				SLI_INTF_FAMILY_SHIFT;
5586	adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5587
5588	if (BEx_chip(adapter) && be_physfn(adapter)) {
5589		adapter->csr = pci_iomap(pdev, 2, 0);
5590		if (!adapter->csr)
5591			return -ENOMEM;
5592	}
5593
5594	addr = pci_iomap(pdev, db_bar(adapter), 0);
5595	if (!addr)
5596		goto pci_map_err;
5597	adapter->db = addr;
5598
5599	if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5600		if (be_physfn(adapter)) {
5601			/* PCICFG is the 2nd BAR in BE2 */
5602			addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5603			if (!addr)
5604				goto pci_map_err;
5605			adapter->pcicfg = addr;
5606			adapter->pcicfg_mapped = true;
5607		} else {
5608			adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5609			adapter->pcicfg_mapped = false;
5610		}
5611	}
5612
5613	be_roce_map_pci_bars(adapter);
5614	return 0;
5615
5616pci_map_err:
5617	dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5618	be_unmap_pci_bars(adapter);
5619	return -ENOMEM;
5620}
5621
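/* Free the DMA-coherent buffers (mailbox, RX-filter cmd and stats cmd)
 * allocated by be_drv_init()
 */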
5622static void be_drv_cleanup(struct be_adapter *adapter)
5623{
5624	struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5625	struct device *dev = &adapter->pdev->dev;
5626
5627	if (mem->va)
5628		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5629
5630	mem = &adapter->rx_filter;
5631	if (mem->va)
5632		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5633
5634	mem = &adapter->stats_cmd;
5635	if (mem->va)
5636		dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5637}
5638
5639/* Allocate and initialize various fields in be_adapter struct */
5640static int be_drv_init(struct be_adapter *adapter)
5641{
5642	struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5643	struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5644	struct be_dma_mem *rx_filter = &adapter->rx_filter;
5645	struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5646	struct device *dev = &adapter->pdev->dev;
5647	int status = 0;
5648
5649	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5650	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5651						&mbox_mem_alloc->dma,
5652						GFP_KERNEL);
5653	if (!mbox_mem_alloc->va)
5654		return -ENOMEM;
5655
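	/* The mailbox is over-allocated by 16 bytes above so that both its
	 * CPU and DMA addresses can be aligned to a 16-byte boundary here.
	 */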
5656	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5657	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5658	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5659
5660	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5661	rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5662					   &rx_filter->dma, GFP_KERNEL);
5663	if (!rx_filter->va) {
5664		status = -ENOMEM;
5665		goto free_mbox;
5666	}
5667
5668	if (lancer_chip(adapter))
5669		stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5670	else if (BE2_chip(adapter))
5671		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5672	else if (BE3_chip(adapter))
5673		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5674	else
5675		stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5676	stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5677					   &stats_cmd->dma, GFP_KERNEL);
5678	if (!stats_cmd->va) {
5679		status = -ENOMEM;
5680		goto free_rx_filter;
5681	}
5682
5683	mutex_init(&adapter->mbox_lock);
5684	mutex_init(&adapter->mcc_lock);
5685	mutex_init(&adapter->rx_filter_lock);
5686	spin_lock_init(&adapter->mcc_cq_lock);
5687	init_completion(&adapter->et_cmd_compl);
5688
5689	pci_save_state(adapter->pdev);
5690
5691	INIT_DELAYED_WORK(&adapter->work, be_worker);
5692
5693	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5694	adapter->error_recovery.resched_delay = 0;
5695	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5696			  be_err_detection_task);
5697
5698	adapter->rx_fc = true;
5699	adapter->tx_fc = true;
5700
5701	/* Must be a power of 2 or else MODULO will BUG_ON */
5702	adapter->be_get_temp_freq = 64;
5703
5704	return 0;
5705
5706free_rx_filter:
5707	dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5708free_mbox:
5709	dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5710			  mbox_mem_alloc->dma);
5711	return status;
5712}
5713
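/* PCI remove: tear down RoCE, stop error detection, unregister the netdev,
 * release adapter resources and HW state, then unmap BARs and free the
 * driver's DMA buffers before disabling the PCI device.
 */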
5714static void be_remove(struct pci_dev *pdev)
5715{
5716	struct be_adapter *adapter = pci_get_drvdata(pdev);
5717
5718	if (!adapter)
5719		return;
5720
5721	be_roce_dev_remove(adapter);
5722	be_intr_set(adapter, false);
5723
5724	be_cancel_err_detection(adapter);
5725
5726	unregister_netdev(adapter->netdev);
5727
5728	be_clear(adapter);
5729
5730	if (!pci_vfs_assigned(adapter->pdev))
5731		be_cmd_reset_function(adapter);
5732
	/* Tell the FW we're done issuing cmds */
5734	be_cmd_fw_clean(adapter);
5735
5736	be_unmap_pci_bars(adapter);
5737	be_drv_cleanup(adapter);
5738
5739	pci_disable_pcie_error_reporting(pdev);
5740
5741	pci_release_regions(pdev);
5742	pci_disable_device(pdev);
5743
5744	free_netdev(adapter->netdev);
5745}
5746
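/* hwmon show callback: report the last cached on-die temperature in
 * millidegree Celsius, or -EIO if no valid reading is available yet.
 */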
5747static ssize_t be_hwmon_show_temp(struct device *dev,
5748				  struct device_attribute *dev_attr,
5749				  char *buf)
5750{
5751	struct be_adapter *adapter = dev_get_drvdata(dev);
5752
5753	/* Unit: millidegree Celsius */
5754	if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5755		return -EIO;
5756	else
5757		return sprintf(buf, "%u\n",
5758			       adapter->hwmon_info.be_on_die_temp * 1000);
5759}
5760
5761static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5762			  be_hwmon_show_temp, NULL, 1);
5763
5764static struct attribute *be_hwmon_attrs[] = {
5765	&sensor_dev_attr_temp1_input.dev_attr.attr,
5766	NULL
5767};
5768
5769ATTRIBUTE_GROUPS(be_hwmon);
5770
5771static char *mc_name(struct be_adapter *adapter)
5772{
5773	char *str = "";	/* default */
5774
5775	switch (adapter->mc_type) {
5776	case UMC:
5777		str = "UMC";
5778		break;
5779	case FLEX10:
5780		str = "FLEX10";
5781		break;
5782	case vNIC1:
5783		str = "vNIC-1";
5784		break;
5785	case nPAR:
5786		str = "nPAR";
5787		break;
5788	case UFP:
5789		str = "UFP";
5790		break;
5791	case vNIC2:
5792		str = "vNIC-2";
5793		break;
5794	default:
5795		str = "";
5796	}
5797
5798	return str;
5799}
5800
5801static inline char *func_name(struct be_adapter *adapter)
5802{
5803	return be_physfn(adapter) ? "PF" : "VF";
5804}
5805
5806static inline char *nic_name(struct pci_dev *pdev)
5807{
5808	switch (pdev->device) {
5809	case OC_DEVICE_ID1:
5810		return OC_NAME;
5811	case OC_DEVICE_ID2:
5812		return OC_NAME_BE;
5813	case OC_DEVICE_ID3:
5814	case OC_DEVICE_ID4:
5815		return OC_NAME_LANCER;
5816	case BE_DEVICE_ID2:
5817		return BE3_NAME;
5818	case OC_DEVICE_ID5:
5819	case OC_DEVICE_ID6:
5820		return OC_NAME_SH;
5821	default:
5822		return BE_NAME;
5823	}
5824}
5825
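/* PCI probe: enable the device, map BARs, set the DMA mask, allocate driver
 * resources, bring up the adapter via be_setup(), register the netdev and
 * start the error-detection work.
 */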
5826static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5827{
5828	struct be_adapter *adapter;
5829	struct net_device *netdev;
5830	int status = 0;
5831
5832	status = pci_enable_device(pdev);
5833	if (status)
5834		goto do_none;
5835
5836	status = pci_request_regions(pdev, DRV_NAME);
5837	if (status)
5838		goto disable_dev;
5839	pci_set_master(pdev);
5840
5841	netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5842	if (!netdev) {
5843		status = -ENOMEM;
5844		goto rel_reg;
5845	}
5846	adapter = netdev_priv(netdev);
5847	adapter->pdev = pdev;
5848	pci_set_drvdata(pdev, adapter);
5849	adapter->netdev = netdev;
5850	SET_NETDEV_DEV(netdev, &pdev->dev);
5851
5852	status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5853	if (!status) {
5854		netdev->features |= NETIF_F_HIGHDMA;
5855	} else {
5856		status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5857		if (status) {
5858			dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5859			goto free_netdev;
5860		}
5861	}
5862
5863	status = pci_enable_pcie_error_reporting(pdev);
5864	if (!status)
5865		dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5866
5867	status = be_map_pci_bars(adapter);
5868	if (status)
5869		goto free_netdev;
5870
5871	status = be_drv_init(adapter);
5872	if (status)
5873		goto unmap_bars;
5874
5875	status = be_setup(adapter);
5876	if (status)
5877		goto drv_cleanup;
5878
5879	be_netdev_init(netdev);
5880	status = register_netdev(netdev);
5881	if (status != 0)
5882		goto unsetup;
5883
5884	be_roce_dev_add(adapter);
5885
5886	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5887	adapter->error_recovery.probe_time = jiffies;
5888
	/* On-die temperature is not supported on VFs. */
5890	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5891		adapter->hwmon_info.hwmon_dev =
5892			devm_hwmon_device_register_with_groups(&pdev->dev,
5893							       DRV_NAME,
5894							       adapter,
5895							       be_hwmon_groups);
5896		adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5897	}
5898
5899	dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5900		 func_name(adapter), mc_name(adapter), adapter->port_name);
5901
5902	return 0;
5903
5904unsetup:
5905	be_clear(adapter);
5906drv_cleanup:
5907	be_drv_cleanup(adapter);
5908unmap_bars:
5909	be_unmap_pci_bars(adapter);
5910free_netdev:
5911	pci_disable_pcie_error_reporting(pdev);
5912	free_netdev(netdev);
5913rel_reg:
5914	pci_release_regions(pdev);
5915disable_dev:
5916	pci_disable_device(pdev);
5917do_none:
5918	dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5919	return status;
5920}
5921
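/* PM suspend: mask adapter interrupts, stop error detection and quiesce
 * the adapter via be_cleanup()
 */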
5922static int __maybe_unused be_suspend(struct device *dev_d)
5923{
5924	struct be_adapter *adapter = dev_get_drvdata(dev_d);
5925
5926	be_intr_set(adapter, false);
5927	be_cancel_err_detection(adapter);
5928
5929	be_cleanup(adapter);
5930
5931	return 0;
5932}
5933
5934static int __maybe_unused be_pci_resume(struct device *dev_d)
5935{
5936	struct be_adapter *adapter = dev_get_drvdata(dev_d);
5937	int status = 0;
5938
5939	status = be_resume(adapter);
5940	if (status)
5941		return status;
5942
5943	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5944
5945	return 0;
5946}
5947
/* An FLR will stop BE from DMAing any data. */
5951static void be_shutdown(struct pci_dev *pdev)
5952{
5953	struct be_adapter *adapter = pci_get_drvdata(pdev);
5954
5955	if (!adapter)
5956		return;
5957
5958	be_roce_dev_shutdown(adapter);
5959	cancel_delayed_work_sync(&adapter->work);
5960	be_cancel_err_detection(adapter);
5961
5962	netif_device_detach(adapter->netdev);
5963
5964	be_cmd_reset_function(adapter);
5965
5966	pci_disable_device(pdev);
5967}
5968
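/* AER/EEH error_detected callback: quiesce the adapter on a PCI channel
 * error and tell the PCI core whether a slot reset should be attempted.
 */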
5969static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5970					    pci_channel_state_t state)
5971{
5972	struct be_adapter *adapter = pci_get_drvdata(pdev);
5973
5974	dev_err(&adapter->pdev->dev, "EEH error detected\n");
5975
5976	be_roce_dev_remove(adapter);
5977
5978	if (!be_check_error(adapter, BE_ERROR_EEH)) {
5979		be_set_error(adapter, BE_ERROR_EEH);
5980
5981		be_cancel_err_detection(adapter);
5982
5983		be_cleanup(adapter);
5984	}
5985
5986	if (state == pci_channel_io_perm_failure)
5987		return PCI_ERS_RESULT_DISCONNECT;
5988
5989	pci_disable_device(pdev);
5990
	/* The error could cause the FW to trigger a flash debug dump.
	 * Resetting the card while the flash dump is in progress can
	 * cause it not to recover; wait for the dump to finish.
	 * Waiting on the first function is sufficient, as this is needed
	 * only once per adapter.
	 */
5997	if (pdev->devfn == 0)
5998		ssleep(30);
5999
6000	return PCI_ERS_RESULT_NEED_RESET;
6001}
6002
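/* AER/EEH slot_reset callback: re-enable the device, restore config space
 * and wait for the FW to become ready before declaring recovery possible.
 */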
6003static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6004{
6005	struct be_adapter *adapter = pci_get_drvdata(pdev);
6006	int status;
6007
6008	dev_info(&adapter->pdev->dev, "EEH reset\n");
6009
6010	status = pci_enable_device(pdev);
6011	if (status)
6012		return PCI_ERS_RESULT_DISCONNECT;
6013
6014	pci_set_master(pdev);
6015	pci_restore_state(pdev);
6016
	/* Check if the card is OK and the FW is ready */
6018	dev_info(&adapter->pdev->dev,
6019		 "Waiting for FW to be ready after EEH reset\n");
6020	status = be_fw_wait_ready(adapter);
6021	if (status)
6022		return PCI_ERS_RESULT_DISCONNECT;
6023
6024	be_clear_error(adapter, BE_CLEAR_ALL);
6025	return PCI_ERS_RESULT_RECOVERED;
6026}
6027
6028static void be_eeh_resume(struct pci_dev *pdev)
6029{
6030	int status = 0;
6031	struct be_adapter *adapter = pci_get_drvdata(pdev);
6032
6033	dev_info(&adapter->pdev->dev, "EEH resume\n");
6034
6035	pci_save_state(pdev);
6036
6037	status = be_resume(adapter);
6038	if (status)
6039		goto err;
6040
6041	be_roce_dev_add(adapter);
6042
6043	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6044	return;
6045err:
6046	dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6047}
6048
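/* sriov_configure entry point (sriov_numvfs in sysfs): num_vfs == 0 disables
 * VFs; a non-zero value redistributes PF-pool resources (on Skyhawk), updates
 * the queue counts and enables the requested number of VFs.
 */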
6049static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6050{
6051	struct be_adapter *adapter = pci_get_drvdata(pdev);
6052	struct be_resources vft_res = {0};
6053	int status;
6054
6055	if (!num_vfs)
6056		be_vf_clear(adapter);
6057
6058	adapter->num_vfs = num_vfs;
6059
6060	if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6061		dev_warn(&pdev->dev,
6062			 "Cannot disable VFs while they are assigned\n");
6063		return -EBUSY;
6064	}
6065
	/* When the HW is in an SR-IOV capable configuration, the PF-pool
	 * resources are distributed equally across the maximum number of VFs.
	 * The user may request that only a subset of the max VFs be enabled.
	 * Based on num_vfs, redistribute the resources across the requested
	 * number of VFs so that each VF gets a larger share of resources.
	 * This facility is not available in BE3 FW.
	 * On Lancer, this redistribution is done by the FW itself.
	 */
6074	if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6075		be_calculate_vf_res(adapter, adapter->num_vfs,
6076				    &vft_res);
6077		status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6078						 adapter->num_vfs, &vft_res);
6079		if (status)
6080			dev_err(&pdev->dev,
6081				"Failed to optimize SR-IOV resources\n");
6082	}
6083
6084	status = be_get_resources(adapter);
6085	if (status)
6086		return be_cmd_status(status);
6087
6088	/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6089	rtnl_lock();
6090	status = be_update_queues(adapter);
6091	rtnl_unlock();
6092	if (status)
6093		return be_cmd_status(status);
6094
6095	if (adapter->num_vfs)
6096		status = be_vf_setup(adapter);
6097
6098	if (!status)
6099		return adapter->num_vfs;
6100
6101	return 0;
6102}
6103
6104static const struct pci_error_handlers be_eeh_handlers = {
6105	.error_detected = be_eeh_err_detected,
6106	.slot_reset = be_eeh_reset,
6107	.resume = be_eeh_resume,
6108};
6109
6110static SIMPLE_DEV_PM_OPS(be_pci_pm_ops, be_suspend, be_pci_resume);
6111
6112static struct pci_driver be_driver = {
6113	.name = DRV_NAME,
6114	.id_table = be_dev_ids,
6115	.probe = be_probe,
6116	.remove = be_remove,
6117	.driver.pm = &be_pci_pm_ops,
6118	.shutdown = be_shutdown,
6119	.sriov_configure = be_pci_sriov_configure,
6120	.err_handler = &be_eeh_handlers
6121};
6122
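/* Module init: validate the rx_frag_size parameter, warn about the obsolete
 * num_vfs parameter, create the shared workqueues and register the PCI
 * driver.
 */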
6123static int __init be_init_module(void)
6124{
6125	int status;
6126
6127	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6128	    rx_frag_size != 2048) {
6129		printk(KERN_WARNING DRV_NAME
6130			" : Module param rx_frag_size must be 2048/4096/8192."
6131			" Using 2048\n");
6132		rx_frag_size = 2048;
6133	}
6134
6135	if (num_vfs > 0) {
		pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
		pr_info(DRV_NAME " : Use the sysfs method to enable VFs\n");
6138	}
6139
6140	be_wq = create_singlethread_workqueue("be_wq");
6141	if (!be_wq) {
		pr_warn(DRV_NAME " : workqueue creation failed\n");
		return -ENOMEM;
6144	}
6145
6146	be_err_recovery_workq =
6147		create_singlethread_workqueue("be_err_recover");
6148	if (!be_err_recovery_workq)
		pr_warn(DRV_NAME " : could not create error recovery workqueue\n");
6150
6151	status = pci_register_driver(&be_driver);
6152	if (status) {
6153		destroy_workqueue(be_wq);
6154		be_destroy_err_recovery_workq();
6155	}
6156	return status;
6157}
6158module_init(be_init_module);
6159
6160static void __exit be_exit_module(void)
6161{
6162	pci_unregister_driver(&be_driver);
6163
6164	be_destroy_err_recovery_workq();
6165
6166	if (be_wq)
6167		destroy_workqueue(be_wq);
6168}
6169module_exit(be_exit_module);
6170