// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include <linux/bpf.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/utsname.h>
#include <linux/version.h>
#include <net/sch_generic.h>
#include <net/xdp_sock_drv.h>
#include "gve.h"
#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"

#define GVE_DEFAULT_RX_COPYBREAK	(256)

#define DEFAULT_MSG_LEVEL	(NETIF_MSG_DRV | NETIF_MSG_LINK)
#define GVE_VERSION		"1.0.0"
#define GVE_VERSION_PREFIX	"GVE-"

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;

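/* Share driver metadata with the device: allocate a DMA-coherent
 * gve_driver_info describing the OS, kernel version and driver capability
 * flags, and hand it to the device over the admin queue. A device that does
 * not implement the command (-EOPNOTSUPP) is tolerated.
 */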
static int gve_verify_driver_compatibility(struct gve_priv *priv)
{
	int err;
	struct gve_driver_info *driver_info;
	dma_addr_t driver_info_bus;

	driver_info = dma_alloc_coherent(&priv->pdev->dev,
					 sizeof(struct gve_driver_info),
					 &driver_info_bus, GFP_KERNEL);
	if (!driver_info)
		return -ENOMEM;

	*driver_info = (struct gve_driver_info) {
		.os_type = 1, /* Linux */
		.os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
		.os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
		.os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
		.driver_capability_flags = {
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
			cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
		},
	};
	strscpy(driver_info->os_version_str1, utsname()->release,
		sizeof(driver_info->os_version_str1));
	strscpy(driver_info->os_version_str2, utsname()->version,
		sizeof(driver_info->os_version_str2));

	err = gve_adminq_verify_driver_compatibility(priv,
						     sizeof(struct gve_driver_info),
						     driver_info_bus);

	/* It's ok if the device doesn't support this */
	if (err == -EOPNOTSUPP)
		err = 0;

	dma_free_coherent(&priv->pdev->dev,
			  sizeof(struct gve_driver_info),
			  driver_info, driver_info_bus);
	return err;
}

static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (gve_is_gqi(priv))
		return gve_tx(skb, dev);
	else
		return gve_tx_dqo(skb, dev);
}

static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
	struct gve_priv *priv = netdev_priv(dev);
	unsigned int start;
	u64 packets, bytes;
	int num_tx_queues;
	int ring;

	num_tx_queues = gve_num_tx_queues(priv);
	if (priv->rx) {
		for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
				packets = priv->rx[ring].rpackets;
				bytes = priv->rx[ring].rbytes;
			} while (u64_stats_fetch_retry(&priv->rx[ring].statss,
						       start));
			s->rx_packets += packets;
			s->rx_bytes += bytes;
		}
	}
	if (priv->tx) {
		for (ring = 0; ring < num_tx_queues; ring++) {
			do {
				start =
				  u64_stats_fetch_begin(&priv->tx[ring].statss);
				packets = priv->tx[ring].pkt_done;
				bytes = priv->tx[ring].bytes_done;
			} while (u64_stats_fetch_retry(&priv->tx[ring].statss,
						       start));
			s->tx_packets += packets;
			s->tx_bytes += bytes;
		}
	}
}

static int gve_alloc_counter_array(struct gve_priv *priv)
{
	priv->counter_array =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_event_counters *
				   sizeof(*priv->counter_array),
				   &priv->counter_array_bus, GFP_KERNEL);
	if (!priv->counter_array)
		return -ENOMEM;

	return 0;
}

static void gve_free_counter_array(struct gve_priv *priv)
{
	if (!priv->counter_array)
		return;

	dma_free_coherent(&priv->pdev->dev,
			  priv->num_event_counters *
			  sizeof(*priv->counter_array),
			  priv->counter_array, priv->counter_array_bus);
	priv->counter_array = NULL;
}

/* NIC requests to report stats */
static void gve_stats_report_task(struct work_struct *work)
{
	struct gve_priv *priv = container_of(work, struct gve_priv,
					     stats_report_task);
	if (gve_get_do_report_stats(priv)) {
		gve_handle_report_stats(priv);
		gve_clear_do_report_stats(priv);
	}
}

static void gve_stats_report_schedule(struct gve_priv *priv)
{
	if (!gve_get_probe_in_progress(priv) &&
	    !gve_get_reset_in_progress(priv)) {
		gve_set_do_report_stats(priv);
		queue_work(priv->gve_wq, &priv->stats_report_task);
	}
}

static void gve_stats_report_timer(struct timer_list *t)
{
	struct gve_priv *priv = from_timer(priv, t, stats_report_timer);

	mod_timer(&priv->stats_report_timer,
		  round_jiffies(jiffies +
		  msecs_to_jiffies(priv->stats_report_timer_period)));
	gve_stats_report_schedule(priv);
}

static int gve_alloc_stats_report(struct gve_priv *priv)
{
	int tx_stats_num, rx_stats_num;

	tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
		       gve_num_tx_queues(priv);
	rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
		       priv->rx_cfg.num_queues;
	priv->stats_report_len = struct_size(priv->stats_report, stats,
					     size_add(tx_stats_num, rx_stats_num));
	priv->stats_report =
		dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
				   &priv->stats_report_bus, GFP_KERNEL);
	if (!priv->stats_report)
		return -ENOMEM;
	/* Set up timer for the report-stats task */
	timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
	priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
	return 0;
}

static void gve_free_stats_report(struct gve_priv *priv)
{
	if (!priv->stats_report)
		return;

	del_timer_sync(&priv->stats_report_timer);
	dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
			  priv->stats_report, priv->stats_report_bus);
	priv->stats_report = NULL;
}

static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
{
	struct gve_priv *priv = arg;

	queue_work(priv->gve_wq, &priv->service_task);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr(int irq, void *arg)
{
	struct gve_notify_block *block = arg;
	struct gve_priv *priv = block->priv;

	iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

static irqreturn_t gve_intr_dqo(int irq, void *arg)
{
	struct gve_notify_block *block = arg;

	/* Interrupts are automatically masked */
	napi_schedule_irqoff(&block->napi);
	return IRQ_HANDLED;
}

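/* NAPI poll handler for the GQI queue format: service the TX (or XDP TX) and
 * RX rings attached to this notification block, then ack and re-arm the IRQ
 * doorbell. Pending work is re-checked after the ack so a completion racing
 * with the unmask is not lost.
 */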
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block;
	__be32 __iomem *irq_doorbell;
	bool reschedule = false;
	struct gve_priv *priv;
	int work_done = 0;

	block = container_of(napi, struct gve_notify_block, napi);
	priv = block->priv;

	if (block->tx) {
		if (block->tx->q_num < priv->tx_cfg.num_queues)
			reschedule |= gve_tx_poll(block, budget);
		else if (budget)
			reschedule |= gve_xdp_poll(block, budget);
	}

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	/* Complete processing - don't unmask the irq if busy polling is enabled */
	if (likely(napi_complete_done(napi, work_done))) {
		irq_doorbell = gve_irq_doorbell(priv, block);
		iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);

		/* Ensure the IRQ ACK is visible before we check for pending work.
		 * If the queue had issued any updates, they will be visible now.
		 */
		mb();

		if (block->tx)
			reschedule |= gve_tx_clean_pending(priv, block->tx);
		if (block->rx)
			reschedule |= gve_rx_work_pending(block->rx);

		if (reschedule && napi_reschedule(napi))
			iowrite32be(GVE_IRQ_MASK, irq_doorbell);
	}
	return work_done;
}

static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
{
	struct gve_notify_block *block =
		container_of(napi, struct gve_notify_block, napi);
	struct gve_priv *priv = block->priv;
	bool reschedule = false;
	int work_done = 0;

	if (block->tx)
		reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);

	if (!budget)
		return 0;

	if (block->rx) {
		work_done = gve_rx_poll_dqo(block, budget);
		reschedule |= work_done == budget;
	}

	if (reschedule)
		return budget;

	if (likely(napi_complete_done(napi, work_done))) {
		/* Enable interrupts again.
		 *
		 * We don't need to repoll afterwards because HW supports the
		 * PCI MSI-X PBA feature.
		 *
		 * Another interrupt would be triggered if a new event came in
		 * since the last one.
		 */
		gve_write_irq_doorbell_dqo(priv, block,
					   GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
	}

	return work_done;
}

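/* Allocate MSI-X vectors and notification blocks: one vector per notify block
 * plus one management vector. If fewer vectors are granted than requested,
 * the TX/RX queue limits are scaled down to match. Each block gets its IRQ,
 * a CPU affinity hint and an IRQ doorbell index entry.
 */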
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
	int num_vecs_requested = priv->num_ntfy_blks + 1;
	unsigned int active_cpus;
	int vecs_enabled;
	int i, j;
	int err;

	priv->msix_vectors = kvcalloc(num_vecs_requested,
				      sizeof(*priv->msix_vectors), GFP_KERNEL);
	if (!priv->msix_vectors)
		return -ENOMEM;
	for (i = 0; i < num_vecs_requested; i++)
		priv->msix_vectors[i].entry = i;
	vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
					     GVE_MIN_MSIX, num_vecs_requested);
	if (vecs_enabled < 0) {
		dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
			GVE_MIN_MSIX, vecs_enabled);
		err = vecs_enabled;
		goto abort_with_msix_vectors;
	}
	if (vecs_enabled != num_vecs_requested) {
		int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
		int vecs_per_type = new_num_ntfy_blks / 2;
		int vecs_left = new_num_ntfy_blks % 2;

		priv->num_ntfy_blks = new_num_ntfy_blks;
		priv->mgmt_msix_idx = priv->num_ntfy_blks;
		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
						vecs_per_type);
		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
						vecs_per_type + vecs_left);
		dev_err(&priv->pdev->dev,
			"Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
			vecs_enabled, priv->tx_cfg.max_queues,
			priv->rx_cfg.max_queues);
		if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
			priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
		if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
			priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
	}
	/* Half the notification blocks go to TX and half to RX */
	active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());

	/* Setup Management Vector  - the last vector */
	snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
		 pci_name(priv->pdev));
	err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
			  gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
	if (err) {
		dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
		goto abort_with_msix_enabled;
	}
	priv->irq_db_indices =
		dma_alloc_coherent(&priv->pdev->dev,
				   priv->num_ntfy_blks *
				   sizeof(*priv->irq_db_indices),
				   &priv->irq_db_indices_bus, GFP_KERNEL);
	if (!priv->irq_db_indices) {
		err = -ENOMEM;
		goto abort_with_mgmt_vector;
	}

	priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
				     sizeof(*priv->ntfy_blocks), GFP_KERNEL);
	if (!priv->ntfy_blocks) {
		err = -ENOMEM;
		goto abort_with_irq_db_indices;
	}

	/* Setup the other blocks - the first n-1 vectors */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
			 i, pci_name(priv->pdev));
		block->priv = priv;
		err = request_irq(priv->msix_vectors[msix_idx].vector,
				  gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
				  0, block->name, block);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to receive msix vector %d\n", i);
			goto abort_with_some_ntfy_blocks;
		}
		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      get_cpu_mask(i % active_cpus));
		block->irq_db_index = &priv->irq_db_indices[i].index;
	}
	return 0;
abort_with_some_ntfy_blocks:
	for (j = 0; j < i; j++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[j];
		int msix_idx = j;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
abort_with_irq_db_indices:
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
abort_with_mgmt_vector:
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
abort_with_msix_enabled:
	pci_disable_msix(priv->pdev);
abort_with_msix_vectors:
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
	return err;
}

static void gve_free_notify_blocks(struct gve_priv *priv)
{
	int i;

	if (!priv->msix_vectors)
		return;

	/* Free the irqs */
	for (i = 0; i < priv->num_ntfy_blks; i++) {
		struct gve_notify_block *block = &priv->ntfy_blocks[i];
		int msix_idx = i;

		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
				      NULL);
		free_irq(priv->msix_vectors[msix_idx].vector, block);
	}
	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
	kvfree(priv->ntfy_blocks);
	priv->ntfy_blocks = NULL;
	dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
			  sizeof(*priv->irq_db_indices),
			  priv->irq_db_indices, priv->irq_db_indices_bus);
	priv->irq_db_indices = NULL;
	pci_disable_msix(priv->pdev);
	kvfree(priv->msix_vectors);
	priv->msix_vectors = NULL;
}

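/* Allocate the event counter array, notification blocks and stats report,
 * then describe them to the device over the admin queue. For DQO formats the
 * packet type lookup table is also fetched here.
 */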
static int gve_setup_device_resources(struct gve_priv *priv)
{
	int err;

	err = gve_alloc_counter_array(priv);
	if (err)
		return err;
	err = gve_alloc_notify_blocks(priv);
	if (err)
		goto abort_with_counter;
	err = gve_alloc_stats_report(priv);
	if (err)
		goto abort_with_ntfy_blocks;
	err = gve_adminq_configure_device_resources(priv,
						    priv->counter_array_bus,
						    priv->num_event_counters,
						    priv->irq_db_indices_bus,
						    priv->num_ntfy_blks);
	if (unlikely(err)) {
		dev_err(&priv->pdev->dev,
			"could not setup device_resources: err=%d\n", err);
		err = -ENXIO;
		goto abort_with_stats_report;
	}

	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
			err = -ENOMEM;
			goto abort_with_stats_report;
		}
		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to get ptype map: err=%d\n", err);
			goto abort_with_ptype_lut;
		}
	}

	err = gve_adminq_report_stats(priv, priv->stats_report_len,
				      priv->stats_report_bus,
				      GVE_STATS_REPORT_TIMER_PERIOD);
	if (err)
		dev_err(&priv->pdev->dev,
			"Failed to report stats: err=%d\n", err);
	gve_set_device_resources_ok(priv);
	return 0;

abort_with_ptype_lut:
	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;
abort_with_stats_report:
	gve_free_stats_report(priv);
abort_with_ntfy_blocks:
	gve_free_notify_blocks(priv);
abort_with_counter:
	gve_free_counter_array(priv);

	return err;
}

static void gve_trigger_reset(struct gve_priv *priv);

static void gve_teardown_device_resources(struct gve_priv *priv)
{
	int err;

	/* Tell device its resources are being freed */
	if (gve_get_device_resources_ok(priv)) {
		/* detach the stats report */
		err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Failed to detach stats report: err=%d\n", err);
			gve_trigger_reset(priv);
		}
		err = gve_adminq_deconfigure_device_resources(priv);
		if (err) {
			dev_err(&priv->pdev->dev,
				"Could not deconfigure device resources: err=%d\n",
				err);
			gve_trigger_reset(priv);
		}
	}

	kvfree(priv->ptype_lut_dqo);
	priv->ptype_lut_dqo = NULL;

	gve_free_counter_array(priv);
	gve_free_notify_blocks(priv);
	gve_free_stats_report(priv);
	gve_clear_device_resources_ok(priv);
}

static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
			 int (*gve_poll)(struct napi_struct *, int))
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_add(priv->dev, &block->napi, gve_poll);
}

static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
{
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

	netif_napi_del(&block->napi);
}

static int gve_register_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

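/* Register the TX and then RX queue page lists with the device. A failure
 * here triggers a reset, so no unwinding is done on error.
 */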
static int gve_register_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "failed to register queue page list %d\n",
				  priv->qpls[i].id);
			/* This failure will trigger a reset - no need to clean
			 * up
			 */
			return err;
		}
	}
	return 0;
}

static int gve_unregister_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_unregister_qpls(struct gve_priv *priv)
{
	int start_id;
	int err;
	int i;

	start_id = gve_tx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}

	start_id = gve_rx_start_qpl_id(priv);
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
		/* This failure will trigger a reset - no need to clean up */
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to unregister queue page list %d\n",
				  priv->qpls[i].id);
			return err;
		}
	}
	return 0;
}

static int gve_create_xdp_rings(struct gve_priv *priv)
{
	int err;

	err = gve_adminq_create_tx_queues(priv,
					  gve_xdp_tx_start_queue_id(priv),
					  priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
			  priv->num_xdp_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
		  priv->num_xdp_queues);

	return 0;
}

static int gve_create_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;
	int i;

	err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
			  num_tx_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
		  num_tx_queues);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
			  priv->rx_cfg.num_queues);
		/* This failure will trigger a reset - no need to clean
		 * up
		 */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
		  priv->rx_cfg.num_queues);

	if (gve_is_gqi(priv)) {
		/* Rx data ring has been prefilled with packet buffers at queue
		 * allocation time.
		 *
		 * Write the doorbell to provide descriptor slots and packet
		 * buffers to the NIC.
		 */
		for (i = 0; i < priv->rx_cfg.num_queues; i++)
			gve_rx_write_doorbell(priv, &priv->rx[i]);
	} else {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			/* Post buffers and ring doorbell. */
			gve_rx_post_buffers_dqo(&priv->rx[i]);
		}
	}

	return 0;
}

static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
					 int (*napi_poll)(struct napi_struct *napi,
							  int budget))
{
	int start_id = gve_xdp_tx_start_queue_id(priv);
	int i;

	/* Add xdp tx napi & init sync stats */
	for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void add_napi_init_sync_stats(struct gve_priv *priv,
				     int (*napi_poll)(struct napi_struct *napi,
						      int budget))
{
	int i;

	/* Add tx napi & init sync stats */
	for (i = 0; i < gve_num_tx_queues(priv); i++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->tx[i].statss);
		priv->tx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
	/* Add rx napi & init sync stats */
	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);

		u64_stats_init(&priv->rx[i].statss);
		priv->rx[i].ntfy_id = ntfy_idx;
		gve_add_napi(priv, ntfy_idx, napi_poll);
	}
}

static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
{
	if (gve_is_gqi(priv)) {
		gve_tx_free_rings_gqi(priv, start_id, num_rings);
	} else {
		gve_tx_free_rings_dqo(priv);
	}
}

static int gve_alloc_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err = 0;

	if (!priv->num_xdp_queues)
		return 0;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
	if (err)
		return err;
	add_napi_init_xdp_sync_stats(priv, gve_napi_poll);

	return 0;
}

static int gve_alloc_rings(struct gve_priv *priv)
{
	int err;

	/* Setup tx rings */
	priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
			    GFP_KERNEL);
	if (!priv->tx)
		return -ENOMEM;

	if (gve_is_gqi(priv))
		err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
	else
		err = gve_tx_alloc_rings_dqo(priv);
	if (err)
		goto free_tx;

	/* Setup rx rings */
	priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
			    GFP_KERNEL);
	if (!priv->rx) {
		err = -ENOMEM;
		goto free_tx_queue;
	}

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_rings(priv);
	else
		err = gve_rx_alloc_rings_dqo(priv);
	if (err)
		goto free_rx;

	if (gve_is_gqi(priv))
		add_napi_init_sync_stats(priv, gve_napi_poll);
	else
		add_napi_init_sync_stats(priv, gve_napi_poll_dqo);

	return 0;

free_rx:
	kvfree(priv->rx);
	priv->rx = NULL;
free_tx_queue:
	gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
free_tx:
	kvfree(priv->tx);
	priv->tx = NULL;
	return err;
}

static int gve_destroy_xdp_rings(struct gve_priv *priv)
{
	int start_id;
	int err;

	start_id = gve_xdp_tx_start_queue_id(priv);
	err = gve_adminq_destroy_tx_queues(priv,
					   start_id,
					   priv->num_xdp_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy XDP queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");

	return 0;
}

static int gve_destroy_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int err;

	err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy tx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
	err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err) {
		netif_err(priv, drv, priv->dev,
			  "failed to destroy rx queues\n");
		/* This failure will trigger a reset - no need to clean up */
		return err;
	}
	netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
	return 0;
}

static void gve_rx_free_rings(struct gve_priv *priv)
{
	if (gve_is_gqi(priv))
		gve_rx_free_rings_gqi(priv);
	else
		gve_rx_free_rings_dqo(priv);
}

static void gve_free_xdp_rings(struct gve_priv *priv)
{
	int ntfy_idx, start_id;
	int i;

	start_id = gve_xdp_tx_start_queue_id(priv);
	if (priv->tx) {
		for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
	}
}

static void gve_free_rings(struct gve_priv *priv)
{
	int num_tx_queues = gve_num_tx_queues(priv);
	int ntfy_idx;
	int i;

	if (priv->tx) {
		for (i = 0; i < num_tx_queues; i++) {
			ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_tx_free_rings(priv, 0, num_tx_queues);
		kvfree(priv->tx);
		priv->tx = NULL;
	}
	if (priv->rx) {
		for (i = 0; i < priv->rx_cfg.num_queues; i++) {
			ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
			gve_remove_napi(priv, ntfy_idx);
		}
		gve_rx_free_rings(priv);
		kvfree(priv->rx);
		priv->rx = NULL;
	}
}

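/* Allocate a page and DMA-map it for device use, counting allocation and
 * mapping failures for the stats report.
 */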
int gve_alloc_page(struct gve_priv *priv, struct device *dev,
		   struct page **page, dma_addr_t *dma,
		   enum dma_data_direction dir, gfp_t gfp_flags)
{
	*page = alloc_page(gfp_flags);
	if (!*page) {
		priv->page_alloc_fail++;
		return -ENOMEM;
	}
	*dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
	if (dma_mapping_error(dev, *dma)) {
		priv->dma_mapping_error++;
		put_page(*page);
		return -ENOMEM;
	}
	return 0;
}

static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
				     int pages)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int err;
	int i;

	if (pages + priv->num_registered_pages > priv->max_registered_pages) {
		netif_err(priv, drv, priv->dev,
			  "Reached max number of registered pages %llu > %llu\n",
			  pages + priv->num_registered_pages,
			  priv->max_registered_pages);
		return -EINVAL;
	}

	qpl->id = id;
	qpl->num_entries = 0;
	qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->pages)
		return -ENOMEM;
	qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
	/* caller handles clean up */
	if (!qpl->page_buses)
		return -ENOMEM;

	for (i = 0; i < pages; i++) {
		err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
				     &qpl->page_buses[i],
				     gve_qpl_dma_dir(priv, id), GFP_KERNEL);
		/* caller handles clean up */
		if (err)
			return -ENOMEM;
		qpl->num_entries++;
	}
	priv->num_registered_pages += pages;

	return 0;
}

void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
		   enum dma_data_direction dir)
{
	if (!dma_mapping_error(dev, dma))
		dma_unmap_page(dev, dma, PAGE_SIZE, dir);
	if (page)
		put_page(page);
}

static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
{
	struct gve_queue_page_list *qpl = &priv->qpls[id];
	int i;

	if (!qpl->pages)
		return;
	if (!qpl->page_buses)
		goto free_pages;

	for (i = 0; i < qpl->num_entries; i++)
		gve_free_page(&priv->pdev->dev, qpl->pages[i],
			      qpl->page_buses[i], gve_qpl_dma_dir(priv, id));

	kvfree(qpl->page_buses);
	qpl->page_buses = NULL;
free_pages:
	kvfree(qpl->pages);
	qpl->pages = NULL;
	priv->num_registered_pages -= qpl->num_entries;
}

static int gve_alloc_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i, j;
	int err;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
		if (err)
			goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = start_id; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	return err;
}

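/* Allocate queue page lists for all TX and RX queues when the queue format
 * uses QPLs. RX page counts depend on the format: GQI-QPL needs one page per
 * descriptor slot, while DQO needs extra pages to cope with out-of-order
 * completions. Also sets up the QPL id bitmap used to assign QPLs to queues.
 */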
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int page_count;
	int start_id;
	int i, j;
	int err;

	if (!gve_is_qpl(priv))
		return 0;

	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
	if (!priv->qpls)
		return -ENOMEM;

	start_id = gve_tx_start_qpl_id(priv);
	page_count = priv->tx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						page_count);
		if (err)
			goto free_qpls;
	}

	start_id = gve_rx_start_qpl_id(priv);

	/* For GQI_QPL the number of pages allocated has a 1:1 relationship
	 * with the number of descriptors. For DQO, more pages are required
	 * than descriptors because completions can arrive out of order.
	 */
	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						page_count);
		if (err)
			goto free_qpls;
	}

	priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
				     sizeof(unsigned long) * BITS_PER_BYTE;
	priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
					    sizeof(unsigned long), GFP_KERNEL);
	if (!priv->qpl_cfg.qpl_id_map) {
		err = -ENOMEM;
		goto free_qpls;
	}

	return 0;

free_qpls:
	for (j = 0; j <= i; j++)
		gve_free_queue_page_list(priv, j);
	kvfree(priv->qpls);
	priv->qpls = NULL;
	return err;
}

static void gve_free_xdp_qpls(struct gve_priv *priv)
{
	int start_id;
	int i;

	start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
	for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
		gve_free_queue_page_list(priv, i);
}

static void gve_free_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int i;

	if (!priv->qpls)
		return;

	kvfree(priv->qpl_cfg.qpl_id_map);
	priv->qpl_cfg.qpl_id_map = NULL;

	for (i = 0; i < max_queues; i++)
		gve_free_queue_page_list(priv, i);

	kvfree(priv->qpls);
	priv->qpls = NULL;
}

/* Use this to schedule a reset when the device is capable of continuing
 * to handle other requests in its current state. If it is not, do a reset
 * in thread instead.
 */
void gve_schedule_reset(struct gve_priv *priv)
{
	gve_set_do_reset(priv);
	queue_work(priv->gve_wq, &priv->service_task);
}

static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
static void gve_turndown(struct gve_priv *priv);
static void gve_turnup(struct gve_priv *priv);

static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
{
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int err = 0;
	int i, j;
	u32 tx_qid;

	if (!priv->num_xdp_queues)
		return 0;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		napi = &priv->ntfy_blocks[rx->ntfy_id].napi;

		err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
				       napi->napi_id);
		if (err)
			goto err;
		err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err)
			goto err;
		rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
		if (rx->xsk_pool) {
			err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
					       napi->napi_id);
			if (err)
				goto err;
			err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
							 MEM_TYPE_XSK_BUFF_POOL, NULL);
			if (err)
				goto err;
			xsk_pool_set_rxq_info(rx->xsk_pool,
					      &rx->xsk_rxq);
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
	}
	return 0;

err:
	for (j = i; j >= 0; j--) {
		rx = &priv->rx[j];
		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
			xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
			xdp_rxq_info_unreg(&rx->xsk_rxq);
	}
	return err;
}

static void gve_unreg_xdp_info(struct gve_priv *priv)
{
	int i, tx_qid;

	if (!priv->num_xdp_queues)
		return;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		struct gve_rx_ring *rx = &priv->rx[i];

		xdp_rxq_info_unreg(&rx->xdp_rxq);
		if (rx->xsk_pool) {
			xdp_rxq_info_unreg(&rx->xsk_rxq);
			rx->xsk_pool = NULL;
		}
	}

	for (i = 0; i < priv->num_xdp_queues; i++) {
		tx_qid = gve_xdp_tx_queue_id(priv, i);
		priv->tx[tx_qid].xsk_pool = NULL;
	}
}

static void gve_drain_page_cache(struct gve_priv *priv)
{
	struct page_frag_cache *nc;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		nc = &priv->rx[i].page_cache;
		if (nc->va) {
			__page_frag_cache_drain(virt_to_page(nc->va),
						nc->pagecnt_bias);
			nc->va = NULL;
		}
	}
}

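/* ndo_open: allocate QPLs and rings, register XDP/XSK info, register the
 * page lists and create the queues on the device, then turn the queues up.
 * Admin queue failures after registration fall through to the reset path.
 */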
static int gve_open(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	if (priv->xdp_prog)
		priv->num_xdp_queues = priv->rx_cfg.num_queues;
	else
		priv->num_xdp_queues = 0;

	err = gve_alloc_qpls(priv);
	if (err)
		return err;

	err = gve_alloc_rings(priv);
	if (err)
		goto free_qpls;

	err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
	if (err)
		goto free_rings;
	err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
	if (err)
		goto free_rings;

	err = gve_reg_xdp_info(priv, dev);
	if (err)
		goto free_rings;

	err = gve_register_qpls(priv);
	if (err)
		goto reset;

	if (!gve_is_gqi(priv)) {
		/* Hard code this for now. This may be tuned in the future for
		 * performance.
		 */
		priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
	}
	err = gve_create_rings(priv);
	if (err)
		goto reset;

	gve_set_device_rings_ok(priv);

	if (gve_get_report_stats(priv))
		mod_timer(&priv->stats_report_timer,
			  round_jiffies(jiffies +
				msecs_to_jiffies(priv->stats_report_timer_period)));

	gve_turnup(priv);
	queue_work(priv->gve_wq, &priv->service_task);
	priv->interface_up_cnt++;
	return 0;

free_rings:
	gve_free_rings(priv);
free_qpls:
	gve_free_qpls(priv);
	return err;

reset:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	/* if this fails there is nothing we can do so just ignore the return */
	gve_reset_recovery(priv, false);
	/* return the original error */
	return err;
}

static int gve_close(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	netif_carrier_off(dev);
	if (gve_get_device_rings_ok(priv)) {
		gve_turndown(priv);
		gve_drain_page_cache(priv);
		err = gve_destroy_rings(priv);
		if (err)
			goto err;
		err = gve_unregister_qpls(priv);
		if (err)
			goto err;
		gve_clear_device_rings_ok(priv);
	}
	del_timer_sync(&priv->stats_report_timer);

	gve_unreg_xdp_info(priv);
	gve_free_rings(priv);
	gve_free_qpls(priv);
	priv->interface_down_cnt++;
	return 0;

err:
	/* This must have been called from a reset due to the rtnl lock
	 * so just return at this point.
	 */
	if (gve_get_reset_in_progress(priv))
		return err;
	/* Otherwise reset before returning */
	gve_reset_and_teardown(priv, true);
	return gve_reset_recovery(priv, false);
}

static int gve_remove_xdp_queues(struct gve_priv *priv)
{
	int err;

	err = gve_destroy_xdp_rings(priv);
	if (err)
		return err;

	err = gve_unregister_xdp_qpls(priv);
	if (err)
		return err;

	gve_unreg_xdp_info(priv);
	gve_free_xdp_rings(priv);
	gve_free_xdp_qpls(priv);
	priv->num_xdp_queues = 0;
	return 0;
}

static int gve_add_xdp_queues(struct gve_priv *priv)
{
	int err;

	priv->num_xdp_queues = priv->tx_cfg.num_queues;

	err = gve_alloc_xdp_qpls(priv);
	if (err)
		goto err;

	err = gve_alloc_xdp_rings(priv);
	if (err)
		goto free_xdp_qpls;

	err = gve_reg_xdp_info(priv, priv->dev);
	if (err)
		goto free_xdp_rings;

	err = gve_register_xdp_qpls(priv);
	if (err)
		goto free_xdp_rings;

	err = gve_create_xdp_rings(priv);
	if (err)
		goto free_xdp_rings;

	return 0;

free_xdp_rings:
	gve_free_xdp_rings(priv);
free_xdp_qpls:
	gve_free_xdp_qpls(priv);
err:
	priv->num_xdp_queues = 0;
	return err;
}

static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
{
	if (!gve_get_napi_enabled(priv))
		return;

	if (link_status == netif_carrier_ok(priv->dev))
		return;

	if (link_status) {
		netdev_info(priv->dev, "Device link is up.\n");
		netif_carrier_on(priv->dev);
	} else {
		netdev_info(priv->dev, "Device link is down.\n");
		netif_carrier_off(priv->dev);
	}
}

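/* Install or remove an XDP program. If the carrier is down only the program
 * pointer is swapped; otherwise the queues are turned down, XDP TX queues are
 * added or removed to match, and the queues are turned back up.
 */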
static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
		       struct netlink_ext_ack *extack)
{
	struct bpf_prog *old_prog;
	int err = 0;
	u32 status;

	old_prog = READ_ONCE(priv->xdp_prog);
	if (!netif_carrier_ok(priv->dev)) {
		WRITE_ONCE(priv->xdp_prog, prog);
		if (old_prog)
			bpf_prog_put(old_prog);
		return 0;
	}

	gve_turndown(priv);
	if (!old_prog && prog) {
		// Allocate XDP TX queues if an XDP program is
		// being installed
		err = gve_add_xdp_queues(priv);
		if (err)
			goto out;
	} else if (old_prog && !prog) {
		// Remove XDP TX queues if an XDP program is
		// being uninstalled
		err = gve_remove_xdp_queues(priv);
		if (err)
			goto out;
	}
	WRITE_ONCE(priv->xdp_prog, prog);
	if (old_prog)
		bpf_prog_put(old_prog);

out:
	gve_turnup(priv);
	status = ioread32be(&priv->reg_bar0->device_status);
	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
	return err;
}

static int gve_xsk_pool_enable(struct net_device *dev,
			       struct xsk_buff_pool *pool,
			       u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi;
	struct gve_rx_ring *rx;
	int tx_qid;
	int err;

	if (qid >= priv->rx_cfg.num_queues) {
		dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
		return -EINVAL;
	}
	if (xsk_pool_get_rx_frame_size(pool) <
	     priv->dev->max_mtu + sizeof(struct ethhdr)) {
		dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
		return -EINVAL;
	}

	err = xsk_pool_dma_map(pool, &priv->pdev->dev,
			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	if (err)
		return err;

	/* If XDP prog is not installed, return */
	if (!priv->xdp_prog)
		return 0;

	rx = &priv->rx[qid];
	napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
	if (err)
		goto err;

	err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
					 MEM_TYPE_XSK_BUFF_POOL, NULL);
	if (err)
		goto err;

	xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
	rx->xsk_pool = pool;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	priv->tx[tx_qid].xsk_pool = pool;

	return 0;
err:
	if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
		xdp_rxq_info_unreg(&rx->xsk_rxq);

	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return err;
}

static int gve_xsk_pool_disable(struct net_device *dev,
				u16 qid)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct napi_struct *napi_rx;
	struct napi_struct *napi_tx;
	struct xsk_buff_pool *pool;
	int tx_qid;

	pool = xsk_get_pool_from_qid(dev, qid);
	if (!pool)
		return -EINVAL;
	if (qid >= priv->rx_cfg.num_queues)
		return -EINVAL;

	/* If XDP prog is not installed, unmap DMA and return */
	if (!priv->xdp_prog)
		goto done;

	tx_qid = gve_xdp_tx_queue_id(priv, qid);
	if (!netif_running(dev)) {
		priv->rx[qid].xsk_pool = NULL;
		xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
		priv->tx[tx_qid].xsk_pool = NULL;
		goto done;
	}

	napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
	napi_disable(napi_rx); /* make sure current rx poll is done */

	napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
	napi_disable(napi_tx); /* make sure current tx poll is done */

	priv->rx[qid].xsk_pool = NULL;
	xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
	priv->tx[tx_qid].xsk_pool = NULL;
	smp_mb(); /* Make sure it is visible to the workers on datapath */

	napi_enable(napi_rx);
	if (gve_rx_work_pending(&priv->rx[qid]))
		napi_schedule(napi_rx);

	napi_enable(napi_tx);
	if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
		napi_schedule(napi_tx);

done:
	xsk_pool_dma_unmap(pool,
			   DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
	return 0;
}

static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
	struct gve_priv *priv = netdev_priv(dev);
	int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);

	if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
		return -EINVAL;

	if (flags & XDP_WAKEUP_TX) {
		struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
		struct napi_struct *napi =
			&priv->ntfy_blocks[tx->ntfy_id].napi;

		if (!napi_if_scheduled_mark_missed(napi)) {
			/* Call local_bh_enable to trigger SoftIRQ processing */
			local_bh_disable();
			napi_schedule(napi);
			local_bh_enable();
		}

		tx->xdp_xsk_wakeup++;
	}

	return 0;
}

static int verify_xdp_configuration(struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);

	if (dev->features & NETIF_F_LRO) {
		netdev_warn(dev, "XDP is not supported when LRO is on.\n");
		return -EOPNOTSUPP;
	}

	if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
		netdev_warn(dev, "XDP is not supported in mode %d.\n",
			    priv->queue_format);
		return -EOPNOTSUPP;
	}

	if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
		netdev_warn(dev, "XDP is not supported for mtu %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
	    (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
		netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
			    priv->rx_cfg.num_queues,
			    priv->tx_cfg.num_queues,
			    priv->tx_cfg.max_queues);
		return -EINVAL;
	}
	return 0;
}

static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct gve_priv *priv = netdev_priv(dev);
	int err;

	err = verify_xdp_configuration(dev);
	if (err)
		return err;
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return gve_set_xdp(priv, xdp->prog, xdp->extack);
	case XDP_SETUP_XSK_POOL:
		if (xdp->xsk.pool)
			return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
		else
			return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
	default:
		return -EINVAL;
	}
}

int gve_adjust_queues(struct gve_priv *priv,
		      struct gve_queue_config new_rx_config,
		      struct gve_queue_config new_tx_config)
{
	int err;

	if (netif_carrier_ok(priv->dev)) {
		/* To make this process as simple as possible we teardown the
		 * device, set the new configuration, and then bring the device
		 * up again.
		 */
		err = gve_close(priv->dev);
		/* we have already tried to reset in close,
		 * just fail at this point
		 */
		if (err)
			return err;
		priv->tx_cfg = new_tx_config;
		priv->rx_cfg = new_rx_config;

		err = gve_open(priv->dev);
		if (err)
			goto err;

		return 0;
	}
	/* Set the config for the next up. */
	priv->tx_cfg = new_tx_config;
	priv->rx_cfg = new_rx_config;

	return 0;
err:
	netif_err(priv, drv, priv->dev,
		  "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
	gve_turndown(priv);
	return err;
}

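/* Quiesce the data path: mark the carrier off, disable NAPI on every TX and
 * RX notify block and stop the TX queues so no new work arrives while the
 * configuration is changed or torn down. gve_turnup() below reverses this.
 */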
static void gve_turndown(struct gve_priv *priv)
{
	int idx;

	if (netif_carrier_ok(priv->dev))
		netif_carrier_off(priv->dev);

	if (!gve_get_napi_enabled(priv))
		return;

	/* Disable napi to prevent more work from coming in */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_disable(&block->napi);
	}

	/* Stop tx queues */
	netif_tx_disable(priv->dev);

	gve_clear_napi_enabled(priv);
	gve_clear_report_stats(priv);
}

static void gve_turnup(struct gve_priv *priv)
{
	int idx;

	/* Start the tx queues */
	netif_tx_start_all_queues(priv->dev);

	/* Enable napi and unmask interrupts for all queues */
	for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
		int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}
	}
	for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
		int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
		struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

		napi_enable(&block->napi);
		if (gve_is_gqi(priv)) {
			iowrite32be(0, gve_irq_doorbell(priv, block));
		} else {
			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}
	}

	gve_set_napi_enabled(priv);
}

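/* ndo_tx_timeout: if the device has completed descriptors the driver has not
 * yet processed and the queue has not been kicked recently, mask the IRQ and
 * schedule NAPI to reap them; otherwise schedule a full reset.
 */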
static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
	struct gve_notify_block *block;
	struct gve_tx_ring *tx = NULL;
	struct gve_priv *priv;
	u32 last_nic_done;
	u32 current_time;
	u32 ntfy_idx;

	netdev_info(dev, "Timeout on tx queue, %d", txqueue);
	priv = netdev_priv(dev);
	if (txqueue > priv->tx_cfg.num_queues)
		goto reset;

	ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
	if (ntfy_idx >= priv->num_ntfy_blks)
		goto reset;

	block = &priv->ntfy_blocks[ntfy_idx];
	tx = block->tx;

	current_time = jiffies_to_msecs(jiffies);
	if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
		goto reset;

	/* Check to see if there are missed completions, which will allow us to
	 * kick the queue.
	 */
	last_nic_done = gve_tx_load_event_counter(priv, tx);
	if (last_nic_done - tx->done) {
		netdev_info(dev, "Kicking queue %d", txqueue);
		iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
		napi_schedule(&block->napi);
		tx->last_kick_msec = current_time;
		goto out;
	} // Else reset.

reset:
	gve_schedule_reset(priv);

out:
	if (tx)
		tx->queue_timeout++;
	priv->tx_timeo_cnt++;
}

static int gve_set_features(struct net_device *netdev,
			    netdev_features_t features)
{
	const netdev_features_t orig_features = netdev->features;
	struct gve_priv *priv = netdev_priv(netdev);
	int err;

	if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
		netdev->features ^= NETIF_F_LRO;
		if (netif_carrier_ok(netdev)) {
			/* To make this process as simple as possible we
			 * teardown the device, set the new configuration,
			 * and then bring the device up again.
			 */
			err = gve_close(netdev);
			/* We have already tried to reset in close, just fail
			 * at this point.
			 */
			if (err)
				goto err;

			err = gve_open(netdev);
			if (err)
				goto err;
		}
	}

	return 0;
err:
	/* Reverts the change on error. */
	netdev->features = orig_features;
	netif_err(priv, drv, netdev,
		  "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
	return err;
}

static const struct net_device_ops gve_netdev_ops = {
	.ndo_start_xmit		=	gve_start_xmit,
	.ndo_open		=	gve_open,
	.ndo_stop		=	gve_close,
	.ndo_get_stats64	=	gve_get_stats,
	.ndo_tx_timeout         =       gve_tx_timeout,
	.ndo_set_features	=	gve_set_features,
	.ndo_bpf		=	gve_xdp,
	.ndo_xdp_xmit		=	gve_xdp_xmit,
	.ndo_xsk_wakeup		=	gve_xsk_wakeup,
};

static void gve_handle_status(struct gve_priv *priv, u32 status)
{
	if (GVE_DEVICE_STATUS_RESET_MASK & status) {
		dev_info(&priv->pdev->dev, "Device requested reset.\n");
		gve_set_do_reset(priv);
	}
	if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
		priv->stats_report_trigger_cnt++;
		gve_set_do_report_stats(priv);
	}
}

static void gve_handle_reset(struct gve_priv *priv)
{
	/* A service task will be scheduled at the end of probe to catch any
	 * resets that need to happen, and we don't want to reset until
	 * probe is done.
	 */
	if (gve_get_probe_in_progress(priv))
		return;

	if (gve_get_do_reset(priv)) {
		rtnl_lock();
		gve_reset(priv, false);
		rtnl_unlock();
	}
}

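/* Populate the DMA stats report read by the NIC: bump the written counter and
 * fill per-queue TX (wake/stop counts, frames, bytes, last completion,
 * timeouts) and RX (expected sequence, buffers posted) entries.
 */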
1920void gve_handle_report_stats(struct gve_priv *priv)
1921{
1922	struct stats *stats = priv->stats_report->stats;
1923	int idx, stats_idx = 0;
1924	unsigned int start = 0;
1925	u64 tx_bytes;
1926
1927	if (!gve_get_report_stats(priv))
1928		return;
1929
1930	be64_add_cpu(&priv->stats_report->written_count, 1);
1931	/* tx stats */
1932	if (priv->tx) {
1933		for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1934			u32 last_completion = 0;
1935			u32 tx_frames = 0;
1936
1937			/* DQO doesn't currently support these metrics. */
1938			if (gve_is_gqi(priv)) {
1939				last_completion = priv->tx[idx].done;
1940				tx_frames = priv->tx[idx].req;
1941			}
1942
1943			do {
1944				start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1945				tx_bytes = priv->tx[idx].bytes_done;
1946			} while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1947			stats[stats_idx++] = (struct stats) {
1948				.stat_name = cpu_to_be32(TX_WAKE_CNT),
1949				.value = cpu_to_be64(priv->tx[idx].wake_queue),
1950				.queue_id = cpu_to_be32(idx),
1951			};
1952			stats[stats_idx++] = (struct stats) {
1953				.stat_name = cpu_to_be32(TX_STOP_CNT),
1954				.value = cpu_to_be64(priv->tx[idx].stop_queue),
1955				.queue_id = cpu_to_be32(idx),
1956			};
1957			stats[stats_idx++] = (struct stats) {
1958				.stat_name = cpu_to_be32(TX_FRAMES_SENT),
1959				.value = cpu_to_be64(tx_frames),
1960				.queue_id = cpu_to_be32(idx),
1961			};
1962			stats[stats_idx++] = (struct stats) {
1963				.stat_name = cpu_to_be32(TX_BYTES_SENT),
1964				.value = cpu_to_be64(tx_bytes),
1965				.queue_id = cpu_to_be32(idx),
1966			};
1967			stats[stats_idx++] = (struct stats) {
1968				.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1969				.value = cpu_to_be64(last_completion),
1970				.queue_id = cpu_to_be32(idx),
1971			};
1972			stats[stats_idx++] = (struct stats) {
1973				.stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1974				.value = cpu_to_be64(priv->tx[idx].queue_timeout),
1975				.queue_id = cpu_to_be32(idx),
1976			};
1977		}
1978	}
1979	/* rx stats */
1980	if (priv->rx) {
1981		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1982			stats[stats_idx++] = (struct stats) {
1983				.stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1984				.value = cpu_to_be64(priv->rx[idx].desc.seqno),
1985				.queue_id = cpu_to_be32(idx),
1986			};
1987			stats[stats_idx++] = (struct stats) {
1988				.stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
				.value = cpu_to_be64(priv->rx[idx].fill_cnt),
1990				.queue_id = cpu_to_be32(idx),
1991			};
1992		}
1993	}
1994}
1995
1996/* Handle NIC status register changes, reset requests and report stats */
1997static void gve_service_task(struct work_struct *work)
1998{
1999	struct gve_priv *priv = container_of(work, struct gve_priv,
2000					     service_task);
2001	u32 status = ioread32be(&priv->reg_bar0->device_status);
2002
2003	gve_handle_status(priv, status);
2004
2005	gve_handle_reset(priv);
2006	gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2007}
2008
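/* XDP (including redirect, ndo_xmit and AF_XDP zero-copy) is only
 * advertised for the GQI-QPL queue format.
 */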
2009static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2010{
2011	if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2012		priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2013		priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2014		priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2015		priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2016	} else {
2017		priv->dev->xdp_features = 0;
2018	}
2019}
2020
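/* Set up per-device driver state: admin queue, device description (unless
 * skipped on reset), MSI-X accounting, queue defaults and device resources.
 */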
2021static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2022{
2023	int num_ntfy;
2024	int err;
2025
2026	/* Set up the adminq */
2027	err = gve_adminq_alloc(&priv->pdev->dev, priv);
2028	if (err) {
2029		dev_err(&priv->pdev->dev,
2030			"Failed to alloc admin queue: err=%d\n", err);
2031		return err;
2032	}
2033
2034	err = gve_verify_driver_compatibility(priv);
2035	if (err) {
2036		dev_err(&priv->pdev->dev,
2037			"Could not verify driver compatibility: err=%d\n", err);
2038		goto err;
2039	}
2040
2041	if (skip_describe_device)
2042		goto setup_device;
2043
2044	priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2045	/* Get the initial information we need from the device */
2046	err = gve_adminq_describe_device(priv);
2047	if (err) {
2048		dev_err(&priv->pdev->dev,
2049			"Could not get device information: err=%d\n", err);
2050		goto err;
2051	}
2052	priv->dev->mtu = priv->dev->max_mtu;
2053	num_ntfy = pci_msix_vec_count(priv->pdev);
2054	if (num_ntfy <= 0) {
2055		dev_err(&priv->pdev->dev,
2056			"could not count MSI-x vectors: err=%d\n", num_ntfy);
2057		err = num_ntfy;
2058		goto err;
2059	} else if (num_ntfy < GVE_MIN_MSIX) {
2060		dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2061			GVE_MIN_MSIX, num_ntfy);
2062		err = -EINVAL;
2063		goto err;
2064	}
2065
	/* Big TCP is only supported on DQO. */
2067	if (!gve_is_gqi(priv))
2068		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2069
2070	priv->num_registered_pages = 0;
2071	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2072	/* gvnic has one Notification Block per MSI-x vector, except for the
2073	 * management vector
2074	 */
2075	priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2076	priv->mgmt_msix_idx = priv->num_ntfy_blks;
2077
2078	priv->tx_cfg.max_queues =
2079		min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2080	priv->rx_cfg.max_queues =
2081		min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2082
2083	priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2084	priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2085	if (priv->default_num_queues > 0) {
2086		priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2087						priv->tx_cfg.num_queues);
2088		priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2089						priv->rx_cfg.num_queues);
2090	}
2091
2092	dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2093		 priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2094	dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2095		 priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2096
2097	if (!gve_is_gqi(priv)) {
2098		priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2099		priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2100	}
2101
2102setup_device:
2103	gve_set_netdev_xdp_features(priv);
2104	err = gve_setup_device_resources(priv);
2105	if (!err)
2106		return 0;
2107err:
2108	gve_adminq_free(&priv->pdev->dev, priv);
2109	return err;
2110}
2111
2112static void gve_teardown_priv_resources(struct gve_priv *priv)
2113{
2114	gve_teardown_device_resources(priv);
2115	gve_adminq_free(&priv->pdev->dev, priv);
2116}
2117
2118static void gve_trigger_reset(struct gve_priv *priv)
2119{
2120	/* Reset the device by releasing the AQ */
2121	gve_adminq_release(priv);
2122}
2123
2124static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2125{
2126	gve_trigger_reset(priv);
2127	/* With the reset having already happened, close cannot fail */
2128	if (was_up)
2129		gve_close(priv->dev);
2130	gve_teardown_priv_resources(priv);
2131}
2132
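/* Rebuild driver state after a reset and reopen the interface if it was
 * up beforehand.
 */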
2133static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2134{
2135	int err;
2136
2137	err = gve_init_priv(priv, true);
2138	if (err)
2139		goto err;
2140	if (was_up) {
2141		err = gve_open(priv->dev);
2142		if (err)
2143			goto err;
2144	}
2145	return 0;
2146err:
2147	dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2148	gve_turndown(priv);
2149	return err;
2150}
2151
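/* Tear the device down (cleanly if possible), then rebuild it and restore
 * its previous up/down state.
 */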
2152int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2153{
2154	bool was_up = netif_carrier_ok(priv->dev);
2155	int err;
2156
2157	dev_info(&priv->pdev->dev, "Performing reset\n");
2158	gve_clear_do_reset(priv);
2159	gve_set_reset_in_progress(priv);
	/* If we aren't attempting a clean teardown, just turn the device
	 * down and reset right away.
	 */
2163	if (!attempt_teardown) {
2164		gve_turndown(priv);
2165		gve_reset_and_teardown(priv, was_up);
2166	} else {
2167		/* Otherwise attempt to close normally */
2168		if (was_up) {
2169			err = gve_close(priv->dev);
2170			/* If that fails reset as we did above */
2171			if (err)
2172				gve_reset_and_teardown(priv, was_up);
2173		}
2174		/* Clean up any remaining resources */
2175		gve_teardown_priv_resources(priv);
2176	}
2177
2178	/* Set it all back up */
2179	err = gve_reset_recovery(priv, was_up);
2180	gve_clear_reset_in_progress(priv);
2181	priv->reset_cnt++;
2182	priv->interface_up_cnt = 0;
2183	priv->interface_down_cnt = 0;
2184	priv->stats_report_trigger_cnt = 0;
2185	return err;
2186}
2187
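/* Report the driver version to the device by streaming the prefix and
 * version strings, byte by byte, into the version register.
 */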
2188static void gve_write_version(u8 __iomem *driver_version_register)
2189{
2190	const char *c = gve_version_prefix;
2191
2192	while (*c) {
2193		writeb(*c, driver_version_register);
2194		c++;
2195	}
2196
2197	c = gve_version_str;
2198	while (*c) {
2199		writeb(*c, driver_version_register);
2200		c++;
2201	}
2202	writeb('\n', driver_version_register);
2203}
2204
2205static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2206{
2207	int max_tx_queues, max_rx_queues;
2208	struct net_device *dev;
2209	__be32 __iomem *db_bar;
2210	struct gve_registers __iomem *reg_bar;
2211	struct gve_priv *priv;
2212	int err;
2213
2214	err = pci_enable_device(pdev);
2215	if (err)
2216		return err;
2217
2218	err = pci_request_regions(pdev, gve_driver_name);
2219	if (err)
2220		goto abort_with_enabled;
2221
2222	pci_set_master(pdev);
2223
2224	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2225	if (err) {
2226		dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2227		goto abort_with_pci_region;
2228	}
2229
2230	reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2231	if (!reg_bar) {
2232		dev_err(&pdev->dev, "Failed to map pci bar!\n");
2233		err = -ENOMEM;
2234		goto abort_with_pci_region;
2235	}
2236
2237	db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2238	if (!db_bar) {
2239		dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2240		err = -ENOMEM;
2241		goto abort_with_reg_bar;
2242	}
2243
2244	gve_write_version(&reg_bar->driver_version);
2245	/* Get max queues to alloc etherdev */
2246	max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2247	max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2248	/* Alloc and setup the netdev and priv */
2249	dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2250	if (!dev) {
2251		dev_err(&pdev->dev, "could not allocate netdev\n");
2252		err = -ENOMEM;
2253		goto abort_with_db_bar;
2254	}
2255	SET_NETDEV_DEV(dev, &pdev->dev);
2256	pci_set_drvdata(pdev, dev);
2257	dev->ethtool_ops = &gve_ethtool_ops;
2258	dev->netdev_ops = &gve_netdev_ops;
2259
2260	/* Set default and supported features.
2261	 *
2262	 * Features might be set in other locations as well (such as
2263	 * `gve_adminq_describe_device`).
2264	 */
2265	dev->hw_features = NETIF_F_HIGHDMA;
2266	dev->hw_features |= NETIF_F_SG;
2267	dev->hw_features |= NETIF_F_HW_CSUM;
2268	dev->hw_features |= NETIF_F_TSO;
2269	dev->hw_features |= NETIF_F_TSO6;
2270	dev->hw_features |= NETIF_F_TSO_ECN;
2271	dev->hw_features |= NETIF_F_RXCSUM;
2272	dev->hw_features |= NETIF_F_RXHASH;
2273	dev->features = dev->hw_features;
2274	dev->watchdog_timeo = 5 * HZ;
2275	dev->min_mtu = ETH_MIN_MTU;
2276	netif_carrier_off(dev);
2277
2278	priv = netdev_priv(dev);
2279	priv->dev = dev;
2280	priv->pdev = pdev;
2281	priv->msg_enable = DEFAULT_MSG_LEVEL;
2282	priv->reg_bar0 = reg_bar;
2283	priv->db_bar2 = db_bar;
2284	priv->service_task_flags = 0x0;
2285	priv->state_flags = 0x0;
2286	priv->ethtool_flags = 0x0;
2287
2288	gve_set_probe_in_progress(priv);
2289	priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2290	if (!priv->gve_wq) {
		dev_err(&pdev->dev, "Could not allocate workqueue\n");
2292		err = -ENOMEM;
2293		goto abort_with_netdev;
2294	}
2295	INIT_WORK(&priv->service_task, gve_service_task);
2296	INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2297	priv->tx_cfg.max_queues = max_tx_queues;
2298	priv->rx_cfg.max_queues = max_rx_queues;
2299
2300	err = gve_init_priv(priv, false);
2301	if (err)
2302		goto abort_with_wq;
2303
2304	err = register_netdev(dev);
2305	if (err)
2306		goto abort_with_gve_init;
2307
2308	dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2309	dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2310	gve_clear_probe_in_progress(priv);
2311	queue_work(priv->gve_wq, &priv->service_task);
2312	return 0;
2313
2314abort_with_gve_init:
2315	gve_teardown_priv_resources(priv);
2316
2317abort_with_wq:
2318	destroy_workqueue(priv->gve_wq);
2319
2320abort_with_netdev:
2321	free_netdev(dev);
2322
2323abort_with_db_bar:
2324	pci_iounmap(pdev, db_bar);
2325
2326abort_with_reg_bar:
2327	pci_iounmap(pdev, reg_bar);
2328
2329abort_with_pci_region:
2330	pci_release_regions(pdev);
2331
2332abort_with_enabled:
2333	pci_disable_device(pdev);
2334	return err;
2335}
2336
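/* Undo gve_probe: unregister the netdev and release all driver, BAR and
 * PCI resources.
 */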
2337static void gve_remove(struct pci_dev *pdev)
2338{
2339	struct net_device *netdev = pci_get_drvdata(pdev);
2340	struct gve_priv *priv = netdev_priv(netdev);
2341	__be32 __iomem *db_bar = priv->db_bar2;
2342	void __iomem *reg_bar = priv->reg_bar0;
2343
2344	unregister_netdev(netdev);
2345	gve_teardown_priv_resources(priv);
2346	destroy_workqueue(priv->gve_wq);
2347	free_netdev(netdev);
2348	pci_iounmap(pdev, db_bar);
2349	pci_iounmap(pdev, reg_bar);
2350	pci_release_regions(pdev);
2351	pci_disable_device(pdev);
2352}
2353
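/* Quiesce the device on shutdown: close it if it was up (resetting if the
 * close fails) and release driver resources.
 */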
2354static void gve_shutdown(struct pci_dev *pdev)
2355{
2356	struct net_device *netdev = pci_get_drvdata(pdev);
2357	struct gve_priv *priv = netdev_priv(netdev);
2358	bool was_up = netif_carrier_ok(priv->dev);
2359
2360	rtnl_lock();
2361	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
2363		gve_reset_and_teardown(priv, was_up);
2364	} else {
2365		/* If the dev wasn't up or close worked, finish tearing down */
2366		gve_teardown_priv_resources(priv);
2367	}
2368	rtnl_unlock();
2369}
2370
2371#ifdef CONFIG_PM
2372static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2373{
2374	struct net_device *netdev = pci_get_drvdata(pdev);
2375	struct gve_priv *priv = netdev_priv(netdev);
2376	bool was_up = netif_carrier_ok(priv->dev);
2377
2378	priv->suspend_cnt++;
2379	rtnl_lock();
2380	if (was_up && gve_close(priv->dev)) {
		/* If the dev was up, attempt to close; if close fails, reset */
2382		gve_reset_and_teardown(priv, was_up);
2383	} else {
2384		/* If the dev wasn't up or close worked, finish tearing down */
2385		gve_teardown_priv_resources(priv);
2386	}
2387	priv->up_before_suspend = was_up;
2388	rtnl_unlock();
2389	return 0;
2390}
2391
2392static int gve_resume(struct pci_dev *pdev)
2393{
2394	struct net_device *netdev = pci_get_drvdata(pdev);
2395	struct gve_priv *priv = netdev_priv(netdev);
2396	int err;
2397
2398	priv->resume_cnt++;
2399	rtnl_lock();
2400	err = gve_reset_recovery(priv, priv->up_before_suspend);
2401	rtnl_unlock();
2402	return err;
2403}
2404#endif /* CONFIG_PM */
2405
2406static const struct pci_device_id gve_id_table[] = {
2407	{ PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2408	{ }
2409};
2410
2411static struct pci_driver gve_driver = {
2412	.name		= gve_driver_name,
2413	.id_table	= gve_id_table,
2414	.probe		= gve_probe,
2415	.remove		= gve_remove,
2416	.shutdown	= gve_shutdown,
2417#ifdef CONFIG_PM
	.suspend	= gve_suspend,
	.resume		= gve_resume,
2420#endif
2421};
2422
2423module_pci_driver(gve_driver);
2424
2425MODULE_DEVICE_TABLE(pci, gve_id_table);
2426MODULE_AUTHOR("Google, Inc.");
2427MODULE_DESCRIPTION("Google Virtual NIC Driver");
2428MODULE_LICENSE("Dual MIT/GPL");
2429MODULE_VERSION(GVE_VERSION);
2430