// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

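/* Detach an rx ring from its notification block so NAPI polling on that
 * block no longer services the ring.
 */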
static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
	struct gve_notify_block *block =
			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

	block->rx = NULL;
}

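/* Free everything backing one rx ring: the descriptor ring, the queue
 * resources, the data slot ring, and the per-slot page info, and release
 * the ring's queue page list back to the pool.
 */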
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_unassign_qpl(priv, rx->data.qpl->id);
	rx->data.qpl = NULL;
	kvfree(rx->data.page_info);

	slots = rx->mask + 1;
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

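/* Point one rx data slot at the start of a registered page and record the
 * page's host address so received data can later be copied or flipped.
 */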
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				struct gve_rx_data_slot *slot,
				dma_addr_t addr, struct page *page)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	slot->qpl_offset = cpu_to_be64(addr);
}

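/* Populate every rx data slot with a packet buffer backed by the ring's
 * queue page list. Returns the number of slots filled, or a negative errno
 * on failure.
 */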
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers; when possible we "page flip" between the two.
	 */
	slots = rx->mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	rx->data.qpl = gve_assign_rx_qpl(priv);
	if (!rx->data.qpl) {
		/* No free rx queue page list was available to assign. */
		kvfree(rx->data.page_info);
		rx->data.page_info = NULL;
		return -ENOMEM;
	}

	for (i = 0; i < slots; i++) {
		struct page *page = rx->data.qpl->pages[i];
		dma_addr_t addr = i * PAGE_SIZE;

		gve_setup_rx_buffer(&rx->data.page_info[i],
				    &rx->data.data_ring[i], addr, page);
	}

	return slots;
}

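/* Attach an rx ring to its notification block so interrupts and NAPI
 * polling for that block service the ring.
 */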
static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
	struct gve_rx_ring *rx = &priv->rx[queue_idx];

	block->rx = rx;
	rx->ntfy_id = ntfy_idx;
}

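/* Allocate the DMA rings and host state for one rx queue: the data slot
 * ring, the prefilled packet buffers, the queue resources, and the
 * descriptor ring.
 */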
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	slots = priv->rx_pages_per_qpl;
	rx->mask = slots - 1;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	rx->mask = slots - 1;
	rx->cnt = 0;
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	kvfree(rx->data.page_info);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

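/* Allocate every configured rx ring; on failure, free the rings that were
 * already allocated and return the error.
 */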
int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Free the rings already allocated if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

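/* Free every configured rx ring. */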
void gve_rx_free_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

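/* Tell the device how many buffers have been posted by writing the ring's
 * fill count to its doorbell.
 */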
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

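/* Map descriptor flag bits to the kernel hash type: L4 for TCP/UDP, L3 for
 * bare IPv4/IPv6, otherwise L2.
 */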
static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

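/* Build an skb by copying the received data out of the registered page, so
 * the packet buffer stays available to the device.
 */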
static struct sk_buff *gve_rx_copy(struct gve_rx_ring *rx,
				   struct net_device *dev,
				   struct napi_struct *napi,
				   struct gve_rx_slot_page_info *page_info,
				   u16 len)
{
	struct sk_buff *skb = napi_alloc_skb(napi, len);
	void *va = page_info->page_address + GVE_RX_PAD +
		   page_info->page_offset;

	if (unlikely(!skb))
		return NULL;

	__skb_put(skb, len);

	skb_copy_to_linear_data(skb, va, len);

	skb->protocol = eth_type_trans(skb, dev);

	u64_stats_update_begin(&rx->statss);
	rx->rx_copied_pkt++;
	u64_stats_update_end(&rx->statss);

	return skb;
}

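/* Build an skb that references the received data in place by attaching the
 * page fragment to an skb obtained from napi_get_frags().
 */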
static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
					struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			page_info->page_offset +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}

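/* Switch the slot to the other half of its page so the device's next write
 * lands in the buffer the stack is not currently holding.
 */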
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
			     struct gve_rx_data_slot *data_ring)
{
	u64 addr = be64_to_cpu(data_ring->qpl_offset);

	page_info->page_offset ^= PAGE_SIZE / 2;
	addr ^= PAGE_SIZE / 2;
	data_ring->qpl_offset = cpu_to_be64(addr);
}

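/* Handle one completed rx descriptor: sync the buffer, then either copy the
 * packet into a new skb or pass the page fragment up and flip to the other
 * half of the page. Returns false only on a reset-worthy failure.
 */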
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat, u32 idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	struct sk_buff *skb;
	int pagecount;
	u16 len;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_desc_err_dropped_pkt++;
		u64_stats_update_end(&rx->statss);
		return true;
	}

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	page_info = &rx->data.page_info[idx];
	dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
				PAGE_SIZE, DMA_FROM_DEVICE);

	/* gvnic can only receive into registered segments. If the buffer
	 * can't be recycled, our only choice is to copy the data out of
	 * it so that we can return it to the device.
	 */

	if (PAGE_SIZE == 4096) {
		if (len <= priv->rx_copybreak) {
			/* Just copy small packets */
			skb = gve_rx_copy(rx, dev, napi, page_info, len);
			u64_stats_update_begin(&rx->statss);
			rx->rx_copybreak_pkt++;
			u64_stats_update_end(&rx->statss);
			goto have_skb;
		}
		if (unlikely(!gve_can_recycle_pages(dev))) {
			skb = gve_rx_copy(rx, dev, napi, page_info, len);
			goto have_skb;
		}
		pagecount = page_count(page_info->page);
		if (pagecount == 1) {
			/* No part of this page is used by any SKBs; we attach
			 * the page fragment to a new SKB and pass it up the
			 * stack.
			 */
			skb = gve_rx_add_frags(dev, napi, page_info, len);
			if (!skb) {
				u64_stats_update_begin(&rx->statss);
				rx->rx_skb_alloc_fail++;
				u64_stats_update_end(&rx->statss);
				return true;
			}
			/* Make sure the kernel stack can't release the page */
			get_page(page_info->page);
			/* "flip" to other packet buffer on this page */
			gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
		} else if (pagecount >= 2) {
			/* We have previously passed the other half of this
			 * page up the stack, but it has not yet been freed.
			 */
			skb = gve_rx_copy(rx, dev, napi, page_info, len);
		} else {
			WARN(pagecount < 1, "Pagecount should never be < 1");
			return false;
		}
	} else {
		skb = gve_rx_copy(rx, dev, napi, page_info, len);
	}

have_skb:
	/* Failing to allocate an skb here is not a reset-worthy failure;
	 * just count the drop.
	 */
	if (!skb) {
		u64_stats_update_begin(&rx->statss);
		rx->rx_skb_alloc_fail++;
		u64_stats_update_end(&rx->statss);
		return true;
	}

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}

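/* Check whether the next descriptor has been written by the device, i.e.
 * whether its sequence number matches the one the ring expects next.
 */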
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->cnt & rx->mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

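/* Process completed rx descriptors up to the given budget, update stats and
 * the fill count, and ring the doorbell. Returns true if more work remains.
 */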
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	struct gve_rx_desc *desc;
	u32 cnt = rx->cnt;
	u32 idx = cnt & rx->mask;
	u32 work_done = 0;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
		if (!gve_rx(rx, desc, feat, idx))
			gve_schedule_reset(priv);
		cnt++;
		idx = cnt & rx->mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->cnt = cnt;
	rx->fill_cnt += work_done;

	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}

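/* NAPI poll entry point for rx: clean completed descriptors within budget
 * (a budget of 0 means process everything) and report whether repolling is
 * needed.
 */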
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}