// SPDX-License-Identifier: GPL-2.0
/* XDP sockets
 *
 * AF_XDP sockets allow a channel between XDP programs and userspace
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */

#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__

#include <linux/if_xdp.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
#include <net/netdev_rx_queue.h>
#include <net/xdp.h>

#include "xsk_queue.h"
#include "xdp_umem.h"
#include "xsk.h"

#define TX_BATCH_SIZE 32

static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);

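/* The *_need_wakeup helpers below maintain the XDP_RING_NEED_WAKEUP flag on
 * the fill and Tx rings. While the flag is set, userspace must explicitly
 * wake the kernel (e.g. via poll() or sendmsg()) for the driver to continue
 * processing that path.
 */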
void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
{
	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
		return;

	pool->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
	pool->cached_need_wakeup |= XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_set_rx_need_wakeup);

void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
	}
	rcu_read_unlock();

	pool->cached_need_wakeup |= XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_set_tx_need_wakeup);

void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool)
{
	if (!(pool->cached_need_wakeup & XDP_WAKEUP_RX))
		return;

	pool->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
	pool->cached_need_wakeup &= ~XDP_WAKEUP_RX;
}
EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);

void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	if (!(pool->cached_need_wakeup & XDP_WAKEUP_TX))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
	}
	rcu_read_unlock();

	pool->cached_need_wakeup &= ~XDP_WAKEUP_TX;
}
EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);

bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool)
{
	return pool->uses_need_wakeup;
}
EXPORT_SYMBOL(xsk_uses_need_wakeup);

struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev,
					    u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		return dev->_rx[queue_id].pool;
	if (queue_id < dev->real_num_tx_queues)
		return dev->_tx[queue_id].pool;

	return NULL;
}
EXPORT_SYMBOL(xsk_get_pool_from_qid);

void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id)
{
	if (queue_id < dev->num_rx_queues)
		dev->_rx[queue_id].pool = NULL;
	if (queue_id < dev->num_tx_queues)
		dev->_tx[queue_id].pool = NULL;
}

/* The buffer pool is stored both in the _rx struct and the _tx struct as we do
 * not know if the device has more tx queues than rx, or the opposite.
 * This might also change during run time.
 */
int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
			u16 queue_id)
{
	if (queue_id >= max_t(unsigned int,
			      dev->real_num_rx_queues,
			      dev->real_num_tx_queues))
		return -EINVAL;

	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].pool = pool;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].pool = pool;

	return 0;
}

static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len,
			u32 flags)
{
	u64 addr;
	int err;

	addr = xp_get_handle(xskb);
	err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
	if (err) {
		xs->rx_queue_full++;
		return err;
	}

	xp_release(xskb);
	return 0;
}

static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
	u32 frags = xdp_buff_has_frags(xdp);
	struct xdp_buff_xsk *pos, *tmp;
	struct list_head *xskb_list;
	u32 contd = 0;
	int err;

	if (frags)
		contd = XDP_PKT_CONTD;

	err = __xsk_rcv_zc(xs, xskb, len, contd);
	if (err)
		goto err;
	if (likely(!frags))
		return 0;

	xskb_list = &xskb->pool->xskb_list;
	list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
		if (list_is_singular(xskb_list))
			contd = 0;
		len = pos->xdp.data_end - pos->xdp.data;
		err = __xsk_rcv_zc(xs, pos, len, contd);
		if (err)
			goto err;
		list_del(&pos->xskb_list_node);
	}

	return 0;
err:
	xsk_buff_free(xdp);
	return err;
}

static void *xsk_copy_xdp_start(struct xdp_buff *from)
{
	if (unlikely(xdp_data_meta_unsupported(from)))
		return from->data;
	else
		return from->data_meta;
}

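/* Copy up to to_len bytes from the current source chunk into "to", advancing
 * to the next skb fragment in *frag whenever the current source chunk is
 * exhausted. Stops once "rem" bytes in total have been consumed and returns
 * the number of bytes copied into this destination buffer.
 */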
static u32 xsk_copy_xdp(void *to, void **from, u32 to_len,
			u32 *from_len, skb_frag_t **frag, u32 rem)
{
	u32 copied = 0;

	while (1) {
		u32 copy_len = min_t(u32, *from_len, to_len);

		memcpy(to, *from, copy_len);
		copied += copy_len;
		if (rem == copied)
			return copied;

		if (*from_len == copy_len) {
			*from = skb_frag_address(*frag);
			*from_len = skb_frag_size((*frag)++);
		} else {
			*from += copy_len;
			*from_len -= copy_len;
		}
		if (to_len == copy_len)
			return copied;

		to_len -= copy_len;
		to += copy_len;
	}
}

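/* Copy-mode receive: copy the packet (metadata included, walking any
 * fragments) into one or more buffers allocated from the socket's pool and
 * post them to the Rx ring, setting XDP_PKT_CONTD on all but the last
 * descriptor.
 */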
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	u32 frame_size = xsk_pool_get_rx_frame_size(xs->pool);
	void *copy_from = xsk_copy_xdp_start(xdp), *copy_to;
	u32 from_len, meta_len, rem, num_desc;
	struct xdp_buff_xsk *xskb;
	struct xdp_buff *xsk_xdp;
	skb_frag_t *frag;

	from_len = xdp->data_end - copy_from;
	meta_len = xdp->data - copy_from;
	rem = len + meta_len;

	if (len <= frame_size && !xdp_buff_has_frags(xdp)) {
		int err;

		xsk_xdp = xsk_buff_alloc(xs->pool);
		if (!xsk_xdp) {
			xs->rx_dropped++;
			return -ENOMEM;
		}
		memcpy(xsk_xdp->data - meta_len, copy_from, rem);
		xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
		err = __xsk_rcv_zc(xs, xskb, len, 0);
		if (err) {
			xsk_buff_free(xsk_xdp);
			return err;
		}

		return 0;
	}

	num_desc = (len - 1) / frame_size + 1;

	if (!xsk_buff_can_alloc(xs->pool, num_desc)) {
		xs->rx_dropped++;
		return -ENOMEM;
	}
	if (xskq_prod_nb_free(xs->rx, num_desc) < num_desc) {
		xs->rx_queue_full++;
		return -ENOBUFS;
	}

	if (xdp_buff_has_frags(xdp)) {
		struct skb_shared_info *sinfo;

		sinfo = xdp_get_shared_info_from_buff(xdp);
		frag = &sinfo->frags[0];
	}

	do {
		u32 to_len = frame_size + meta_len;
		u32 copied;

		xsk_xdp = xsk_buff_alloc(xs->pool);
		copy_to = xsk_xdp->data - meta_len;

		copied = xsk_copy_xdp(copy_to, &copy_from, to_len, &from_len, &frag, rem);
		rem -= copied;

		xskb = container_of(xsk_xdp, struct xdp_buff_xsk, xdp);
		__xsk_rcv_zc(xs, xskb, copied - meta_len, rem ? XDP_PKT_CONTD : 0);
		meta_len = 0;
	} while (rem);

	return 0;
}

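/* Consider the socket writable only while at most half of the Tx ring is
 * outstanding, so that write-space wakeups are not raised for a nearly
 * full ring.
 */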
static bool xsk_tx_writeable(struct xdp_sock *xs)
{
	if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2)
		return false;

	return true;
}

static bool xsk_is_bound(struct xdp_sock *xs)
{
	if (READ_ONCE(xs->state) == XSK_BOUND) {
		/* Matches smp_wmb() in bind(). */
		smp_rmb();
		return true;
	}
	return false;
}

static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	if (!xsk_is_bound(xs))
		return -ENXIO;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	sk_mark_napi_id_once_xdp(&xs->sk, xdp);
	return 0;
}

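/* Publish the freshly reserved Rx descriptors to userspace, release the
 * cached fill-queue entries and wake up any reader blocked on the socket.
 */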
static void xsk_flush(struct xdp_sock *xs)
{
	xskq_prod_submit(xs->rx);
	__xskq_cons_release(xs->pool->fq);
	sock_def_readable(&xs->sk);
}

int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len = xdp_get_buff_len(xdp);
	int err;

	spin_lock_bh(&xs->rx_lock);
	err = xsk_rcv_check(xs, xdp, len);
	if (!err) {
		err = __xsk_rcv(xs, xdp, len);
		xsk_flush(xs);
	}
	spin_unlock_bh(&xs->rx_lock);
	return err;
}

static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len = xdp_get_buff_len(xdp);
	int err;

	err = xsk_rcv_check(xs, xdp, len);
	if (err)
		return err;

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
		len = xdp->data_end - xdp->data;
		return xsk_rcv_zc(xs, xdp, len);
	}

	err = __xsk_rcv(xs, xdp, len);
	if (!err)
		xdp_return_buff(xdp);
	return err;
}

int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
	int err;

	err = xsk_rcv(xs, xdp);
	if (err)
		return err;

	if (!xs->flush_node.prev)
		list_add(&xs->flush_node, flush_list);

	return 0;
}

void __xsk_map_flush(void)
{
	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
	struct xdp_sock *xs, *tmp;

	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
		xsk_flush(xs);
		__list_del_clearprev(&xs->flush_node);
	}
}

void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries)
{
	xskq_prod_submit_n(pool->cq, nb_entries);
}
EXPORT_SYMBOL(xsk_tx_completed);

void xsk_tx_release(struct xsk_buff_pool *pool)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		__xskq_cons_release(xs->tx);
		if (xsk_tx_writeable(xs))
			xs->sk.sk_write_space(&xs->sk);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(xsk_tx_release);

bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
		if (!xskq_cons_peek_desc(xs->tx, desc, pool)) {
			if (xskq_has_descs(xs->tx))
				xskq_cons_release(xs->tx);
			continue;
		}

		/* This is the backpressure mechanism for the Tx path.
		 * Reserve space in the completion queue and only proceed
		 * if there is space in it. This avoids having to implement
		 * any buffering in the Tx path.
		 */
		if (xskq_prod_reserve_addr(pool->cq, desc->addr))
			goto out;

		xskq_cons_release(xs->tx);
		rcu_read_unlock();
		return true;
	}

out:
	rcu_read_unlock();
	return false;
}
EXPORT_SYMBOL(xsk_tx_peek_desc);

static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries)
{
	struct xdp_desc *descs = pool->tx_descs;
	u32 nb_pkts = 0;

	while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts]))
		nb_pkts++;

	xsk_tx_release(pool);
	return nb_pkts;
}

u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 nb_pkts)
{
	struct xdp_sock *xs;

	rcu_read_lock();
	if (!list_is_singular(&pool->xsk_tx_list)) {
		/* Fallback to the non-batched version */
		rcu_read_unlock();
		return xsk_tx_peek_release_fallback(pool, nb_pkts);
	}

	xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list);
	if (!xs) {
		nb_pkts = 0;
		goto out;
	}

	nb_pkts = xskq_cons_nb_entries(xs->tx, nb_pkts);

	/* This is the backpressure mechanism for the Tx path. Try to
	 * reserve space in the completion queue for all packets, but
	 * if there are fewer slots available, just process that many
	 * packets. This avoids having to implement any buffering in
	 * the Tx path.
	 */
	nb_pkts = xskq_prod_nb_free(pool->cq, nb_pkts);
	if (!nb_pkts)
		goto out;

	nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, nb_pkts);
	if (!nb_pkts) {
		xs->tx->queue_empty_descs++;
		goto out;
	}

	__xskq_cons_release(xs->tx);
	xskq_prod_write_addr_batch(pool->cq, pool->tx_descs, nb_pkts);
	xs->sk.sk_write_space(&xs->sk);

out:
	rcu_read_unlock();
	return nb_pkts;
}
EXPORT_SYMBOL(xsk_tx_peek_release_desc_batch);

static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
	struct net_device *dev = xs->dev;

	return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
}

static int xsk_cq_reserve_addr_locked(struct xdp_sock *xs, u64 addr)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&xs->pool->cq_lock, flags);
	ret = xskq_prod_reserve_addr(xs->pool->cq, addr);
	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);

	return ret;
}

static void xsk_cq_submit_locked(struct xdp_sock *xs, u32 n)
{
	unsigned long flags;

	spin_lock_irqsave(&xs->pool->cq_lock, flags);
	xskq_prod_submit_n(xs->pool->cq, n);
	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
}

static void xsk_cq_cancel_locked(struct xdp_sock *xs, u32 n)
{
	unsigned long flags;

	spin_lock_irqsave(&xs->pool->cq_lock, flags);
	xskq_prod_cancel_n(xs->pool->cq, n);
	spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
}

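/* The number of Tx descriptors backing an skb is stashed in
 * skb_shinfo(skb)->destructor_arg, so that the completion queue can be
 * advanced (on skb destruction) or rewound (on error) by the right amount.
 */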
static u32 xsk_get_num_desc(struct sk_buff *skb)
{
	return skb ? (long)skb_shinfo(skb)->destructor_arg : 0;
}

static void xsk_destruct_skb(struct sk_buff *skb)
{
	xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
	sock_wfree(skb);
}

static void xsk_set_destructor_arg(struct sk_buff *skb)
{
	long num = xsk_get_num_desc(xdp_sk(skb->sk)->skb) + 1;

	skb_shinfo(skb)->destructor_arg = (void *)num;
}

static void xsk_consume_skb(struct sk_buff *skb)
{
	struct xdp_sock *xs = xdp_sk(skb->sk);

	skb->destructor = sock_wfree;
	xsk_cq_cancel_locked(xs, xsk_get_num_desc(skb));
	/* Free skb without triggering the perf drop trace */
	consume_skb(skb);
	xs->skb = NULL;
}

static void xsk_drop_skb(struct sk_buff *skb)
{
	xdp_sk(skb->sk)->tx->invalid_descs += xsk_get_num_desc(skb);
	xsk_consume_skb(skb);
}

static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
					      struct xdp_desc *desc)
{
	struct xsk_buff_pool *pool = xs->pool;
	u32 hr, len, ts, offset, copy, copied;
	struct sk_buff *skb = xs->skb;
	struct page *page;
	void *buffer;
	int err, i;
	u64 addr;

	if (!skb) {
		hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));

		skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
		if (unlikely(!skb))
			return ERR_PTR(err);

		skb_reserve(skb, hr);
	}

	addr = desc->addr;
	len = desc->len;
	ts = pool->unaligned ? len : pool->chunk_size;

	buffer = xsk_buff_raw_get_data(pool, addr);
	offset = offset_in_page(buffer);
	addr = buffer - pool->addrs;

	for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
		if (unlikely(i >= MAX_SKB_FRAGS))
			return ERR_PTR(-EOVERFLOW);

		page = pool->umem->pgs[addr >> PAGE_SHIFT];
		get_page(page);

		copy = min_t(u32, PAGE_SIZE - offset, len - copied);
		skb_fill_page_desc(skb, i, page, offset, copy);

		copied += copy;
		addr += copy;
		offset = 0;
	}

	skb->len += len;
	skb->data_len += len;
	skb->truesize += ts;

	refcount_add(ts, &xs->sk.sk_wmem_alloc);

	return skb;
}

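/* Build (or extend) the skb for one Tx descriptor: attach umem pages directly
 * when the device supports IFF_TX_SKB_NO_LINEAR, otherwise copy the payload
 * into the skb linear area or into a newly allocated page fragment.
 */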
static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
				     struct xdp_desc *desc)
{
	struct net_device *dev = xs->dev;
	struct sk_buff *skb = xs->skb;
	int err;

	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
		skb = xsk_build_skb_zerocopy(xs, desc);
		if (IS_ERR(skb)) {
			err = PTR_ERR(skb);
			goto free_err;
		}
	} else {
		u32 hr, tr, len;
		void *buffer;

		buffer = xsk_buff_raw_get_data(xs->pool, desc->addr);
		len = desc->len;

		if (!skb) {
			hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
			tr = dev->needed_tailroom;
			skb = sock_alloc_send_skb(&xs->sk, hr + len + tr, 1, &err);
			if (unlikely(!skb))
				goto free_err;

			skb_reserve(skb, hr);
			skb_put(skb, len);

			err = skb_store_bits(skb, 0, buffer, len);
			if (unlikely(err)) {
				kfree_skb(skb);
				goto free_err;
			}
		} else {
			int nr_frags = skb_shinfo(skb)->nr_frags;
			struct page *page;
			u8 *vaddr;

			if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
				err = -EOVERFLOW;
				goto free_err;
			}

			page = alloc_page(xs->sk.sk_allocation);
			if (unlikely(!page)) {
				err = -EAGAIN;
				goto free_err;
			}

			vaddr = kmap_local_page(page);
			memcpy(vaddr, buffer, len);
			kunmap_local(vaddr);

			skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
			refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
		}
	}

	skb->dev = dev;
	skb->priority = xs->sk.sk_priority;
	skb->mark = READ_ONCE(xs->sk.sk_mark);
	skb->destructor = xsk_destruct_skb;
	xsk_set_destructor_arg(skb);

	return skb;

free_err:
	if (err == -EOVERFLOW) {
		/* Drop the packet */
		xsk_set_destructor_arg(xs->skb);
		xsk_drop_skb(xs->skb);
		xskq_cons_release(xs->tx);
	} else {
		/* Let application retry */
		xsk_cq_cancel_locked(xs, 1);
	}

	return ERR_PTR(err);
}

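/* Copy-mode (generic) Tx: peel up to TX_BATCH_SIZE descriptors off the Tx
 * ring, reserve completion-queue space for each one, build skbs and push
 * them straight to the driver's Tx queue with __dev_direct_xmit().
 */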
static int __xsk_generic_xmit(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);
	u32 max_batch = TX_BATCH_SIZE;
	bool sent_frame = false;
	struct xdp_desc desc;
	struct sk_buff *skb;
	int err = 0;

	mutex_lock(&xs->mutex);

	/* Since we dropped the RCU read lock, the socket state might have changed. */
	if (unlikely(!xsk_is_bound(xs))) {
		err = -ENXIO;
		goto out;
	}

	if (xs->queue_id >= xs->dev->real_num_tx_queues)
		goto out;

	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
		if (max_batch-- == 0) {
			err = -EAGAIN;
			goto out;
		}

		/* This is the backpressure mechanism for the Tx path.
		 * Reserve space in the completion queue and only proceed
		 * if there is space in it. This avoids having to implement
		 * any buffering in the Tx path.
		 */
		if (xsk_cq_reserve_addr_locked(xs, desc.addr))
			goto out;

		skb = xsk_build_skb(xs, &desc);
		if (IS_ERR(skb)) {
			err = PTR_ERR(skb);
			if (err != -EOVERFLOW)
				goto out;
			err = 0;
			continue;
		}

		xskq_cons_release(xs->tx);

		if (xp_mb_desc(&desc)) {
			xs->skb = skb;
			continue;
		}

		err = __dev_direct_xmit(skb, xs->queue_id);
		if (err == NETDEV_TX_BUSY) {
			/* Tell user-space to retry the send */
			xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skb));
			xsk_consume_skb(skb);
			err = -EAGAIN;
			goto out;
		}

		/* Ignore NET_XMIT_CN as packet might have been sent */
		if (err == NET_XMIT_DROP) {
			/* SKB completed but not sent */
			err = -EBUSY;
			xs->skb = NULL;
			goto out;
		}

		sent_frame = true;
		xs->skb = NULL;
	}

	if (xskq_has_descs(xs->tx)) {
		if (xs->skb)
			xsk_drop_skb(xs->skb);
		xskq_cons_release(xs->tx);
	}

out:
	if (sent_frame)
		if (xsk_tx_writeable(xs))
			sk->sk_write_space(sk);

	mutex_unlock(&xs->mutex);
	return err;
}

static int xsk_generic_xmit(struct sock *sk)
{
	int ret;

	/* Drop the RCU lock since the SKB path might sleep. */
	rcu_read_unlock();
	ret = __xsk_generic_xmit(sk);
	/* Reacquire the RCU lock before going into common code. */
	rcu_read_lock();

	return ret;
}

static bool xsk_no_wakeup(struct sock *sk)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	/* Prefer busy-polling, skip the wakeup. */
	return READ_ONCE(sk->sk_prefer_busy_poll) && READ_ONCE(sk->sk_ll_usec) &&
		READ_ONCE(sk->sk_napi_id) >= MIN_NAPI_ID;
#else
	return false;
#endif
}

static int xsk_check_common(struct xdp_sock *xs)
{
	if (unlikely(!xsk_is_bound(xs)))
		return -ENXIO;
	if (unlikely(!(xs->dev->flags & IFF_UP)))
		return -ENETDOWN;

	return 0;
}

static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct xsk_buff_pool *pool;
	int err;

	err = xsk_check_common(xs);
	if (err)
		return err;
	if (unlikely(need_wait))
		return -EOPNOTSUPP;
	if (unlikely(!xs->tx))
		return -ENOBUFS;

	if (sk_can_busy_loop(sk)) {
		if (xs->zc)
			__sk_mark_napi_id_once(sk, xsk_pool_get_napi_id(xs->pool));
		sk_busy_loop(sk, 1); /* only support non-blocking sockets */
	}

	if (xs->zc && xsk_no_wakeup(sk))
		return 0;

	pool = xs->pool;
	if (pool->cached_need_wakeup & XDP_WAKEUP_TX) {
		if (xs->zc)
			return xsk_wakeup(xs, XDP_WAKEUP_TX);
		return xsk_generic_xmit(sk);
	}
	return 0;
}

static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	int ret;

	rcu_read_lock();
	ret = __xsk_sendmsg(sock, m, total_len);
	rcu_read_unlock();

	return ret;
}

static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
{
	bool need_wait = !(flags & MSG_DONTWAIT);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int err;

	err = xsk_check_common(xs);
	if (err)
		return err;
	if (unlikely(!xs->rx))
		return -ENOBUFS;
	if (unlikely(need_wait))
		return -EOPNOTSUPP;

	if (sk_can_busy_loop(sk))
		sk_busy_loop(sk, 1); /* only support non-blocking sockets */

	if (xsk_no_wakeup(sk))
		return 0;

	if (xs->pool->cached_need_wakeup & XDP_WAKEUP_RX && xs->zc)
		return xsk_wakeup(xs, XDP_WAKEUP_RX);
	return 0;
}

static int xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags)
{
	int ret;

	rcu_read_lock();
	ret = __xsk_recvmsg(sock, m, len, flags);
	rcu_read_unlock();

	return ret;
}

static __poll_t xsk_poll(struct file *file, struct socket *sock,
			 struct poll_table_struct *wait)
{
	__poll_t mask = 0;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct xsk_buff_pool *pool;

	sock_poll_wait(file, sock, wait);

	rcu_read_lock();
	if (xsk_check_common(xs))
		goto out;

	pool = xs->pool;

	if (pool->cached_need_wakeup) {
		if (xs->zc)
			xsk_wakeup(xs, pool->cached_need_wakeup);
		else if (xs->tx)
			/* Poll needs to drive Tx also in copy mode */
			xsk_generic_xmit(sk);
	}

	if (xs->rx && !xskq_prod_is_empty(xs->rx))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (xs->tx && xsk_tx_writeable(xs))
		mask |= EPOLLOUT | EPOLLWRNORM;
out:
	rcu_read_unlock();
	return mask;
}

static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
			  bool umem_queue)
{
	struct xsk_queue *q;

	if (entries == 0 || *queue || !is_power_of_2(entries))
		return -EINVAL;

	q = xskq_create(entries, umem_queue);
	if (!q)
		return -ENOMEM;

	/* Make sure queue is ready before it can be seen by others */
	smp_wmb();
	WRITE_ONCE(*queue, q);
	return 0;
}

static void xsk_unbind_dev(struct xdp_sock *xs)
{
	struct net_device *dev = xs->dev;

	if (xs->state != XSK_BOUND)
		return;
	WRITE_ONCE(xs->state, XSK_UNBOUND);

	/* Wait for driver to stop using the xdp socket. */
	xp_del_xsk(xs->pool, xs);
	synchronize_net();
	dev_put(dev);
}

static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
					      struct xdp_sock __rcu ***map_entry)
{
	struct xsk_map *map = NULL;
	struct xsk_map_node *node;

	*map_entry = NULL;

	spin_lock_bh(&xs->map_list_lock);
	node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
					node);
	if (node) {
		bpf_map_inc(&node->map->map);
		map = node->map;
		*map_entry = node->map_entry;
	}
	spin_unlock_bh(&xs->map_list_lock);
	return map;
}

static void xsk_delete_from_maps(struct xdp_sock *xs)
{
	/* This function removes the current XDP socket from all the
	 * maps it resides in. We need to take extra care here, due to
	 * the two locks involved. Each map has a lock synchronizing
	 * updates to the entries, and each socket has a lock that
	 * synchronizes access to the list of maps (map_list). For
	 * deadlock avoidance the locks need to be taken in the order
	 * "map lock"->"socket map list lock". We start off by
	 * accessing the socket map list, and take a reference to the
	 * map to guarantee existence between the
	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
	 * calls. Then we ask the map to remove the socket, which
	 * tries to remove the socket from the map. Note that there
	 * might be updates to the map between
	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
	 */
	struct xdp_sock __rcu **map_entry = NULL;
	struct xsk_map *map;

	while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
		xsk_map_try_sock_delete(map, xs, map_entry);
		bpf_map_put(&map->map);
	}
}

static int xsk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net *net;

	if (!sk)
		return 0;

	net = sock_net(sk);

	if (xs->skb)
		xsk_drop_skb(xs->skb);

	mutex_lock(&net->xdp.lock);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&net->xdp.lock);

	sock_prot_inuse_add(net, sk->sk_prot, -1);

	xsk_delete_from_maps(xs);
	mutex_lock(&xs->mutex);
	xsk_unbind_dev(xs);
	mutex_unlock(&xs->mutex);

	xskq_destroy(xs->rx);
	xskq_destroy(xs->tx);
	xskq_destroy(xs->fq_tmp);
	xskq_destroy(xs->cq_tmp);

	sock_orphan(sk);
	sock->sk = NULL;

	sock_put(sk);

	return 0;
}

static struct socket *xsk_lookup_xsk_from_fd(int fd)
{
	struct socket *sock;
	int err;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return ERR_PTR(-ENOPROTOOPT);
	}

	return sock;
}

static bool xsk_validate_queues(struct xdp_sock *xs)
{
	return xs->fq_tmp && xs->cq_tmp;
}

static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev;
	int bound_dev_if;
	u32 flags, qid;
	int err = 0;

	if (addr_len < sizeof(struct sockaddr_xdp))
		return -EINVAL;
	if (sxdp->sxdp_family != AF_XDP)
		return -EINVAL;

	flags = sxdp->sxdp_flags;
	if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
		      XDP_USE_NEED_WAKEUP | XDP_USE_SG))
		return -EINVAL;

	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
	if (bound_dev_if && bound_dev_if != sxdp->sxdp_ifindex)
		return -EINVAL;

	rtnl_lock();
	mutex_lock(&xs->mutex);
	if (xs->state != XSK_READY) {
		err = -EBUSY;
		goto out_release;
	}

	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
	if (!dev) {
		err = -ENODEV;
		goto out_release;
	}

	if (!xs->rx && !xs->tx) {
		err = -EINVAL;
		goto out_unlock;
	}

	qid = sxdp->sxdp_queue_id;

	if (flags & XDP_SHARED_UMEM) {
		struct xdp_sock *umem_xs;
		struct socket *sock;

		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
		    (flags & XDP_USE_NEED_WAKEUP) || (flags & XDP_USE_SG)) {
			/* Cannot specify flags for shared sockets. */
			err = -EINVAL;
			goto out_unlock;
		}

		if (xs->umem) {
			/* We already have our own. */
			err = -EINVAL;
			goto out_unlock;
		}

114762306a36Sopenharmony_ci		sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
114862306a36Sopenharmony_ci		if (IS_ERR(sock)) {
114962306a36Sopenharmony_ci			err = PTR_ERR(sock);
115062306a36Sopenharmony_ci			goto out_unlock;
115162306a36Sopenharmony_ci		}
115262306a36Sopenharmony_ci
115362306a36Sopenharmony_ci		umem_xs = xdp_sk(sock->sk);
115462306a36Sopenharmony_ci		if (!xsk_is_bound(umem_xs)) {
115562306a36Sopenharmony_ci			err = -EBADF;
115662306a36Sopenharmony_ci			sockfd_put(sock);
115762306a36Sopenharmony_ci			goto out_unlock;
115862306a36Sopenharmony_ci		}
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci		if (umem_xs->queue_id != qid || umem_xs->dev != dev) {
116162306a36Sopenharmony_ci			/* Share the umem with another socket on another qid
116262306a36Sopenharmony_ci			 * and/or device.
116362306a36Sopenharmony_ci			 */
116462306a36Sopenharmony_ci			xs->pool = xp_create_and_assign_umem(xs,
116562306a36Sopenharmony_ci							     umem_xs->umem);
116662306a36Sopenharmony_ci			if (!xs->pool) {
116762306a36Sopenharmony_ci				err = -ENOMEM;
116862306a36Sopenharmony_ci				sockfd_put(sock);
116962306a36Sopenharmony_ci				goto out_unlock;
117062306a36Sopenharmony_ci			}
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci			err = xp_assign_dev_shared(xs->pool, umem_xs, dev,
117362306a36Sopenharmony_ci						   qid);
117462306a36Sopenharmony_ci			if (err) {
117562306a36Sopenharmony_ci				xp_destroy(xs->pool);
117662306a36Sopenharmony_ci				xs->pool = NULL;
117762306a36Sopenharmony_ci				sockfd_put(sock);
117862306a36Sopenharmony_ci				goto out_unlock;
117962306a36Sopenharmony_ci			}
118062306a36Sopenharmony_ci		} else {
118162306a36Sopenharmony_ci			/* Share the buffer pool with the other socket. */
118262306a36Sopenharmony_ci			if (xs->fq_tmp || xs->cq_tmp) {
118362306a36Sopenharmony_ci				/* Do not allow setting your own fq or cq. */
118462306a36Sopenharmony_ci				err = -EINVAL;
118562306a36Sopenharmony_ci				sockfd_put(sock);
118662306a36Sopenharmony_ci				goto out_unlock;
118762306a36Sopenharmony_ci			}
118862306a36Sopenharmony_ci
118962306a36Sopenharmony_ci			xp_get_pool(umem_xs->pool);
119062306a36Sopenharmony_ci			xs->pool = umem_xs->pool;
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci			/* If the underlying shared umem was created without
119362306a36Sopenharmony_ci			 * a Tx ring, allocate the Tx descriptor array that
119462306a36Sopenharmony_ci			 * the Tx batching API uses.
119562306a36Sopenharmony_ci			 */
119662306a36Sopenharmony_ci			if (xs->tx && !xs->pool->tx_descs) {
119762306a36Sopenharmony_ci				err = xp_alloc_tx_descs(xs->pool, xs);
119862306a36Sopenharmony_ci				if (err) {
119962306a36Sopenharmony_ci					xp_put_pool(xs->pool);
120062306a36Sopenharmony_ci					xs->pool = NULL;
120162306a36Sopenharmony_ci					sockfd_put(sock);
120262306a36Sopenharmony_ci					goto out_unlock;
120362306a36Sopenharmony_ci				}
120462306a36Sopenharmony_ci			}
120562306a36Sopenharmony_ci		}
120662306a36Sopenharmony_ci
120762306a36Sopenharmony_ci		xdp_get_umem(umem_xs->umem);
120862306a36Sopenharmony_ci		WRITE_ONCE(xs->umem, umem_xs->umem);
120962306a36Sopenharmony_ci		sockfd_put(sock);
121062306a36Sopenharmony_ci	} else if (!xs->umem || !xsk_validate_queues(xs)) {
121162306a36Sopenharmony_ci		err = -EINVAL;
121262306a36Sopenharmony_ci		goto out_unlock;
121362306a36Sopenharmony_ci	} else {
121462306a36Sopenharmony_ci		/* This xsk has its own umem. */
121562306a36Sopenharmony_ci		xs->pool = xp_create_and_assign_umem(xs, xs->umem);
121662306a36Sopenharmony_ci		if (!xs->pool) {
121762306a36Sopenharmony_ci			err = -ENOMEM;
121862306a36Sopenharmony_ci			goto out_unlock;
121962306a36Sopenharmony_ci		}
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci		err = xp_assign_dev(xs->pool, dev, qid, flags);
122262306a36Sopenharmony_ci		if (err) {
122362306a36Sopenharmony_ci			xp_destroy(xs->pool);
122462306a36Sopenharmony_ci			xs->pool = NULL;
122562306a36Sopenharmony_ci			goto out_unlock;
122662306a36Sopenharmony_ci		}
122762306a36Sopenharmony_ci	}
122862306a36Sopenharmony_ci
122962306a36Sopenharmony_ci	/* FQ and CQ are now owned by the buffer pool and cleaned up with it. */
123062306a36Sopenharmony_ci	xs->fq_tmp = NULL;
123162306a36Sopenharmony_ci	xs->cq_tmp = NULL;
123262306a36Sopenharmony_ci
123362306a36Sopenharmony_ci	xs->dev = dev;
123462306a36Sopenharmony_ci	xs->zc = xs->umem->zc;
123562306a36Sopenharmony_ci	xs->sg = !!(xs->umem->flags & XDP_UMEM_SG_FLAG);
123662306a36Sopenharmony_ci	xs->queue_id = qid;
123762306a36Sopenharmony_ci	xp_add_xsk(xs->pool, xs);
123862306a36Sopenharmony_ci
123962306a36Sopenharmony_ciout_unlock:
124062306a36Sopenharmony_ci	if (err) {
124162306a36Sopenharmony_ci		dev_put(dev);
124262306a36Sopenharmony_ci	} else {
124362306a36Sopenharmony_ci		/* Matches smp_rmb() in bind() for shared umem
124462306a36Sopenharmony_ci		 * sockets, and xsk_is_bound().
124562306a36Sopenharmony_ci		 */
124662306a36Sopenharmony_ci		smp_wmb();
124762306a36Sopenharmony_ci		WRITE_ONCE(xs->state, XSK_BOUND);
124862306a36Sopenharmony_ci	}
124962306a36Sopenharmony_ciout_release:
125062306a36Sopenharmony_ci	mutex_unlock(&xs->mutex);
125162306a36Sopenharmony_ci	rtnl_unlock();
125262306a36Sopenharmony_ci	return err;
125362306a36Sopenharmony_ci}
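62306a36Sopenharmony_ci
62306a36Sopenharmony_ci/* Illustrative userspace sketch of the two bind paths handled above (not
62306a36Sopenharmony_ci * part of this file, and only a sketch): it assumes the umem and rings have
62306a36Sopenharmony_ci * already been configured via setsockopt(), and the ifindex/queue/fd names
62306a36Sopenharmony_ci * are placeholders.
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	#include <linux/if_xdp.h>
62306a36Sopenharmony_ci *	#include <sys/socket.h>
62306a36Sopenharmony_ci *	#include <net/if.h>
62306a36Sopenharmony_ci *	#include <err.h>
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	struct sockaddr_xdp sxdp = {
62306a36Sopenharmony_ci *		.sxdp_family = AF_XDP,
62306a36Sopenharmony_ci *		.sxdp_ifindex = if_nametoindex("eth0"),
62306a36Sopenharmony_ci *		.sxdp_queue_id = 0,
62306a36Sopenharmony_ci *		.sxdp_flags = XDP_USE_NEED_WAKEUP,
62306a36Sopenharmony_ci *	};
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	// First socket: brings its own umem, fill/completion rings and at
62306a36Sopenharmony_ci *	// least one of the rx/tx rings.
62306a36Sopenharmony_ci *	if (bind(xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)))
62306a36Sopenharmony_ci *		err(1, "bind");
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	// Second socket sharing the first one's umem on the same
62306a36Sopenharmony_ci *	// device/queue: XDP_SHARED_UMEM may not be combined with the other
62306a36Sopenharmony_ci *	// bind flags, and the socket must not bring its own fill or
62306a36Sopenharmony_ci *	// completion ring.
62306a36Sopenharmony_ci *	struct sockaddr_xdp shared = {
62306a36Sopenharmony_ci *		.sxdp_family = AF_XDP,
62306a36Sopenharmony_ci *		.sxdp_ifindex = sxdp.sxdp_ifindex,
62306a36Sopenharmony_ci *		.sxdp_queue_id = 0,
62306a36Sopenharmony_ci *		.sxdp_flags = XDP_SHARED_UMEM,
62306a36Sopenharmony_ci *		.sxdp_shared_umem_fd = xsk_fd,
62306a36Sopenharmony_ci *	};
62306a36Sopenharmony_ci *	if (bind(xsk2_fd, (struct sockaddr *)&shared, sizeof(shared)))
62306a36Sopenharmony_ci *		err(1, "bind shared");
62306a36Sopenharmony_ci */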
125462306a36Sopenharmony_ci
125562306a36Sopenharmony_cistruct xdp_umem_reg_v1 {
125662306a36Sopenharmony_ci	__u64 addr; /* Start of packet data area */
125762306a36Sopenharmony_ci	__u64 len; /* Length of packet data area */
125862306a36Sopenharmony_ci	__u32 chunk_size;
125962306a36Sopenharmony_ci	__u32 headroom;
126062306a36Sopenharmony_ci};
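62306a36Sopenharmony_ci
62306a36Sopenharmony_ci/* Legacy layout of the XDP_UMEM_REG argument: xsk_setsockopt() below picks
62306a36Sopenharmony_ci * the copy size from optlen, so a binary built against an older if_xdp.h
62306a36Sopenharmony_ci * whose xdp_umem_reg had only these four fields keeps working. Hypothetical
62306a36Sopenharmony_ci * old-caller sketch (not part of this file), with bufs/len as placeholders:
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	struct xdp_umem_reg mr = {	// old four-field definition
62306a36Sopenharmony_ci *		.addr = (unsigned long)bufs,
62306a36Sopenharmony_ci *		.len = len,
62306a36Sopenharmony_ci *		.chunk_size = 2048,
62306a36Sopenharmony_ci *		.headroom = 0,
62306a36Sopenharmony_ci *	};
62306a36Sopenharmony_ci *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));	// optlen == 24
62306a36Sopenharmony_ci */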
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_cistatic int xsk_setsockopt(struct socket *sock, int level, int optname,
126362306a36Sopenharmony_ci			  sockptr_t optval, unsigned int optlen)
126462306a36Sopenharmony_ci{
126562306a36Sopenharmony_ci	struct sock *sk = sock->sk;
126662306a36Sopenharmony_ci	struct xdp_sock *xs = xdp_sk(sk);
126762306a36Sopenharmony_ci	int err;
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_ci	if (level != SOL_XDP)
127062306a36Sopenharmony_ci		return -ENOPROTOOPT;
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci	switch (optname) {
127362306a36Sopenharmony_ci	case XDP_RX_RING:
127462306a36Sopenharmony_ci	case XDP_TX_RING:
127562306a36Sopenharmony_ci	{
127662306a36Sopenharmony_ci		struct xsk_queue **q;
127762306a36Sopenharmony_ci		int entries;
127862306a36Sopenharmony_ci
127962306a36Sopenharmony_ci		if (optlen < sizeof(entries))
128062306a36Sopenharmony_ci			return -EINVAL;
128162306a36Sopenharmony_ci		if (copy_from_sockptr(&entries, optval, sizeof(entries)))
128262306a36Sopenharmony_ci			return -EFAULT;
128362306a36Sopenharmony_ci
128462306a36Sopenharmony_ci		mutex_lock(&xs->mutex);
128562306a36Sopenharmony_ci		if (xs->state != XSK_READY) {
128662306a36Sopenharmony_ci			mutex_unlock(&xs->mutex);
128762306a36Sopenharmony_ci			return -EBUSY;
128862306a36Sopenharmony_ci		}
128962306a36Sopenharmony_ci		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
129062306a36Sopenharmony_ci		err = xsk_init_queue(entries, q, false);
129162306a36Sopenharmony_ci		if (!err && optname == XDP_TX_RING)
129262306a36Sopenharmony_ci			/* Tx needs to be explicitly woken up the first time */
129362306a36Sopenharmony_ci			xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
129462306a36Sopenharmony_ci		mutex_unlock(&xs->mutex);
129562306a36Sopenharmony_ci		return err;
129662306a36Sopenharmony_ci	}
129762306a36Sopenharmony_ci	case XDP_UMEM_REG:
129862306a36Sopenharmony_ci	{
129962306a36Sopenharmony_ci		size_t mr_size = sizeof(struct xdp_umem_reg);
130062306a36Sopenharmony_ci		struct xdp_umem_reg mr = {};
130162306a36Sopenharmony_ci		struct xdp_umem *umem;
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci		if (optlen < sizeof(struct xdp_umem_reg_v1))
130462306a36Sopenharmony_ci			return -EINVAL;
130562306a36Sopenharmony_ci		else if (optlen < sizeof(mr))
130662306a36Sopenharmony_ci			mr_size = sizeof(struct xdp_umem_reg_v1);
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci		if (copy_from_sockptr(&mr, optval, mr_size))
130962306a36Sopenharmony_ci			return -EFAULT;
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_ci		mutex_lock(&xs->mutex);
131262306a36Sopenharmony_ci		if (xs->state != XSK_READY || xs->umem) {
131362306a36Sopenharmony_ci			mutex_unlock(&xs->mutex);
131462306a36Sopenharmony_ci			return -EBUSY;
131562306a36Sopenharmony_ci		}
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci		umem = xdp_umem_create(&mr);
131862306a36Sopenharmony_ci		if (IS_ERR(umem)) {
131962306a36Sopenharmony_ci			mutex_unlock(&xs->mutex);
132062306a36Sopenharmony_ci			return PTR_ERR(umem);
132162306a36Sopenharmony_ci		}
132262306a36Sopenharmony_ci
132362306a36Sopenharmony_ci		/* Make sure umem is ready before it can be seen by others */
132462306a36Sopenharmony_ci		smp_wmb();
132562306a36Sopenharmony_ci		WRITE_ONCE(xs->umem, umem);
132662306a36Sopenharmony_ci		mutex_unlock(&xs->mutex);
132762306a36Sopenharmony_ci		return 0;
132862306a36Sopenharmony_ci	}
132962306a36Sopenharmony_ci	case XDP_UMEM_FILL_RING:
133062306a36Sopenharmony_ci	case XDP_UMEM_COMPLETION_RING:
133162306a36Sopenharmony_ci	{
133262306a36Sopenharmony_ci		struct xsk_queue **q;
133362306a36Sopenharmony_ci		int entries;
133462306a36Sopenharmony_ci
62306a36Sopenharmony_ci		if (optlen < sizeof(entries))
62306a36Sopenharmony_ci			return -EINVAL;
133562306a36Sopenharmony_ci		if (copy_from_sockptr(&entries, optval, sizeof(entries)))
133662306a36Sopenharmony_ci			return -EFAULT;
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci		mutex_lock(&xs->mutex);
133962306a36Sopenharmony_ci		if (xs->state != XSK_READY) {
134062306a36Sopenharmony_ci			mutex_unlock(&xs->mutex);
134162306a36Sopenharmony_ci			return -EBUSY;
134262306a36Sopenharmony_ci		}
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci		q = (optname == XDP_UMEM_FILL_RING) ? &xs->fq_tmp :
134562306a36Sopenharmony_ci			&xs->cq_tmp;
134662306a36Sopenharmony_ci		err = xsk_init_queue(entries, q, true);
134762306a36Sopenharmony_ci		mutex_unlock(&xs->mutex);
134862306a36Sopenharmony_ci		return err;
134962306a36Sopenharmony_ci	}
135062306a36Sopenharmony_ci	default:
135162306a36Sopenharmony_ci		break;
135262306a36Sopenharmony_ci	}
135362306a36Sopenharmony_ci
135462306a36Sopenharmony_ci	return -ENOPROTOOPT;
135562306a36Sopenharmony_ci}
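62306a36Sopenharmony_ci
62306a36Sopenharmony_ci/* Illustrative userspace setup sequence for the socket options handled
62306a36Sopenharmony_ci * above (a sketch, not part of this file). NUM_FRAMES, FRAME_SIZE and the
62306a36Sopenharmony_ci * ring size are placeholder choices; ring sizes must be a power of two,
62306a36Sopenharmony_ci * and the whole sequence runs before bind().
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	struct xdp_umem_reg mr = {};
62306a36Sopenharmony_ci *	int ring_sz = 2048;
62306a36Sopenharmony_ci *	void *bufs;
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	posix_memalign(&bufs, getpagesize(), NUM_FRAMES * FRAME_SIZE);
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	mr.addr = (unsigned long)bufs;
62306a36Sopenharmony_ci *	mr.len = NUM_FRAMES * FRAME_SIZE;
62306a36Sopenharmony_ci *	mr.chunk_size = FRAME_SIZE;
62306a36Sopenharmony_ci *	mr.headroom = 0;
62306a36Sopenharmony_ci *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &ring_sz, sizeof(ring_sz));
62306a36Sopenharmony_ci *	setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ring_sz, sizeof(ring_sz));
62306a36Sopenharmony_ci *	setsockopt(fd, SOL_XDP, XDP_RX_RING, &ring_sz, sizeof(ring_sz));
62306a36Sopenharmony_ci *	setsockopt(fd, SOL_XDP, XDP_TX_RING, &ring_sz, sizeof(ring_sz));
62306a36Sopenharmony_ci */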
135662306a36Sopenharmony_ci
135762306a36Sopenharmony_cistatic void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
135862306a36Sopenharmony_ci{
135962306a36Sopenharmony_ci	ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
136062306a36Sopenharmony_ci	ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
136162306a36Sopenharmony_ci	ring->desc = offsetof(struct xdp_rxtx_ring, desc);
136262306a36Sopenharmony_ci}
136362306a36Sopenharmony_ci
136462306a36Sopenharmony_cistatic void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
136562306a36Sopenharmony_ci{
136662306a36Sopenharmony_ci	ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
136762306a36Sopenharmony_ci	ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
136862306a36Sopenharmony_ci	ring->desc = offsetof(struct xdp_umem_ring, desc);
136962306a36Sopenharmony_ci}
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_cistruct xdp_statistics_v1 {
137262306a36Sopenharmony_ci	__u64 rx_dropped;
137362306a36Sopenharmony_ci	__u64 rx_invalid_descs;
137462306a36Sopenharmony_ci	__u64 tx_invalid_descs;
137562306a36Sopenharmony_ci};
137662306a36Sopenharmony_ci
137762306a36Sopenharmony_cistatic int xsk_getsockopt(struct socket *sock, int level, int optname,
137862306a36Sopenharmony_ci			  char __user *optval, int __user *optlen)
137962306a36Sopenharmony_ci{
138062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
138162306a36Sopenharmony_ci	struct xdp_sock *xs = xdp_sk(sk);
138262306a36Sopenharmony_ci	int len;
138362306a36Sopenharmony_ci
138462306a36Sopenharmony_ci	if (level != SOL_XDP)
138562306a36Sopenharmony_ci		return -ENOPROTOOPT;
138662306a36Sopenharmony_ci
138762306a36Sopenharmony_ci	if (get_user(len, optlen))
138862306a36Sopenharmony_ci		return -EFAULT;
138962306a36Sopenharmony_ci	if (len < 0)
139062306a36Sopenharmony_ci		return -EINVAL;
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci	switch (optname) {
139362306a36Sopenharmony_ci	case XDP_STATISTICS:
139462306a36Sopenharmony_ci	{
139562306a36Sopenharmony_ci		struct xdp_statistics stats = {};
139662306a36Sopenharmony_ci		bool extra_stats = true;
139762306a36Sopenharmony_ci		size_t stats_size;
139862306a36Sopenharmony_ci
139962306a36Sopenharmony_ci		if (len < sizeof(struct xdp_statistics_v1)) {
140062306a36Sopenharmony_ci			return -EINVAL;
140162306a36Sopenharmony_ci		} else if (len < sizeof(stats)) {
140262306a36Sopenharmony_ci			extra_stats = false;
140362306a36Sopenharmony_ci			stats_size = sizeof(struct xdp_statistics_v1);
140462306a36Sopenharmony_ci		} else {
140562306a36Sopenharmony_ci			stats_size = sizeof(stats);
140662306a36Sopenharmony_ci		}
140762306a36Sopenharmony_ci
140862306a36Sopenharmony_ci		mutex_lock(&xs->mutex);
140962306a36Sopenharmony_ci		stats.rx_dropped = xs->rx_dropped;
141062306a36Sopenharmony_ci		if (extra_stats) {
141162306a36Sopenharmony_ci			stats.rx_ring_full = xs->rx_queue_full;
141262306a36Sopenharmony_ci			stats.rx_fill_ring_empty_descs =
141362306a36Sopenharmony_ci				xs->pool ? xskq_nb_queue_empty_descs(xs->pool->fq) : 0;
141462306a36Sopenharmony_ci			stats.tx_ring_empty_descs = xskq_nb_queue_empty_descs(xs->tx);
141562306a36Sopenharmony_ci		} else {
141662306a36Sopenharmony_ci			stats.rx_dropped += xs->rx_queue_full;
141762306a36Sopenharmony_ci		}
141862306a36Sopenharmony_ci		stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
141962306a36Sopenharmony_ci		stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
142062306a36Sopenharmony_ci		mutex_unlock(&xs->mutex);
142162306a36Sopenharmony_ci
142262306a36Sopenharmony_ci		if (copy_to_user(optval, &stats, stats_size))
142362306a36Sopenharmony_ci			return -EFAULT;
142462306a36Sopenharmony_ci		if (put_user(stats_size, optlen))
142562306a36Sopenharmony_ci			return -EFAULT;
142662306a36Sopenharmony_ci
142762306a36Sopenharmony_ci		return 0;
142862306a36Sopenharmony_ci	}
142962306a36Sopenharmony_ci	case XDP_MMAP_OFFSETS:
143062306a36Sopenharmony_ci	{
143162306a36Sopenharmony_ci		struct xdp_mmap_offsets off;
143262306a36Sopenharmony_ci		struct xdp_mmap_offsets_v1 off_v1;
143362306a36Sopenharmony_ci		bool flags_supported = true;
143462306a36Sopenharmony_ci		void *to_copy;
143562306a36Sopenharmony_ci
143662306a36Sopenharmony_ci		if (len < sizeof(off_v1))
143762306a36Sopenharmony_ci			return -EINVAL;
143862306a36Sopenharmony_ci		else if (len < sizeof(off))
143962306a36Sopenharmony_ci			flags_supported = false;
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci		if (flags_supported) {
144262306a36Sopenharmony_ci			/* xdp_ring_offset is identical to xdp_ring_offset_v1
144362306a36Sopenharmony_ci			 * except for the flags field added to the end.
144462306a36Sopenharmony_ci			 */
144562306a36Sopenharmony_ci			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
144662306a36Sopenharmony_ci					       &off.rx);
144762306a36Sopenharmony_ci			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
144862306a36Sopenharmony_ci					       &off.tx);
144962306a36Sopenharmony_ci			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
145062306a36Sopenharmony_ci					       &off.fr);
145162306a36Sopenharmony_ci			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
145262306a36Sopenharmony_ci					       &off.cr);
145362306a36Sopenharmony_ci			off.rx.flags = offsetof(struct xdp_rxtx_ring,
145462306a36Sopenharmony_ci						ptrs.flags);
145562306a36Sopenharmony_ci			off.tx.flags = offsetof(struct xdp_rxtx_ring,
145662306a36Sopenharmony_ci						ptrs.flags);
145762306a36Sopenharmony_ci			off.fr.flags = offsetof(struct xdp_umem_ring,
145862306a36Sopenharmony_ci						ptrs.flags);
145962306a36Sopenharmony_ci			off.cr.flags = offsetof(struct xdp_umem_ring,
146062306a36Sopenharmony_ci						ptrs.flags);
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_ci			len = sizeof(off);
146362306a36Sopenharmony_ci			to_copy = &off;
146462306a36Sopenharmony_ci		} else {
146562306a36Sopenharmony_ci			xsk_enter_rxtx_offsets(&off_v1.rx);
146662306a36Sopenharmony_ci			xsk_enter_rxtx_offsets(&off_v1.tx);
146762306a36Sopenharmony_ci			xsk_enter_umem_offsets(&off_v1.fr);
146862306a36Sopenharmony_ci			xsk_enter_umem_offsets(&off_v1.cr);
146962306a36Sopenharmony_ci
147062306a36Sopenharmony_ci			len = sizeof(off_v1);
147162306a36Sopenharmony_ci			to_copy = &off_v1;
147262306a36Sopenharmony_ci		}
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci		if (copy_to_user(optval, to_copy, len))
147562306a36Sopenharmony_ci			return -EFAULT;
147662306a36Sopenharmony_ci		if (put_user(len, optlen))
147762306a36Sopenharmony_ci			return -EFAULT;
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci		return 0;
148062306a36Sopenharmony_ci	}
148162306a36Sopenharmony_ci	case XDP_OPTIONS:
148262306a36Sopenharmony_ci	{
148362306a36Sopenharmony_ci		struct xdp_options opts = {};
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci		if (len < sizeof(opts))
148662306a36Sopenharmony_ci			return -EINVAL;
148762306a36Sopenharmony_ci
148862306a36Sopenharmony_ci		mutex_lock(&xs->mutex);
148962306a36Sopenharmony_ci		if (xs->zc)
149062306a36Sopenharmony_ci			opts.flags |= XDP_OPTIONS_ZEROCOPY;
149162306a36Sopenharmony_ci		mutex_unlock(&xs->mutex);
149262306a36Sopenharmony_ci
149362306a36Sopenharmony_ci		len = sizeof(opts);
149462306a36Sopenharmony_ci		if (copy_to_user(optval, &opts, len))
149562306a36Sopenharmony_ci			return -EFAULT;
149662306a36Sopenharmony_ci		if (put_user(len, optlen))
149762306a36Sopenharmony_ci			return -EFAULT;
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci		return 0;
150062306a36Sopenharmony_ci	}
150162306a36Sopenharmony_ci	default:
150262306a36Sopenharmony_ci		break;
150362306a36Sopenharmony_ci	}
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	return -EOPNOTSUPP;
150662306a36Sopenharmony_ci}
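62306a36Sopenharmony_ci
62306a36Sopenharmony_ci/* Illustrative userspace queries against the options above (a sketch, not
62306a36Sopenharmony_ci * part of this file). Passing the current struct sizes yields the extended
62306a36Sopenharmony_ci * layouts; callers that pass the v1 sizes get the truncated layouts, as
62306a36Sopenharmony_ci * implemented above. Both optlen arguments are value-result.
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	struct xdp_statistics stats;
62306a36Sopenharmony_ci *	struct xdp_mmap_offsets off;
62306a36Sopenharmony_ci *	socklen_t optlen;
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	optlen = sizeof(stats);
62306a36Sopenharmony_ci *	if (!getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen))
62306a36Sopenharmony_ci *		printf("rx_dropped %llu rx_ring_full %llu\n",
62306a36Sopenharmony_ci *		       (unsigned long long)stats.rx_dropped,
62306a36Sopenharmony_ci *		       (unsigned long long)stats.rx_ring_full);
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	optlen = sizeof(off);
62306a36Sopenharmony_ci *	getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
62306a36Sopenharmony_ci *	// off.rx/off.tx/off.fr/off.cr now hold the producer, consumer, desc
62306a36Sopenharmony_ci *	// and flags offsets used when mapping the rings in xsk_mmap() below.
62306a36Sopenharmony_ci */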
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_cistatic int xsk_mmap(struct file *file, struct socket *sock,
150962306a36Sopenharmony_ci		    struct vm_area_struct *vma)
151062306a36Sopenharmony_ci{
151162306a36Sopenharmony_ci	loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
151262306a36Sopenharmony_ci	unsigned long size = vma->vm_end - vma->vm_start;
151362306a36Sopenharmony_ci	struct xdp_sock *xs = xdp_sk(sock->sk);
151462306a36Sopenharmony_ci	int state = READ_ONCE(xs->state);
151562306a36Sopenharmony_ci	struct xsk_queue *q = NULL;
151662306a36Sopenharmony_ci
151762306a36Sopenharmony_ci	if (state != XSK_READY && state != XSK_BOUND)
151862306a36Sopenharmony_ci		return -EBUSY;
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci	if (offset == XDP_PGOFF_RX_RING) {
152162306a36Sopenharmony_ci		q = READ_ONCE(xs->rx);
152262306a36Sopenharmony_ci	} else if (offset == XDP_PGOFF_TX_RING) {
152362306a36Sopenharmony_ci		q = READ_ONCE(xs->tx);
152462306a36Sopenharmony_ci	} else {
152562306a36Sopenharmony_ci		/* Matches the smp_wmb() in XDP_UMEM_REG */
152662306a36Sopenharmony_ci		smp_rmb();
152762306a36Sopenharmony_ci		if (offset == XDP_UMEM_PGOFF_FILL_RING)
152862306a36Sopenharmony_ci			q = state == XSK_READY ? READ_ONCE(xs->fq_tmp) :
152962306a36Sopenharmony_ci						 READ_ONCE(xs->pool->fq);
153062306a36Sopenharmony_ci		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
153162306a36Sopenharmony_ci			q = state == XSK_READY ? READ_ONCE(xs->cq_tmp) :
153262306a36Sopenharmony_ci						 READ_ONCE(xs->pool->cq);
153362306a36Sopenharmony_ci	}
153462306a36Sopenharmony_ci
153562306a36Sopenharmony_ci	if (!q)
153662306a36Sopenharmony_ci		return -EINVAL;
153762306a36Sopenharmony_ci
153862306a36Sopenharmony_ci	/* Matches the smp_wmb() in xsk_init_queue */
153962306a36Sopenharmony_ci	smp_rmb();
154062306a36Sopenharmony_ci	if (size > q->ring_vmalloc_size)
154162306a36Sopenharmony_ci		return -EINVAL;
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci	return remap_vmalloc_range(vma, q->ring, 0);
154462306a36Sopenharmony_ci}
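62306a36Sopenharmony_ci
62306a36Sopenharmony_ci/* Illustrative userspace mapping of two of the rings (a sketch, not part of
62306a36Sopenharmony_ci * this file): the mmap page offset selects which ring xsk_mmap() returns,
62306a36Sopenharmony_ci * and the offsets from XDP_MMAP_OFFSETS locate the producer/consumer
62306a36Sopenharmony_ci * indices and the descriptor array inside the mapping. RX_RING_SIZE and
62306a36Sopenharmony_ci * FILL_RING_SIZE are placeholders for the sizes passed to setsockopt().
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	struct xdp_mmap_offsets off;
62306a36Sopenharmony_ci *	socklen_t optlen = sizeof(off);
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	void *rx = mmap(NULL, off.rx.desc + RX_RING_SIZE * sizeof(struct xdp_desc),
62306a36Sopenharmony_ci *			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
62306a36Sopenharmony_ci *			fd, XDP_PGOFF_RX_RING);
62306a36Sopenharmony_ci *	void *fill = mmap(NULL, off.fr.desc + FILL_RING_SIZE * sizeof(__u64),
62306a36Sopenharmony_ci *			  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
62306a36Sopenharmony_ci *			  fd, XDP_UMEM_PGOFF_FILL_RING);
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	__u32 *rx_prod = (__u32 *)((char *)rx + off.rx.producer);
62306a36Sopenharmony_ci *	struct xdp_desc *rx_ring = (struct xdp_desc *)((char *)rx + off.rx.desc);
62306a36Sopenharmony_ci *	__u64 *fill_ring = (__u64 *)((char *)fill + off.fr.desc);
62306a36Sopenharmony_ci */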
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_cistatic int xsk_notifier(struct notifier_block *this,
154762306a36Sopenharmony_ci			unsigned long msg, void *ptr)
154862306a36Sopenharmony_ci{
154962306a36Sopenharmony_ci	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
155062306a36Sopenharmony_ci	struct net *net = dev_net(dev);
155162306a36Sopenharmony_ci	struct sock *sk;
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci	switch (msg) {
155462306a36Sopenharmony_ci	case NETDEV_UNREGISTER:
155562306a36Sopenharmony_ci		mutex_lock(&net->xdp.lock);
155662306a36Sopenharmony_ci		sk_for_each(sk, &net->xdp.list) {
155762306a36Sopenharmony_ci			struct xdp_sock *xs = xdp_sk(sk);
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_ci			mutex_lock(&xs->mutex);
156062306a36Sopenharmony_ci			if (xs->dev == dev) {
156162306a36Sopenharmony_ci				sk->sk_err = ENETDOWN;
156262306a36Sopenharmony_ci				if (!sock_flag(sk, SOCK_DEAD))
156362306a36Sopenharmony_ci					sk_error_report(sk);
156462306a36Sopenharmony_ci
156562306a36Sopenharmony_ci				xsk_unbind_dev(xs);
156662306a36Sopenharmony_ci
156762306a36Sopenharmony_ci				/* Clear device references. */
156862306a36Sopenharmony_ci				xp_clear_dev(xs->pool);
156962306a36Sopenharmony_ci			}
157062306a36Sopenharmony_ci			mutex_unlock(&xs->mutex);
157162306a36Sopenharmony_ci		}
157262306a36Sopenharmony_ci		mutex_unlock(&net->xdp.lock);
157362306a36Sopenharmony_ci		break;
157462306a36Sopenharmony_ci	}
157562306a36Sopenharmony_ci	return NOTIFY_DONE;
157662306a36Sopenharmony_ci}
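62306a36Sopenharmony_ci
62306a36Sopenharmony_ci/* When the bound netdev unregisters, the handler above unbinds the socket
62306a36Sopenharmony_ci * and raises ENETDOWN on it. Illustrative userspace check (a sketch, not
62306a36Sopenharmony_ci * part of this file), e.g. after socket operations start failing:
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	int sk_err = 0;
62306a36Sopenharmony_ci *	socklen_t len = sizeof(sk_err);
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	getsockopt(xsk_fd, SOL_SOCKET, SO_ERROR, &sk_err, &len);
62306a36Sopenharmony_ci *	if (sk_err == ENETDOWN) {
62306a36Sopenharmony_ci *		// device is gone; close the socket and set up a new one
62306a36Sopenharmony_ci *	}
62306a36Sopenharmony_ci */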
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_cistatic struct proto xsk_proto = {
157962306a36Sopenharmony_ci	.name =		"XDP",
158062306a36Sopenharmony_ci	.owner =	THIS_MODULE,
158162306a36Sopenharmony_ci	.obj_size =	sizeof(struct xdp_sock),
158262306a36Sopenharmony_ci};
158362306a36Sopenharmony_ci
158462306a36Sopenharmony_cistatic const struct proto_ops xsk_proto_ops = {
158562306a36Sopenharmony_ci	.family		= PF_XDP,
158662306a36Sopenharmony_ci	.owner		= THIS_MODULE,
158762306a36Sopenharmony_ci	.release	= xsk_release,
158862306a36Sopenharmony_ci	.bind		= xsk_bind,
158962306a36Sopenharmony_ci	.connect	= sock_no_connect,
159062306a36Sopenharmony_ci	.socketpair	= sock_no_socketpair,
159162306a36Sopenharmony_ci	.accept		= sock_no_accept,
159262306a36Sopenharmony_ci	.getname	= sock_no_getname,
159362306a36Sopenharmony_ci	.poll		= xsk_poll,
159462306a36Sopenharmony_ci	.ioctl		= sock_no_ioctl,
159562306a36Sopenharmony_ci	.listen		= sock_no_listen,
159662306a36Sopenharmony_ci	.shutdown	= sock_no_shutdown,
159762306a36Sopenharmony_ci	.setsockopt	= xsk_setsockopt,
159862306a36Sopenharmony_ci	.getsockopt	= xsk_getsockopt,
159962306a36Sopenharmony_ci	.sendmsg	= xsk_sendmsg,
160062306a36Sopenharmony_ci	.recvmsg	= xsk_recvmsg,
160162306a36Sopenharmony_ci	.mmap		= xsk_mmap,
160262306a36Sopenharmony_ci};
160362306a36Sopenharmony_ci
160462306a36Sopenharmony_cistatic void xsk_destruct(struct sock *sk)
160562306a36Sopenharmony_ci{
160662306a36Sopenharmony_ci	struct xdp_sock *xs = xdp_sk(sk);
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	if (!sock_flag(sk, SOCK_DEAD))
160962306a36Sopenharmony_ci		return;
161062306a36Sopenharmony_ci
161162306a36Sopenharmony_ci	if (!xp_put_pool(xs->pool))
161262306a36Sopenharmony_ci		xdp_put_umem(xs->umem, !xs->pool);
161362306a36Sopenharmony_ci}
161462306a36Sopenharmony_ci
161562306a36Sopenharmony_cistatic int xsk_create(struct net *net, struct socket *sock, int protocol,
161662306a36Sopenharmony_ci		      int kern)
161762306a36Sopenharmony_ci{
161862306a36Sopenharmony_ci	struct xdp_sock *xs;
161962306a36Sopenharmony_ci	struct sock *sk;
162062306a36Sopenharmony_ci
162162306a36Sopenharmony_ci	if (!ns_capable(net->user_ns, CAP_NET_RAW))
162262306a36Sopenharmony_ci		return -EPERM;
162362306a36Sopenharmony_ci	if (sock->type != SOCK_RAW)
162462306a36Sopenharmony_ci		return -ESOCKTNOSUPPORT;
162562306a36Sopenharmony_ci
162662306a36Sopenharmony_ci	if (protocol)
162762306a36Sopenharmony_ci		return -EPROTONOSUPPORT;
162862306a36Sopenharmony_ci
162962306a36Sopenharmony_ci	sock->state = SS_UNCONNECTED;
163062306a36Sopenharmony_ci
163162306a36Sopenharmony_ci	sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
163262306a36Sopenharmony_ci	if (!sk)
163362306a36Sopenharmony_ci		return -ENOBUFS;
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_ci	sock->ops = &xsk_proto_ops;
163662306a36Sopenharmony_ci
163762306a36Sopenharmony_ci	sock_init_data(sock, sk);
163862306a36Sopenharmony_ci
163962306a36Sopenharmony_ci	sk->sk_family = PF_XDP;
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_ci	sk->sk_destruct = xsk_destruct;
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci	sock_set_flag(sk, SOCK_RCU_FREE);
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_ci	xs = xdp_sk(sk);
164662306a36Sopenharmony_ci	xs->state = XSK_READY;
164762306a36Sopenharmony_ci	mutex_init(&xs->mutex);
164862306a36Sopenharmony_ci	spin_lock_init(&xs->rx_lock);
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci	INIT_LIST_HEAD(&xs->map_list);
165162306a36Sopenharmony_ci	spin_lock_init(&xs->map_list_lock);
165262306a36Sopenharmony_ci
165362306a36Sopenharmony_ci	mutex_lock(&net->xdp.lock);
165462306a36Sopenharmony_ci	sk_add_node_rcu(sk, &net->xdp.list);
165562306a36Sopenharmony_ci	mutex_unlock(&net->xdp.lock);
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_ci	sock_prot_inuse_add(net, &xsk_proto, 1);
165862306a36Sopenharmony_ci
165962306a36Sopenharmony_ci	return 0;
166062306a36Sopenharmony_ci}
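62306a36Sopenharmony_ci
62306a36Sopenharmony_ci/* Illustrative userspace socket creation matching the checks above (a
62306a36Sopenharmony_ci * sketch, not part of this file): CAP_NET_RAW is required in the socket's
62306a36Sopenharmony_ci * user namespace, the type must be SOCK_RAW and the protocol 0.
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	int fd = socket(AF_XDP, SOCK_RAW, 0);
62306a36Sopenharmony_ci *
62306a36Sopenharmony_ci *	if (fd < 0)
62306a36Sopenharmony_ci *		err(1, "socket(AF_XDP)");	// EPERM without CAP_NET_RAW
62306a36Sopenharmony_ci */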
166162306a36Sopenharmony_ci
166262306a36Sopenharmony_cistatic const struct net_proto_family xsk_family_ops = {
166362306a36Sopenharmony_ci	.family = PF_XDP,
166462306a36Sopenharmony_ci	.create = xsk_create,
166562306a36Sopenharmony_ci	.owner	= THIS_MODULE,
166662306a36Sopenharmony_ci};
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_cistatic struct notifier_block xsk_netdev_notifier = {
166962306a36Sopenharmony_ci	.notifier_call	= xsk_notifier,
167062306a36Sopenharmony_ci};
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_cistatic int __net_init xsk_net_init(struct net *net)
167362306a36Sopenharmony_ci{
167462306a36Sopenharmony_ci	mutex_init(&net->xdp.lock);
167562306a36Sopenharmony_ci	INIT_HLIST_HEAD(&net->xdp.list);
167662306a36Sopenharmony_ci	return 0;
167762306a36Sopenharmony_ci}
167862306a36Sopenharmony_ci
167962306a36Sopenharmony_cistatic void __net_exit xsk_net_exit(struct net *net)
168062306a36Sopenharmony_ci{
168162306a36Sopenharmony_ci	WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
168262306a36Sopenharmony_ci}
168362306a36Sopenharmony_ci
168462306a36Sopenharmony_cistatic struct pernet_operations xsk_net_ops = {
168562306a36Sopenharmony_ci	.init = xsk_net_init,
168662306a36Sopenharmony_ci	.exit = xsk_net_exit,
168762306a36Sopenharmony_ci};
168862306a36Sopenharmony_ci
168962306a36Sopenharmony_cistatic int __init xsk_init(void)
169062306a36Sopenharmony_ci{
169162306a36Sopenharmony_ci	int err, cpu;
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_ci	err = proto_register(&xsk_proto, 0 /* no slab */);
169462306a36Sopenharmony_ci	if (err)
169562306a36Sopenharmony_ci		goto out;
169662306a36Sopenharmony_ci
169762306a36Sopenharmony_ci	err = sock_register(&xsk_family_ops);
169862306a36Sopenharmony_ci	if (err)
169962306a36Sopenharmony_ci		goto out_proto;
170062306a36Sopenharmony_ci
170162306a36Sopenharmony_ci	err = register_pernet_subsys(&xsk_net_ops);
170262306a36Sopenharmony_ci	if (err)
170362306a36Sopenharmony_ci		goto out_sk;
170462306a36Sopenharmony_ci
170562306a36Sopenharmony_ci	err = register_netdevice_notifier(&xsk_netdev_notifier);
170662306a36Sopenharmony_ci	if (err)
170762306a36Sopenharmony_ci		goto out_pernet;
170862306a36Sopenharmony_ci
170962306a36Sopenharmony_ci	for_each_possible_cpu(cpu)
171062306a36Sopenharmony_ci		INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
171162306a36Sopenharmony_ci	return 0;
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ciout_pernet:
171462306a36Sopenharmony_ci	unregister_pernet_subsys(&xsk_net_ops);
171562306a36Sopenharmony_ciout_sk:
171662306a36Sopenharmony_ci	sock_unregister(PF_XDP);
171762306a36Sopenharmony_ciout_proto:
171862306a36Sopenharmony_ci	proto_unregister(&xsk_proto);
171962306a36Sopenharmony_ciout:
172062306a36Sopenharmony_ci	return err;
172162306a36Sopenharmony_ci}
172262306a36Sopenharmony_ci
172362306a36Sopenharmony_cifs_initcall(xsk_init);