// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/bpf.h>
762306a36Sopenharmony_ci#include <linux/filter.h>
862306a36Sopenharmony_ci#include <net/xdp_sock.h>
962306a36Sopenharmony_ci#include <linux/slab.h>
1062306a36Sopenharmony_ci#include <linux/sched.h>
1162306a36Sopenharmony_ci#include <linux/btf_ids.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include "xsk.h"
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_cistatic struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
1662306a36Sopenharmony_ci					       struct xdp_sock __rcu **map_entry)
1762306a36Sopenharmony_ci{
1862306a36Sopenharmony_ci	struct xsk_map_node *node;
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci	node = bpf_map_kzalloc(&map->map, sizeof(*node),
2162306a36Sopenharmony_ci			       GFP_ATOMIC | __GFP_NOWARN);
2262306a36Sopenharmony_ci	if (!node)
2362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci	bpf_map_inc(&map->map);
2662306a36Sopenharmony_ci	atomic_inc(&map->count);
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci	node->map = map;
2962306a36Sopenharmony_ci	node->map_entry = map_entry;
3062306a36Sopenharmony_ci	return node;
3162306a36Sopenharmony_ci}
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_cistatic void xsk_map_node_free(struct xsk_map_node *node)
3462306a36Sopenharmony_ci{
3562306a36Sopenharmony_ci	struct xsk_map *map = node->map;
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci	bpf_map_put(&node->map->map);
3862306a36Sopenharmony_ci	kfree(node);
3962306a36Sopenharmony_ci	atomic_dec(&map->count);
4062306a36Sopenharmony_ci}
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_cistatic void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
4362306a36Sopenharmony_ci{
4462306a36Sopenharmony_ci	spin_lock_bh(&xs->map_list_lock);
4562306a36Sopenharmony_ci	list_add_tail(&node->node, &xs->map_list);
4662306a36Sopenharmony_ci	spin_unlock_bh(&xs->map_list_lock);
4762306a36Sopenharmony_ci}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_cistatic void xsk_map_sock_delete(struct xdp_sock *xs,
5062306a36Sopenharmony_ci				struct xdp_sock __rcu **map_entry)
5162306a36Sopenharmony_ci{
5262306a36Sopenharmony_ci	struct xsk_map_node *n, *tmp;
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci	spin_lock_bh(&xs->map_list_lock);
5562306a36Sopenharmony_ci	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
5662306a36Sopenharmony_ci		if (map_entry == n->map_entry) {
5762306a36Sopenharmony_ci			list_del(&n->node);
5862306a36Sopenharmony_ci			xsk_map_node_free(n);
5962306a36Sopenharmony_ci		}
6062306a36Sopenharmony_ci	}
6162306a36Sopenharmony_ci	spin_unlock_bh(&xs->map_list_lock);
6262306a36Sopenharmony_ci}
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_cistatic struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
6562306a36Sopenharmony_ci{
6662306a36Sopenharmony_ci	struct xsk_map *m;
6762306a36Sopenharmony_ci	int numa_node;
6862306a36Sopenharmony_ci	u64 size;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	if (attr->max_entries == 0 || attr->key_size != 4 ||
7162306a36Sopenharmony_ci	    attr->value_size != 4 ||
7262306a36Sopenharmony_ci	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
7362306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	numa_node = bpf_map_attr_numa_node(attr);
7662306a36Sopenharmony_ci	size = struct_size(m, xsk_map, attr->max_entries);
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	m = bpf_map_area_alloc(size, numa_node);
7962306a36Sopenharmony_ci	if (!m)
8062306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	bpf_map_init_from_attr(&m->map, attr);
8362306a36Sopenharmony_ci	spin_lock_init(&m->lock);
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	return &m->map;
8662306a36Sopenharmony_ci}
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_cistatic u64 xsk_map_mem_usage(const struct bpf_map *map)
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	struct xsk_map *m = container_of(map, struct xsk_map, map);
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	return struct_size(m, xsk_map, map->max_entries) +
9362306a36Sopenharmony_ci		   (u64)atomic_read(&m->count) * sizeof(struct xsk_map_node);
9462306a36Sopenharmony_ci}
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cistatic void xsk_map_free(struct bpf_map *map)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci	struct xsk_map *m = container_of(map, struct xsk_map, map);
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	synchronize_net();
10162306a36Sopenharmony_ci	bpf_map_area_free(m);
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_cistatic int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
10562306a36Sopenharmony_ci{
10662306a36Sopenharmony_ci	struct xsk_map *m = container_of(map, struct xsk_map, map);
10762306a36Sopenharmony_ci	u32 index = key ? *(u32 *)key : U32_MAX;
10862306a36Sopenharmony_ci	u32 *next = next_key;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	if (index >= m->map.max_entries) {
11162306a36Sopenharmony_ci		*next = 0;
11262306a36Sopenharmony_ci		return 0;
11362306a36Sopenharmony_ci	}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	if (index == m->map.max_entries - 1)
11662306a36Sopenharmony_ci		return -ENOENT;
11762306a36Sopenharmony_ci	*next = index + 1;
11862306a36Sopenharmony_ci	return 0;
11962306a36Sopenharmony_ci}
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_cistatic int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
12262306a36Sopenharmony_ci{
12362306a36Sopenharmony_ci	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
12462306a36Sopenharmony_ci	struct bpf_insn *insn = insn_buf;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
12762306a36Sopenharmony_ci	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
12862306a36Sopenharmony_ci	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
12962306a36Sopenharmony_ci	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
13062306a36Sopenharmony_ci	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
13162306a36Sopenharmony_ci	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
13262306a36Sopenharmony_ci	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
13362306a36Sopenharmony_ci	*insn++ = BPF_MOV64_IMM(ret, 0);
13462306a36Sopenharmony_ci	return insn - insn_buf;
13562306a36Sopenharmony_ci}
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
13862306a36Sopenharmony_ci * by local_bh_disable() (from XDP calls inside NAPI). The
13962306a36Sopenharmony_ci * rcu_read_lock_bh_held() below makes lockdep accept both.
14062306a36Sopenharmony_ci */
14162306a36Sopenharmony_cistatic void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
14262306a36Sopenharmony_ci{
14362306a36Sopenharmony_ci	struct xsk_map *m = container_of(map, struct xsk_map, map);
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	if (key >= map->max_entries)
14662306a36Sopenharmony_ci		return NULL;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held());
14962306a36Sopenharmony_ci}
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_cistatic void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
15262306a36Sopenharmony_ci{
15362306a36Sopenharmony_ci	return __xsk_map_lookup_elem(map, *(u32 *)key);
15462306a36Sopenharmony_ci}
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_cistatic void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
15762306a36Sopenharmony_ci{
15862306a36Sopenharmony_ci	return ERR_PTR(-EOPNOTSUPP);
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_cistatic long xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
16262306a36Sopenharmony_ci				u64 map_flags)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	struct xsk_map *m = container_of(map, struct xsk_map, map);
16562306a36Sopenharmony_ci	struct xdp_sock __rcu **map_entry;
16662306a36Sopenharmony_ci	struct xdp_sock *xs, *old_xs;
16762306a36Sopenharmony_ci	u32 i = *(u32 *)key, fd = *(u32 *)value;
16862306a36Sopenharmony_ci	struct xsk_map_node *node;
16962306a36Sopenharmony_ci	struct socket *sock;
17062306a36Sopenharmony_ci	int err;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	if (unlikely(map_flags > BPF_EXIST))
17362306a36Sopenharmony_ci		return -EINVAL;
17462306a36Sopenharmony_ci	if (unlikely(i >= m->map.max_entries))
17562306a36Sopenharmony_ci		return -E2BIG;
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	sock = sockfd_lookup(fd, &err);
17862306a36Sopenharmony_ci	if (!sock)
17962306a36Sopenharmony_ci		return err;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	if (sock->sk->sk_family != PF_XDP) {
18262306a36Sopenharmony_ci		sockfd_put(sock);
18362306a36Sopenharmony_ci		return -EOPNOTSUPP;
18462306a36Sopenharmony_ci	}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	xs = (struct xdp_sock *)sock->sk;
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	map_entry = &m->xsk_map[i];
18962306a36Sopenharmony_ci	node = xsk_map_node_alloc(m, map_entry);
19062306a36Sopenharmony_ci	if (IS_ERR(node)) {
19162306a36Sopenharmony_ci		sockfd_put(sock);
19262306a36Sopenharmony_ci		return PTR_ERR(node);
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	spin_lock_bh(&m->lock);
19662306a36Sopenharmony_ci	old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock));
19762306a36Sopenharmony_ci	if (old_xs == xs) {
19862306a36Sopenharmony_ci		err = 0;
19962306a36Sopenharmony_ci		goto out;
20062306a36Sopenharmony_ci	} else if (old_xs && map_flags == BPF_NOEXIST) {
20162306a36Sopenharmony_ci		err = -EEXIST;
20262306a36Sopenharmony_ci		goto out;
20362306a36Sopenharmony_ci	} else if (!old_xs && map_flags == BPF_EXIST) {
20462306a36Sopenharmony_ci		err = -ENOENT;
20562306a36Sopenharmony_ci		goto out;
20662306a36Sopenharmony_ci	}
20762306a36Sopenharmony_ci	xsk_map_sock_add(xs, node);
20862306a36Sopenharmony_ci	rcu_assign_pointer(*map_entry, xs);
20962306a36Sopenharmony_ci	if (old_xs)
21062306a36Sopenharmony_ci		xsk_map_sock_delete(old_xs, map_entry);
21162306a36Sopenharmony_ci	spin_unlock_bh(&m->lock);
21262306a36Sopenharmony_ci	sockfd_put(sock);
21362306a36Sopenharmony_ci	return 0;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ciout:
21662306a36Sopenharmony_ci	spin_unlock_bh(&m->lock);
21762306a36Sopenharmony_ci	sockfd_put(sock);
21862306a36Sopenharmony_ci	xsk_map_node_free(node);
21962306a36Sopenharmony_ci	return err;
22062306a36Sopenharmony_ci}
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_cistatic long xsk_map_delete_elem(struct bpf_map *map, void *key)
22362306a36Sopenharmony_ci{
22462306a36Sopenharmony_ci	struct xsk_map *m = container_of(map, struct xsk_map, map);
22562306a36Sopenharmony_ci	struct xdp_sock __rcu **map_entry;
22662306a36Sopenharmony_ci	struct xdp_sock *old_xs;
22762306a36Sopenharmony_ci	int k = *(u32 *)key;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	if (k >= map->max_entries)
23062306a36Sopenharmony_ci		return -EINVAL;
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	spin_lock_bh(&m->lock);
23362306a36Sopenharmony_ci	map_entry = &m->xsk_map[k];
23462306a36Sopenharmony_ci	old_xs = unrcu_pointer(xchg(map_entry, NULL));
23562306a36Sopenharmony_ci	if (old_xs)
23662306a36Sopenharmony_ci		xsk_map_sock_delete(old_xs, map_entry);
23762306a36Sopenharmony_ci	spin_unlock_bh(&m->lock);
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	return 0;
24062306a36Sopenharmony_ci}
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_cistatic long xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags)
24362306a36Sopenharmony_ci{
24462306a36Sopenharmony_ci	return __bpf_xdp_redirect_map(map, index, flags, 0,
24562306a36Sopenharmony_ci				      __xsk_map_lookup_elem);
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_civoid xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
24962306a36Sopenharmony_ci			     struct xdp_sock __rcu **map_entry)
25062306a36Sopenharmony_ci{
25162306a36Sopenharmony_ci	spin_lock_bh(&map->lock);
25262306a36Sopenharmony_ci	if (rcu_access_pointer(*map_entry) == xs) {
25362306a36Sopenharmony_ci		rcu_assign_pointer(*map_entry, NULL);
25462306a36Sopenharmony_ci		xsk_map_sock_delete(xs, map_entry);
25562306a36Sopenharmony_ci	}
25662306a36Sopenharmony_ci	spin_unlock_bh(&map->lock);
25762306a36Sopenharmony_ci}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_cistatic bool xsk_map_meta_equal(const struct bpf_map *meta0,
26062306a36Sopenharmony_ci			       const struct bpf_map *meta1)
26162306a36Sopenharmony_ci{
26262306a36Sopenharmony_ci	return meta0->max_entries == meta1->max_entries &&
26362306a36Sopenharmony_ci		bpf_map_meta_equal(meta0, meta1);
26462306a36Sopenharmony_ci}
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ciBTF_ID_LIST_SINGLE(xsk_map_btf_ids, struct, xsk_map)
26762306a36Sopenharmony_ciconst struct bpf_map_ops xsk_map_ops = {
26862306a36Sopenharmony_ci	.map_meta_equal = xsk_map_meta_equal,
26962306a36Sopenharmony_ci	.map_alloc = xsk_map_alloc,
27062306a36Sopenharmony_ci	.map_free = xsk_map_free,
27162306a36Sopenharmony_ci	.map_get_next_key = xsk_map_get_next_key,
27262306a36Sopenharmony_ci	.map_lookup_elem = xsk_map_lookup_elem,
27362306a36Sopenharmony_ci	.map_gen_lookup = xsk_map_gen_lookup,
27462306a36Sopenharmony_ci	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
27562306a36Sopenharmony_ci	.map_update_elem = xsk_map_update_elem,
27662306a36Sopenharmony_ci	.map_delete_elem = xsk_map_delete_elem,
27762306a36Sopenharmony_ci	.map_check_btf = map_check_no_btf,
27862306a36Sopenharmony_ci	.map_mem_usage = xsk_map_mem_usage,
27962306a36Sopenharmony_ci	.map_btf_id = &xsk_map_btf_ids[0],
28062306a36Sopenharmony_ci	.map_redirect = xsk_map_redirect,
28162306a36Sopenharmony_ci};
282