18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* XSKMAP used for AF_XDP sockets 38c2ecf20Sopenharmony_ci * Copyright(c) 2018 Intel Corporation. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <linux/bpf.h> 78c2ecf20Sopenharmony_ci#include <linux/capability.h> 88c2ecf20Sopenharmony_ci#include <net/xdp_sock.h> 98c2ecf20Sopenharmony_ci#include <linux/slab.h> 108c2ecf20Sopenharmony_ci#include <linux/sched.h> 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include "xsk.h" 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ciint xsk_map_inc(struct xsk_map *map) 158c2ecf20Sopenharmony_ci{ 168c2ecf20Sopenharmony_ci bpf_map_inc(&map->map); 178c2ecf20Sopenharmony_ci return 0; 188c2ecf20Sopenharmony_ci} 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_civoid xsk_map_put(struct xsk_map *map) 218c2ecf20Sopenharmony_ci{ 228c2ecf20Sopenharmony_ci bpf_map_put(&map->map); 238c2ecf20Sopenharmony_ci} 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_cistatic struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map, 268c2ecf20Sopenharmony_ci struct xdp_sock **map_entry) 278c2ecf20Sopenharmony_ci{ 288c2ecf20Sopenharmony_ci struct xsk_map_node *node; 298c2ecf20Sopenharmony_ci int err; 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN); 328c2ecf20Sopenharmony_ci if (!node) 338c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci err = xsk_map_inc(map); 368c2ecf20Sopenharmony_ci if (err) { 378c2ecf20Sopenharmony_ci kfree(node); 388c2ecf20Sopenharmony_ci return ERR_PTR(err); 398c2ecf20Sopenharmony_ci } 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci node->map = map; 428c2ecf20Sopenharmony_ci node->map_entry = map_entry; 438c2ecf20Sopenharmony_ci return node; 448c2ecf20Sopenharmony_ci} 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_cistatic void xsk_map_node_free(struct xsk_map_node *node) 478c2ecf20Sopenharmony_ci{ 488c2ecf20Sopenharmony_ci xsk_map_put(node->map); 498c2ecf20Sopenharmony_ci kfree(node); 508c2ecf20Sopenharmony_ci} 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_cistatic void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci spin_lock_bh(&xs->map_list_lock); 558c2ecf20Sopenharmony_ci list_add_tail(&node->node, &xs->map_list); 568c2ecf20Sopenharmony_ci spin_unlock_bh(&xs->map_list_lock); 578c2ecf20Sopenharmony_ci} 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_cistatic void xsk_map_sock_delete(struct xdp_sock *xs, 608c2ecf20Sopenharmony_ci struct xdp_sock **map_entry) 618c2ecf20Sopenharmony_ci{ 628c2ecf20Sopenharmony_ci struct xsk_map_node *n, *tmp; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci spin_lock_bh(&xs->map_list_lock); 658c2ecf20Sopenharmony_ci list_for_each_entry_safe(n, tmp, &xs->map_list, node) { 668c2ecf20Sopenharmony_ci if (map_entry == n->map_entry) { 678c2ecf20Sopenharmony_ci list_del(&n->node); 688c2ecf20Sopenharmony_ci xsk_map_node_free(n); 698c2ecf20Sopenharmony_ci } 708c2ecf20Sopenharmony_ci } 718c2ecf20Sopenharmony_ci spin_unlock_bh(&xs->map_list_lock); 728c2ecf20Sopenharmony_ci} 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_cistatic struct bpf_map *xsk_map_alloc(union bpf_attr *attr) 758c2ecf20Sopenharmony_ci{ 768c2ecf20Sopenharmony_ci struct bpf_map_memory mem; 778c2ecf20Sopenharmony_ci int err, numa_node; 788c2ecf20Sopenharmony_ci struct xsk_map *m; 798c2ecf20Sopenharmony_ci u64 size; 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci if (!capable(CAP_NET_ADMIN)) 828c2ecf20Sopenharmony_ci return ERR_PTR(-EPERM); 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci if (attr->max_entries == 0 || attr->key_size != 4 || 858c2ecf20Sopenharmony_ci attr->value_size != 4 || 868c2ecf20Sopenharmony_ci attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) 878c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci numa_node = bpf_map_attr_numa_node(attr); 908c2ecf20Sopenharmony_ci size = struct_size(m, xsk_map, attr->max_entries); 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci err = bpf_map_charge_init(&mem, size); 938c2ecf20Sopenharmony_ci if (err < 0) 948c2ecf20Sopenharmony_ci return ERR_PTR(err); 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci m = bpf_map_area_alloc(size, numa_node); 978c2ecf20Sopenharmony_ci if (!m) { 988c2ecf20Sopenharmony_ci bpf_map_charge_finish(&mem); 998c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 1008c2ecf20Sopenharmony_ci } 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci bpf_map_init_from_attr(&m->map, attr); 1038c2ecf20Sopenharmony_ci bpf_map_charge_move(&m->map.memory, &mem); 1048c2ecf20Sopenharmony_ci spin_lock_init(&m->lock); 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci return &m->map; 1078c2ecf20Sopenharmony_ci} 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_cistatic void xsk_map_free(struct bpf_map *map) 1108c2ecf20Sopenharmony_ci{ 1118c2ecf20Sopenharmony_ci struct xsk_map *m = container_of(map, struct xsk_map, map); 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci bpf_clear_redirect_map(map); 1148c2ecf20Sopenharmony_ci synchronize_net(); 1158c2ecf20Sopenharmony_ci bpf_map_area_free(m); 1168c2ecf20Sopenharmony_ci} 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_cistatic int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 1198c2ecf20Sopenharmony_ci{ 1208c2ecf20Sopenharmony_ci struct xsk_map *m = container_of(map, struct xsk_map, map); 1218c2ecf20Sopenharmony_ci u32 index = key ? *(u32 *)key : U32_MAX; 1228c2ecf20Sopenharmony_ci u32 *next = next_key; 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci if (index >= m->map.max_entries) { 1258c2ecf20Sopenharmony_ci *next = 0; 1268c2ecf20Sopenharmony_ci return 0; 1278c2ecf20Sopenharmony_ci } 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci if (index == m->map.max_entries - 1) 1308c2ecf20Sopenharmony_ci return -ENOENT; 1318c2ecf20Sopenharmony_ci *next = index + 1; 1328c2ecf20Sopenharmony_ci return 0; 1338c2ecf20Sopenharmony_ci} 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_cistatic int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 1368c2ecf20Sopenharmony_ci{ 1378c2ecf20Sopenharmony_ci const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2; 1388c2ecf20Sopenharmony_ci struct bpf_insn *insn = insn_buf; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); 1418c2ecf20Sopenharmony_ci *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); 1428c2ecf20Sopenharmony_ci *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *))); 1438c2ecf20Sopenharmony_ci *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map)); 1448c2ecf20Sopenharmony_ci *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp); 1458c2ecf20Sopenharmony_ci *insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0); 1468c2ecf20Sopenharmony_ci *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 1478c2ecf20Sopenharmony_ci *insn++ = BPF_MOV64_IMM(ret, 0); 1488c2ecf20Sopenharmony_ci return insn - insn_buf; 1498c2ecf20Sopenharmony_ci} 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_cistatic void *xsk_map_lookup_elem(struct bpf_map *map, void *key) 1528c2ecf20Sopenharmony_ci{ 1538c2ecf20Sopenharmony_ci WARN_ON_ONCE(!rcu_read_lock_held()); 1548c2ecf20Sopenharmony_ci return __xsk_map_lookup_elem(map, *(u32 *)key); 1558c2ecf20Sopenharmony_ci} 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_cistatic void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci return ERR_PTR(-EOPNOTSUPP); 1608c2ecf20Sopenharmony_ci} 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_cistatic int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, 1638c2ecf20Sopenharmony_ci u64 map_flags) 1648c2ecf20Sopenharmony_ci{ 1658c2ecf20Sopenharmony_ci struct xsk_map *m = container_of(map, struct xsk_map, map); 1668c2ecf20Sopenharmony_ci struct xdp_sock *xs, *old_xs, **map_entry; 1678c2ecf20Sopenharmony_ci u32 i = *(u32 *)key, fd = *(u32 *)value; 1688c2ecf20Sopenharmony_ci struct xsk_map_node *node; 1698c2ecf20Sopenharmony_ci struct socket *sock; 1708c2ecf20Sopenharmony_ci int err; 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci if (unlikely(map_flags > BPF_EXIST)) 1738c2ecf20Sopenharmony_ci return -EINVAL; 1748c2ecf20Sopenharmony_ci if (unlikely(i >= m->map.max_entries)) 1758c2ecf20Sopenharmony_ci return -E2BIG; 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci sock = sockfd_lookup(fd, &err); 1788c2ecf20Sopenharmony_ci if (!sock) 1798c2ecf20Sopenharmony_ci return err; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci if (sock->sk->sk_family != PF_XDP) { 1828c2ecf20Sopenharmony_ci sockfd_put(sock); 1838c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 1848c2ecf20Sopenharmony_ci } 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci xs = (struct xdp_sock *)sock->sk; 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci map_entry = &m->xsk_map[i]; 1898c2ecf20Sopenharmony_ci node = xsk_map_node_alloc(m, map_entry); 1908c2ecf20Sopenharmony_ci if (IS_ERR(node)) { 1918c2ecf20Sopenharmony_ci sockfd_put(sock); 1928c2ecf20Sopenharmony_ci return PTR_ERR(node); 1938c2ecf20Sopenharmony_ci } 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci spin_lock_bh(&m->lock); 1968c2ecf20Sopenharmony_ci old_xs = READ_ONCE(*map_entry); 1978c2ecf20Sopenharmony_ci if (old_xs == xs) { 1988c2ecf20Sopenharmony_ci err = 0; 1998c2ecf20Sopenharmony_ci goto out; 2008c2ecf20Sopenharmony_ci } else if (old_xs && map_flags == BPF_NOEXIST) { 2018c2ecf20Sopenharmony_ci err = -EEXIST; 2028c2ecf20Sopenharmony_ci goto out; 2038c2ecf20Sopenharmony_ci } else if (!old_xs && map_flags == BPF_EXIST) { 2048c2ecf20Sopenharmony_ci err = -ENOENT; 2058c2ecf20Sopenharmony_ci goto out; 2068c2ecf20Sopenharmony_ci } 2078c2ecf20Sopenharmony_ci xsk_map_sock_add(xs, node); 2088c2ecf20Sopenharmony_ci WRITE_ONCE(*map_entry, xs); 2098c2ecf20Sopenharmony_ci if (old_xs) 2108c2ecf20Sopenharmony_ci xsk_map_sock_delete(old_xs, map_entry); 2118c2ecf20Sopenharmony_ci spin_unlock_bh(&m->lock); 2128c2ecf20Sopenharmony_ci sockfd_put(sock); 2138c2ecf20Sopenharmony_ci return 0; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ciout: 2168c2ecf20Sopenharmony_ci spin_unlock_bh(&m->lock); 2178c2ecf20Sopenharmony_ci sockfd_put(sock); 2188c2ecf20Sopenharmony_ci xsk_map_node_free(node); 2198c2ecf20Sopenharmony_ci return err; 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cistatic int xsk_map_delete_elem(struct bpf_map *map, void *key) 2238c2ecf20Sopenharmony_ci{ 2248c2ecf20Sopenharmony_ci struct xsk_map *m = container_of(map, struct xsk_map, map); 2258c2ecf20Sopenharmony_ci struct xdp_sock *old_xs, **map_entry; 2268c2ecf20Sopenharmony_ci int k = *(u32 *)key; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci if (k >= map->max_entries) 2298c2ecf20Sopenharmony_ci return -EINVAL; 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci spin_lock_bh(&m->lock); 2328c2ecf20Sopenharmony_ci map_entry = &m->xsk_map[k]; 2338c2ecf20Sopenharmony_ci old_xs = xchg(map_entry, NULL); 2348c2ecf20Sopenharmony_ci if (old_xs) 2358c2ecf20Sopenharmony_ci xsk_map_sock_delete(old_xs, map_entry); 2368c2ecf20Sopenharmony_ci spin_unlock_bh(&m->lock); 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci return 0; 2398c2ecf20Sopenharmony_ci} 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_civoid xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs, 2428c2ecf20Sopenharmony_ci struct xdp_sock **map_entry) 2438c2ecf20Sopenharmony_ci{ 2448c2ecf20Sopenharmony_ci spin_lock_bh(&map->lock); 2458c2ecf20Sopenharmony_ci if (READ_ONCE(*map_entry) == xs) { 2468c2ecf20Sopenharmony_ci WRITE_ONCE(*map_entry, NULL); 2478c2ecf20Sopenharmony_ci xsk_map_sock_delete(xs, map_entry); 2488c2ecf20Sopenharmony_ci } 2498c2ecf20Sopenharmony_ci spin_unlock_bh(&map->lock); 2508c2ecf20Sopenharmony_ci} 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_cistatic bool xsk_map_meta_equal(const struct bpf_map *meta0, 2538c2ecf20Sopenharmony_ci const struct bpf_map *meta1) 2548c2ecf20Sopenharmony_ci{ 2558c2ecf20Sopenharmony_ci return meta0->max_entries == meta1->max_entries && 2568c2ecf20Sopenharmony_ci bpf_map_meta_equal(meta0, meta1); 2578c2ecf20Sopenharmony_ci} 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_cistatic int xsk_map_btf_id; 2608c2ecf20Sopenharmony_ciconst struct bpf_map_ops xsk_map_ops = { 2618c2ecf20Sopenharmony_ci .map_meta_equal = xsk_map_meta_equal, 2628c2ecf20Sopenharmony_ci .map_alloc = xsk_map_alloc, 2638c2ecf20Sopenharmony_ci .map_free = xsk_map_free, 2648c2ecf20Sopenharmony_ci .map_get_next_key = xsk_map_get_next_key, 2658c2ecf20Sopenharmony_ci .map_lookup_elem = xsk_map_lookup_elem, 2668c2ecf20Sopenharmony_ci .map_gen_lookup = xsk_map_gen_lookup, 2678c2ecf20Sopenharmony_ci .map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only, 2688c2ecf20Sopenharmony_ci .map_update_elem = xsk_map_update_elem, 2698c2ecf20Sopenharmony_ci .map_delete_elem = xsk_map_delete_elem, 2708c2ecf20Sopenharmony_ci .map_check_btf = map_check_no_btf, 2718c2ecf20Sopenharmony_ci .map_btf_name = "xsk_map", 2728c2ecf20Sopenharmony_ci .map_btf_id = &xsk_map_btf_id, 2738c2ecf20Sopenharmony_ci}; 274