1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 */
4#include <linux/bpf.h>
5#include <linux/bpf_trace.h>
6#include <linux/bpf_lirc.h>
7#include <linux/bpf_verifier.h>
8#include <linux/btf.h>
9#include <linux/syscalls.h>
10#include <linux/slab.h>
11#include <linux/sched/signal.h>
12#include <linux/vmalloc.h>
13#include <linux/mmzone.h>
14#include <linux/anon_inodes.h>
15#include <linux/fdtable.h>
16#include <linux/file.h>
17#include <linux/fs.h>
18#include <linux/license.h>
19#include <linux/filter.h>
20#include <linux/version.h>
21#include <linux/kernel.h>
22#include <linux/idr.h>
23#include <linux/cred.h>
24#include <linux/timekeeping.h>
25#include <linux/ctype.h>
26#include <linux/nospec.h>
27#include <linux/audit.h>
28#include <uapi/linux/btf.h>
29#include <linux/pgtable.h>
30#include <linux/bpf_lsm.h>
31#include <linux/poll.h>
32#include <linux/bpf-netns.h>
33#include <linux/rcupdate_trace.h>
34
35#define IS_FD_ARRAY(map)                                                                                               \
36    ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||               \
37     (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
38#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
39#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
40#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || IS_FD_HASH(map))
41
42#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
43
44DEFINE_PER_CPU(int, bpf_prog_active);
45static DEFINE_IDR(prog_idr);
46static DEFINE_SPINLOCK(prog_idr_lock);
47static DEFINE_IDR(map_idr);
48static DEFINE_SPINLOCK(map_idr_lock);
49static DEFINE_IDR(link_idr);
50static DEFINE_SPINLOCK(link_idr_lock);
51
52int sysctl_unprivileged_bpf_disabled __read_mostly = IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
53
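/* Table of map implementations, built from <linux/bpf_types.h>: each
 * BPF_MAP_TYPE(id, ops) entry there expands to an initializer here, while
 * BPF_PROG_TYPE() and BPF_LINK_TYPE() are stubbed out so only map types
 * contribute. E.g. BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) becomes
 * [BPF_MAP_TYPE_ARRAY] = &(array_map_ops).
 */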
54static const struct bpf_map_ops *const bpf_map_types[] = {
55#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
56#define BPF_MAP_TYPE(_id, _ops) [_id] = &(_ops),
57#define BPF_LINK_TYPE(_id, _name)
58#include <linux/bpf_types.h>
59#undef BPF_PROG_TYPE
60#undef BPF_MAP_TYPE
61#undef BPF_LINK_TYPE
62};
63
64/*
65 * If we're handed a bigger struct than we know of, ensure all the unknown bits
66 * are 0 - i.e. new user-space does not rely on any kernel feature extensions
67 * we don't know about yet.
68 *
69 * There is a ToCToU between this function call and the following
70 * copy_from_user() call. However, this is not a concern since this function is
71 * meant to be a future-proofing of bits.
72 */
73int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size, size_t actual_size)
74{
75    unsigned char __user *addr = uaddr + expected_size;
76    int res;
77
78    if (unlikely(actual_size > PAGE_SIZE)) { /* silly large */
79        return -E2BIG;
80    }
81
82    if (actual_size <= expected_size) {
83        return 0;
84    }
85
86    res = check_zeroed_user(addr, actual_size - expected_size);
87    if (res < 0) {
88        return res;
89    }
90    return res ? 0 : -E2BIG;
91}
92
93const struct bpf_map_ops bpf_map_offload_ops = {
94    .map_meta_equal = bpf_map_meta_equal,
95    .map_alloc = bpf_map_offload_map_alloc,
96    .map_free = bpf_map_offload_map_free,
97    .map_check_btf = map_check_no_btf,
98};
99
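/* Resolve attr->map_type to its bpf_map_ops (with array_index_nospec() on the
 * table index), run the type's optional ->map_alloc_check(), then allocate the
 * map, switching to bpf_map_offload_ops when attr->map_ifindex requests device
 * offload.
 */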
100static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
101{
102    const struct bpf_map_ops *ops;
103    u32 type = attr->map_type;
104    struct bpf_map *map;
105    int err;
106
107    if (type >= ARRAY_SIZE(bpf_map_types)) {
108        return ERR_PTR(-EINVAL);
109    }
110    type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
111    ops = bpf_map_types[type];
112    if (!ops) {
113        return ERR_PTR(-EINVAL);
114    }
115
116    if (ops->map_alloc_check) {
117        err = ops->map_alloc_check(attr);
118        if (err) {
119            return ERR_PTR(err);
120        }
121    }
122    if (attr->map_ifindex) {
123        ops = &bpf_map_offload_ops;
124    }
125    map = ops->map_alloc(attr);
126    if (IS_ERR(map)) {
127        return map;
128    }
129    map->ops = ops;
130    map->map_type = type;
131    return map;
132}
133
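/* Size of the value buffer exchanged with user space: per-CPU maps copy one
 * value per possible CPU (each rounded up to 8 bytes), fd-based maps use a
 * single u32, and all other maps use value_size as-is.
 */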
134static u32 bpf_map_value_size(struct bpf_map *map)
135{
136    if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
137        map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
        return round_up(map->value_size, 8) * num_possible_cpus();
139    } else if (IS_FD_MAP(map)) {
140        return sizeof(u32);
141    } else {
142        return map->value_size;
143    }
144}
145
146static void maybe_wait_bpf_programs(struct bpf_map *map)
147{
148    /* Wait for any running BPF programs to complete so that
149     * userspace, when we return to it, knows that all programs
150     * that could be running use the new map value.
151     */
152    if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
153        synchronize_rcu();
154    }
155}
156
157static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, void *value, __u64 flags)
158{
159    int err;
160
161    /* Need to create a kthread, thus must support schedule */
162    if (bpf_map_is_dev_bound(map)) {
163        return bpf_map_offload_update_elem(map, key, value, flags);
164    } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
165        return map->ops->map_update_elem(map, key, value, flags);
166    } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || map->map_type == BPF_MAP_TYPE_SOCKMAP) {
167        return sock_map_update_elem_sys(map, key, value, flags);
168    } else if (IS_FD_PROG_ARRAY(map)) {
169        return bpf_fd_array_map_update_elem(map, f.file, key, value, flags);
170    }
171
172    bpf_disable_instrumentation();
173    if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
174        err = bpf_percpu_hash_update(map, key, value, flags);
175    } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
176        err = bpf_percpu_array_update(map, key, value, flags);
177    } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
178        err = bpf_percpu_cgroup_storage_update(map, key, value, flags);
179    } else if (IS_FD_ARRAY(map)) {
180        rcu_read_lock();
181        err = bpf_fd_array_map_update_elem(map, f.file, key, value, flags);
182        rcu_read_unlock();
183    } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
184        rcu_read_lock();
185        err = bpf_fd_htab_map_update_elem(map, f.file, key, value, flags);
186        rcu_read_unlock();
187    } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
188        /* rcu_read_lock() is not needed */
189        err = bpf_fd_reuseport_array_update_elem(map, key, value, flags);
190    } else if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) {
191        err = map->ops->map_push_elem(map, value, flags);
192    } else {
193        rcu_read_lock();
194        err = map->ops->map_update_elem(map, key, value, flags);
195        rcu_read_unlock();
196    }
197    bpf_enable_instrumentation();
198    maybe_wait_bpf_programs(map);
199
200    return err;
201}
202
203static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, __u64 flags)
204{
205    void *ptr;
206    int err;
207
208    if (bpf_map_is_dev_bound(map)) {
209        return bpf_map_offload_lookup_elem(map, key, value);
210    }
211
212    bpf_disable_instrumentation();
213    if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
214        err = bpf_percpu_hash_copy(map, key, value);
215    } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
216        err = bpf_percpu_array_copy(map, key, value);
217    } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
218        err = bpf_percpu_cgroup_storage_copy(map, key, value);
219    } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
220        err = bpf_stackmap_copy(map, key, value);
221    } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
222        err = bpf_fd_array_map_lookup_elem(map, key, value);
223    } else if (IS_FD_HASH(map)) {
224        err = bpf_fd_htab_map_lookup_elem(map, key, value);
225    } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
226        err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
227    } else if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) {
228        err = map->ops->map_peek_elem(map, value);
229    } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
230        /* struct_ops map requires directly updating "value" */
231        err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
232    } else {
233        rcu_read_lock();
234        if (map->ops->map_lookup_elem_sys_only) {
235            ptr = map->ops->map_lookup_elem_sys_only(map, key);
236        } else {
237            ptr = map->ops->map_lookup_elem(map, key);
238        }
239        if (IS_ERR(ptr)) {
240            err = PTR_ERR(ptr);
241        } else if (!ptr) {
242            err = -ENOENT;
243        } else {
244            err = 0;
245            if (flags & BPF_F_LOCK) {
246                /* lock 'ptr' and copy everything but lock */
247                copy_map_value_locked(map, value, ptr, true);
248            } else {
249                copy_map_value(map, value, ptr);
250            }
251            /* mask lock, since value wasn't zero inited */
252            check_and_init_map_lock(map, value);
253        }
254        rcu_read_unlock();
255    }
256
257    bpf_enable_instrumentation();
258    maybe_wait_bpf_programs(map);
259
260    return err;
261}
262
263static void *_bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
264{
265    /* We really just want to fail instead of triggering OOM killer
266     * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
267     * which is used for lower order allocation requests.
268     *
269     * It has been observed that higher order allocation requests done by
270     * vmalloc with __GFP_NORETRY being set might fail due to not trying
271     * to reclaim memory from the page cache, thus we set
272     * __GFP_RETRY_MAYFAIL to avoid such situations.
273     */
274
275    const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
276    unsigned int flags = 0;
277    unsigned long align = 1;
278    void *area;
279
280    if (size >= SIZE_MAX) {
281        return NULL;
282    }
283
284    /* kmalloc()'ed memory can't be mmap()'ed */
285    if (mmapable) {
286        BUG_ON(!PAGE_ALIGNED(size));
287        align = SHMLBA;
288        flags = VM_USERMAP;
289    } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
290        area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, numa_node);
291        if (area != NULL) {
292            return area;
293        }
294    }
295
296    return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL,
297                                PAGE_KERNEL, flags, numa_node, __builtin_return_address(0));
298}
299
300void *bpf_map_area_alloc(u64 size, int numa_node)
301{
302    return _bpf_map_area_alloc(size, numa_node, false);
303}
304
305void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
306{
307    return _bpf_map_area_alloc(size, numa_node, true);
308}
309
310void bpf_map_area_free(void *area)
311{
312    kvfree(area);
313}
314
315static u32 bpf_map_flags_retain_permanent(u32 flags)
316{
317    /* Some map creation flags are not tied to the map object but
318     * rather to the map fd instead, so they have no meaning upon
319     * map object inspection since multiple file descriptors with
320     * different (access) properties can exist here. Thus, given
321     * this has zero meaning for the map itself, lets clear these
322     * from here.
323     */
324    return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
325}
326
327void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
328{
329    map->map_type = attr->map_type;
330    map->key_size = attr->key_size;
331    map->value_size = attr->value_size;
332    map->max_entries = attr->max_entries;
333    map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
334    map->numa_node = bpf_map_attr_numa_node(attr);
335}
336
337static int bpf_charge_memlock(struct user_struct *user, u32 pages)
338{
339    unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
340    if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
341        atomic_long_sub(pages, &user->locked_vm);
342        return -EPERM;
343    }
344    return 0;
345}
346
347static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
348{
349    if (user) {
350        atomic_long_sub(pages, &user->locked_vm);
351    }
352}
353
354int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
355{
356    u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
357    struct user_struct *user;
358    int ret;
359
360    if (size >= U32_MAX - PAGE_SIZE) {
361        return -E2BIG;
362    }
363
364    user = get_current_user();
365    ret = bpf_charge_memlock(user, pages);
366    if (ret) {
367        free_uid(user);
368        return ret;
369    }
370
371    mem->pages = pages;
372    mem->user = user;
373
374    return 0;
375}
376
377void bpf_map_charge_finish(struct bpf_map_memory *mem)
378{
379    bpf_uncharge_memlock(mem->user, mem->pages);
380    free_uid(mem->user);
381}
382
383void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src)
384{
385    *dst = *src;
386
387    /* Make sure src will not be used for the redundant uncharging. */
388    memset(src, 0, sizeof(struct bpf_map_memory));
389}
390
391int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
392{
393    int ret;
394
395    ret = bpf_charge_memlock(map->memory.user, pages);
396    if (ret) {
397        return ret;
398    }
399    map->memory.pages += pages;
400    return ret;
401}
402
403void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
404{
405    bpf_uncharge_memlock(map->memory.user, pages);
406    map->memory.pages -= pages;
407}
408
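/* Map ids are allocated cyclically in [1, INT_MAX) under map_idr_lock; user
 * space can later turn an id back into a reference via BPF_MAP_GET_FD_BY_ID.
 */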
409static int bpf_map_alloc_id(struct bpf_map *map)
410{
411    int id;
412
413    idr_preload(GFP_KERNEL);
414    spin_lock_bh(&map_idr_lock);
415    id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
416    if (id > 0) {
417        map->id = id;
418    }
419    spin_unlock_bh(&map_idr_lock);
420    idr_preload_end();
421
422    if (WARN_ON_ONCE(!id)) {
423        return -ENOSPC;
424    }
425
426    return id > 0 ? 0 : id;
427}
428
429void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
430{
431    unsigned long flags;
432
433    /* Offloaded maps are removed from the IDR store when their device
434     * disappears - even if someone holds an fd to them they are unusable,
435     * the memory is gone, all ops will fail; they are simply waiting for
436     * refcnt to drop to be freed.
437     */
438    if (!map->id) {
439        return;
440    }
441
442    if (do_idr_lock) {
443        spin_lock_irqsave(&map_idr_lock, flags);
444    } else {
445        __acquire(&map_idr_lock);
446    }
447
448    idr_remove(&map_idr, map->id);
449    map->id = 0;
450
451    if (do_idr_lock) {
452        spin_unlock_irqrestore(&map_idr_lock, flags);
453    } else {
454        __release(&map_idr_lock);
455    }
456}
457
458/* called from workqueue */
459static void bpf_map_free_deferred(struct work_struct *work)
460{
461    struct bpf_map *map = container_of(work, struct bpf_map, work);
462    struct bpf_map_memory mem;
463
464    bpf_map_charge_move(&mem, &map->memory);
465    security_bpf_map_free(map);
466    /* implementation dependent freeing */
467    map->ops->map_free(map);
468    bpf_map_charge_finish(&mem);
469}
470
471static void bpf_map_put_uref(struct bpf_map *map)
472{
473    if (atomic64_dec_and_test(&map->usercnt)) {
474        if (map->ops->map_release_uref) {
475            map->ops->map_release_uref(map);
476        }
477    }
478}
479
/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
483static void _bpf_map_put(struct bpf_map *map, bool do_idr_lock)
484{
485    if (atomic64_dec_and_test(&map->refcnt)) {
486        /* bpf_map_free_id() must be called first */
487        bpf_map_free_id(map, do_idr_lock);
488        btf_put(map->btf);
489        INIT_WORK(&map->work, bpf_map_free_deferred);
490        schedule_work(&map->work);
491    }
492}
493
494void bpf_map_put(struct bpf_map *map)
495{
496    _bpf_map_put(map, true);
497}
498EXPORT_SYMBOL_GPL(bpf_map_put);
499
500void bpf_map_put_with_uref(struct bpf_map *map)
501{
502    bpf_map_put_uref(map);
503    bpf_map_put(map);
504}
505
506static int bpf_map_release(struct inode *inode, struct file *filp)
507{
508    struct bpf_map *map = filp->private_data;
509
510    if (map->ops->map_release) {
511        map->ops->map_release(map, filp);
512    }
513
514    bpf_map_put_with_uref(map);
515    return 0;
516}
517
518static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
519{
520    fmode_t mode = f.file->f_mode;
521
522    /* Our file permissions may have been overridden by global
523     * map permissions facing syscall side.
524     */
525    if (READ_ONCE(map->frozen)) {
526        mode &= ~FMODE_CAN_WRITE;
527    }
528    return mode;
529}
530
531#ifdef CONFIG_PROC_FS
532static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
533{
534    const struct bpf_map *map = filp->private_data;
535    const struct bpf_array *array;
536    u32 type = 0, jited = 0;
537
538    if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
539        array = container_of(map, struct bpf_array, map);
540        spin_lock(&array->aux->owner.lock);
541        type = array->aux->owner.type;
542        jited = array->aux->owner.jited;
543        spin_unlock(&array->aux->owner.lock);
544    }
545
546    seq_printf(m,
547               "map_type:\t%u\n"
548               "key_size:\t%u\n"
549               "value_size:\t%u\n"
550               "max_entries:\t%u\n"
551               "map_flags:\t%#x\n"
552               "memlock:\t%llu\n"
553               "map_id:\t%u\n"
554               "frozen:\t%u\n",
555               map->map_type, map->key_size, map->value_size, map->max_entries, map->map_flags,
556               map->memory.pages * 1ULL << PAGE_SHIFT, map->id, READ_ONCE(map->frozen));
557    if (type) {
558        seq_printf(m, "owner_prog_type:\t%u\n", type);
559        seq_printf(m, "owner_jited:\t%u\n", jited);
560    }
561}
562#endif
563
564static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
565{
566    /* We need this handler such that alloc_file() enables
567     * f_mode with FMODE_CAN_READ.
568     */
569    return -EINVAL;
570}
571
572static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, size_t siz, loff_t *ppos)
573{
574    /* We need this handler such that alloc_file() enables
575     * f_mode with FMODE_CAN_WRITE.
576     */
577    return -EINVAL;
578}
579
580/* called for any extra memory-mapped regions (except initial) */
581static void bpf_map_mmap_open(struct vm_area_struct *vma)
582{
583    struct bpf_map *map = vma->vm_file->private_data;
584
585    if (vma->vm_flags & VM_MAYWRITE) {
586        mutex_lock(&map->freeze_mutex);
587        map->writecnt++;
588        mutex_unlock(&map->freeze_mutex);
589    }
590}
591
/* called for all unmapped memory regions (including the initial one) */
593static void bpf_map_mmap_close(struct vm_area_struct *vma)
594{
595    struct bpf_map *map = vma->vm_file->private_data;
596
597    if (vma->vm_flags & VM_MAYWRITE) {
598        mutex_lock(&map->freeze_mutex);
599        map->writecnt--;
600        mutex_unlock(&map->freeze_mutex);
601    }
602}
603
604static const struct vm_operations_struct bpf_map_default_vmops = {
605    .open = bpf_map_mmap_open,
606    .close = bpf_map_mmap_close,
607};
608
609static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
610{
611    struct bpf_map *map = filp->private_data;
612    int err;
613
614    if (!map->ops->map_mmap || map_value_has_spin_lock(map)) {
615        return -ENOTSUPP;
616    }
617
618    if (!(vma->vm_flags & VM_SHARED)) {
619        return -EINVAL;
620    }
621
622    mutex_lock(&map->freeze_mutex);
623
624    if (vma->vm_flags & VM_WRITE) {
625        if (map->frozen) {
626            err = -EPERM;
627            goto out;
628        }
        /* The map is meant to be read-only, so do not allow mapping it as
         * writable: that could leak a writable page reference and let
         * user space keep modifying the contents after freezing, while the
         * verifier assumes they do not change.
         */
634        if (map->map_flags & BPF_F_RDONLY_PROG) {
635            err = -EACCES;
636            goto out;
637        }
638    }
639
640    /* set default open/close callbacks */
641    vma->vm_ops = &bpf_map_default_vmops;
642    vma->vm_private_data = map;
643    vma->vm_flags &= ~VM_MAYEXEC;
644    if (!(vma->vm_flags & VM_WRITE)) {
645        /* disallow re-mapping with PROT_WRITE */
646        vma->vm_flags &= ~VM_MAYWRITE;
647    }
648
649    err = map->ops->map_mmap(map, vma);
650    if (err) {
651        goto out;
652    }
653
654    if (vma->vm_flags & VM_MAYWRITE) {
655        map->writecnt++;
656    }
657out:
658    mutex_unlock(&map->freeze_mutex);
659    return err;
660}
661
662static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
663{
664    struct bpf_map *map = filp->private_data;
665
666    if (map->ops->map_poll) {
667        return map->ops->map_poll(map, filp, pts);
668    }
669
670    return EPOLLERR;
671}
672
673const struct file_operations bpf_map_fops = {
674#ifdef CONFIG_PROC_FS
675    .show_fdinfo = bpf_map_show_fdinfo,
676#endif
677    .release = bpf_map_release,
678    .read = bpf_dummy_read,
679    .write = bpf_dummy_write,
680    .mmap = bpf_map_mmap,
681    .poll = bpf_map_poll,
682};
683
684int bpf_map_new_fd(struct bpf_map *map, int flags)
685{
686    int ret;
687
688    ret = security_bpf_map(map, OPEN_FMODE(flags));
689    if (ret < 0) {
690        return ret;
691    }
692
693    return anon_inode_getfd("bpf-map", &bpf_map_fops, map, flags | O_CLOEXEC);
694}
695
696int bpf_get_file_flag(int flags)
697{
698    if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) {
699        return -EINVAL;
700    }
701    if (flags & BPF_F_RDONLY) {
702        return O_RDONLY;
703    }
704    if (flags & BPF_F_WRONLY) {
705        return O_WRONLY;
706    }
707    return O_RDWR;
708}
709
710/* helper macro to check that unused fields 'union bpf_attr' are zero */
711#define CHECK_ATTR(CMD)                                                                                                \
712    memchr_inv((void *)&attr->CMD##_LAST_FIELD + sizeof(attr->CMD##_LAST_FIELD), 0,                                    \
713               sizeof(*attr) - offsetof(union bpf_attr, CMD##_LAST_FIELD) - sizeof(attr->CMD##_LAST_FIELD)) != NULL
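/* Example: for BPF_MAP_FREEZE the last field is map_fd, so
 * CHECK_ATTR(BPF_MAP_FREEZE) scans every byte of 'union bpf_attr' that lies
 * after attr->map_fd and evaluates to true (callers then return -EINVAL) if
 * any of those bytes is non-zero.
 */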
714
715/* dst and src must have at least "size" number of bytes.
716 * Return strlen on success and < 0 on error.
717 */
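/* For example, a map_name of "sock_map.v2" is accepted (alphanumerics, '_' and
 * '.' only) and its length is returned, while a name containing spaces, or one
 * that fills all 'size' bytes without a terminating NUL, returns -EINVAL.
 */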
718int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
719{
720    const char *end = src + size;
721    const char *orig_src = src;
722
723    memset(dst, 0, size);
724    /* Copy all isalnum(), '_' and '.' chars. */
725    while (src < end && *src) {
726        if (!isalnum(*src) && *src != '_' && *src != '.') {
727            return -EINVAL;
728        }
729        *dst++ = *src++;
730    }
731
732    /* No '\0' found in "size" number of bytes */
733    if (src == end) {
734        return -EINVAL;
735    }
736
737    return src - orig_src;
738}
739
740int map_check_no_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type,
741                     const struct btf_type *value_type)
742{
743    return -ENOTSUPP;
744}
745
746static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id)
747{
748    const struct btf_type *key_type, *value_type;
749    u32 key_size, value_size;
750    int ret = 0;
751
752    /* Some maps allow key to be unspecified. */
753    if (btf_key_id) {
754        key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
755        if (!key_type || key_size != map->key_size) {
756            return -EINVAL;
757        }
758    } else {
759        key_type = btf_type_by_id(btf, 0);
760        if (!map->ops->map_check_btf) {
761            return -EINVAL;
762        }
763    }
764
765    value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
766    if (!value_type || value_size != map->value_size) {
767        return -EINVAL;
768    }
769
770    map->spin_lock_off = btf_find_spin_lock(btf, value_type);
771
772    if (map_value_has_spin_lock(map)) {
773        if (map->map_flags & BPF_F_RDONLY_PROG) {
774            return -EACCES;
775        }
776        if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY &&
777            map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
778            map->map_type != BPF_MAP_TYPE_INODE_STORAGE) {
779            return -ENOTSUPP;
780        }
781        if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) {
782            WARN_ONCE(1, "verifier bug spin_lock_off %d value_size %d\n", map->spin_lock_off, map->value_size);
783            return -EFAULT;
784        }
785    }
786
787    if (map->ops->map_check_btf) {
788        ret = map->ops->map_check_btf(map, btf, key_type, value_type);
789    }
790
791    return ret;
792}
793
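/* BPF_MAP_CREATE: validate the attributes, allocate a type-specific map,
 * attach optional BTF key/value info, run the LSM hook, publish the map in the
 * IDR and return a new fd. Once bpf_map_alloc_id() has run, error paths must
 * use bpf_map_put_with_uref() because the id is already visible via
 * BPF_MAP_GET_FD_BY_ID.
 */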
794#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
795/* called via syscall */
796static int map_create(union bpf_attr *attr)
797{
798    int numa_node = bpf_map_attr_numa_node(attr);
799    struct bpf_map_memory mem;
800    struct bpf_map *map;
801    int f_flags;
802    int err;
803
804    err = CHECK_ATTR(BPF_MAP_CREATE);
805    if (err) {
806        return -EINVAL;
807    }
808
809    if (attr->btf_vmlinux_value_type_id) {
810        if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || attr->btf_key_type_id || attr->btf_value_type_id) {
811            return -EINVAL;
812        }
813    } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
814        return -EINVAL;
815    }
816
817    f_flags = bpf_get_file_flag(attr->map_flags);
818    if (f_flags < 0) {
819        return f_flags;
820    }
821
822    if (numa_node != NUMA_NO_NODE && ((unsigned int)numa_node >= nr_node_ids || !node_online(numa_node))) {
823        return -EINVAL;
824    }
825
826    /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
827    map = find_and_alloc_map(attr);
828    if (IS_ERR(map)) {
829        return PTR_ERR(map);
830    }
831
832    err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name));
833    if (err < 0) {
834        goto free_map;
835    }
836
837    atomic64_set(&map->refcnt, 1);
838    atomic64_set(&map->usercnt, 1);
839    mutex_init(&map->freeze_mutex);
840
841    map->spin_lock_off = -EINVAL;
    if (attr->btf_key_type_id || attr->btf_value_type_id ||
        /* Even if the map's value is a kernel struct,
         * the bpf_prog.o must have BTF to begin with
         * to figure out the corresponding kernel
         * counterpart.  Thus, attr->btf_fd has
         * to be valid also.
         */
        attr->btf_vmlinux_value_type_id) {
850        struct btf *btf;
851
852        btf = btf_get_by_fd(attr->btf_fd);
853        if (IS_ERR(btf)) {
854            err = PTR_ERR(btf);
855            goto free_map;
856        }
857        map->btf = btf;
858
859        if (attr->btf_value_type_id) {
860            err = map_check_btf(map, btf, attr->btf_key_type_id, attr->btf_value_type_id);
861            if (err) {
862                goto free_map;
863            }
864        }
865
866        map->btf_key_type_id = attr->btf_key_type_id;
867        map->btf_value_type_id = attr->btf_value_type_id;
868        map->btf_vmlinux_value_type_id = attr->btf_vmlinux_value_type_id;
869    }
870
871    err = security_bpf_map_alloc(map);
872    if (err) {
873        goto free_map;
874    }
875
876    err = bpf_map_alloc_id(map);
877    if (err) {
878        goto free_map_sec;
879    }
880
881    err = bpf_map_new_fd(map, f_flags);
882    if (err < 0) {
883        /* failed to allocate fd.
884         * bpf_map_put_with_uref() is needed because the above
885         * bpf_map_alloc_id() has published the map
886         * to the userspace and the userspace may
887         * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
888         */
889        bpf_map_put_with_uref(map);
890        return err;
891    }
892
893    return err;
894
895free_map_sec:
896    security_bpf_map_free(map);
897free_map:
898    btf_put(map->btf);
899    bpf_map_charge_move(&mem, &map->memory);
900    map->ops->map_free(map);
901    bpf_map_charge_finish(&mem);
902    return err;
903}
904
905/* if error is returned, fd is released.
906 * On success caller should complete fd access with matching fdput()
907 */
908struct bpf_map *__bpf_map_get(struct fd f)
909{
910    if (!f.file) {
911        return ERR_PTR(-EBADF);
912    }
913    if (f.file->f_op != &bpf_map_fops) {
914        fdput(f);
915        return ERR_PTR(-EINVAL);
916    }
917
918    return f.file->private_data;
919}
920
921void bpf_map_inc(struct bpf_map *map)
922{
923    atomic64_inc(&map->refcnt);
924}
925EXPORT_SYMBOL_GPL(bpf_map_inc);
926
927void bpf_map_inc_with_uref(struct bpf_map *map)
928{
929    atomic64_inc(&map->refcnt);
930    atomic64_inc(&map->usercnt);
931}
932EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
933
934struct bpf_map *bpf_map_get(u32 ufd)
935{
936    struct fd f = fdget(ufd);
937    struct bpf_map *map;
938
939    map = __bpf_map_get(f);
940    if (IS_ERR(map)) {
941        return map;
942    }
943
944    bpf_map_inc(map);
945    fdput(f);
946
947    return map;
948}
949
950struct bpf_map *bpf_map_get_with_uref(u32 ufd)
951{
952    struct fd f = fdget(ufd);
953    struct bpf_map *map;
954
955    map = __bpf_map_get(f);
956    if (IS_ERR(map)) {
957        return map;
958    }
959
960    bpf_map_inc_with_uref(map);
961    fdput(f);
962
963    return map;
964}
965
966/* map_idr_lock should have been held */
967static struct bpf_map *_bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
968{
969    int refold;
970
971    refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
972    if (!refold) {
973        return ERR_PTR(-ENOENT);
974    }
975    if (uref) {
976        atomic64_inc(&map->usercnt);
977    }
978
979    return map;
980}
981
982struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
983{
984    spin_lock_bh(&map_idr_lock);
985    map = _bpf_map_inc_not_zero(map, false);
986    spin_unlock_bh(&map_idr_lock);
987
988    return map;
989}
990EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
991
992int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
993{
994    return -ENOTSUPP;
995}
996
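/* __bpf_copy_key() returns a kernel copy of the user-supplied key, NULL for
 * keyless maps (which must pass a NULL pointer), or an ERR_PTR() on failure;
 * callers release the copy with kfree().
 */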
997static void *__bpf_copy_key(void __user *ukey, u64 key_size)
998{
999    if (key_size) {
1000        return memdup_user(ukey, key_size);
1001    }
1002
1003    if (ukey) {
1004        return ERR_PTR(-EINVAL);
1005    }
1006
1007    return NULL;
1008}
1009
1010/* last field in 'union bpf_attr' used by this command */
1011#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
1012
1013static int map_lookup_elem(union bpf_attr *attr)
1014{
1015    void __user *ukey = u64_to_user_ptr(attr->key);
1016    void __user *uvalue = u64_to_user_ptr(attr->value);
1017    int ufd = attr->map_fd;
1018    struct bpf_map *map;
1019    void *key, *value;
1020    u32 value_size;
1021    struct fd f;
1022    int err;
1023
1024    if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) {
1025        return -EINVAL;
1026    }
1027
1028    if (attr->flags & ~BPF_F_LOCK) {
1029        return -EINVAL;
1030    }
1031
1032    f = fdget(ufd);
1033    map = __bpf_map_get(f);
1034    if (IS_ERR(map)) {
1035        return PTR_ERR(map);
1036    }
1037    if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1038        err = -EPERM;
1039        goto err_put;
1040    }
1041
1042    if ((attr->flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1043        err = -EINVAL;
1044        goto err_put;
1045    }
1046
1047    key = __bpf_copy_key(ukey, map->key_size);
1048    if (IS_ERR(key)) {
1049        err = PTR_ERR(key);
1050        goto err_put;
1051    }
1052
1053    value_size = bpf_map_value_size(map);
1054
1055    err = -ENOMEM;
1056    value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1057    if (!value) {
1058        goto free_key;
1059    }
1060
1061    err = bpf_map_copy_value(map, key, value, attr->flags);
1062    if (err) {
1063        goto free_value;
1064    }
1065
1066    err = -EFAULT;
1067    if (copy_to_user(uvalue, value, value_size) != 0) {
1068        goto free_value;
1069    }
1070
1071    err = 0;
1072
1073free_value:
1074    kfree(value);
1075free_key:
1076    kfree(key);
1077err_put:
1078    fdput(f);
1079    return err;
1080}
1081
1082#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
1083
1084static int map_update_elem(union bpf_attr *attr)
1085{
1086    void __user *ukey = u64_to_user_ptr(attr->key);
1087    void __user *uvalue = u64_to_user_ptr(attr->value);
1088    int ufd = attr->map_fd;
1089    struct bpf_map *map;
1090    void *key, *value;
1091    u32 value_size;
1092    struct fd f;
1093    int err;
1094
1095    if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) {
1096        return -EINVAL;
1097    }
1098
1099    f = fdget(ufd);
1100    map = __bpf_map_get(f);
1101    if (IS_ERR(map)) {
1102        return PTR_ERR(map);
1103    }
1104    if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1105        err = -EPERM;
1106        goto err_put;
1107    }
1108
1109    if ((attr->flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1110        err = -EINVAL;
1111        goto err_put;
1112    }
1113
1114    key = __bpf_copy_key(ukey, map->key_size);
1115    if (IS_ERR(key)) {
1116        err = PTR_ERR(key);
1117        goto err_put;
1118    }
1119
1120    if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
1121        map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
        value_size = round_up(map->value_size, 8) * num_possible_cpus();
1123    } else {
1124        value_size = map->value_size;
1125    }
1126
1127    err = -ENOMEM;
1128    value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1129    if (!value) {
1130        goto free_key;
1131    }
1132
1133    err = -EFAULT;
1134    if (copy_from_user(value, uvalue, value_size) != 0) {
1135        goto free_value;
1136    }
1137
1138    err = bpf_map_update_value(map, f, key, value, attr->flags);
1139
1140free_value:
1141    kfree(value);
1142free_key:
1143    kfree(key);
1144err_put:
1145    fdput(f);
1146    return err;
1147}
1148
1149#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
1150
1151static int map_delete_elem(union bpf_attr *attr)
1152{
1153    void __user *ukey = u64_to_user_ptr(attr->key);
1154    int ufd = attr->map_fd;
1155    struct bpf_map *map;
1156    struct fd f;
1157    void *key;
1158    int err;
1159
1160    if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) {
1161        return -EINVAL;
1162    }
1163
1164    f = fdget(ufd);
1165    map = __bpf_map_get(f);
1166    if (IS_ERR(map)) {
1167        return PTR_ERR(map);
1168    }
1169    if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1170        err = -EPERM;
1171        goto err_put;
1172    }
1173
1174    key = __bpf_copy_key(ukey, map->key_size);
1175    if (IS_ERR(key)) {
1176        err = PTR_ERR(key);
1177        goto err_put;
1178    }
1179
1180    if (bpf_map_is_dev_bound(map)) {
1181        err = bpf_map_offload_delete_elem(map, key);
1182        goto out;
1183    } else if (IS_FD_PROG_ARRAY(map) || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1184        /* These maps require sleepable context */
1185        err = map->ops->map_delete_elem(map, key);
1186        goto out;
1187    }
1188
1189    bpf_disable_instrumentation();
1190    rcu_read_lock();
1191    err = map->ops->map_delete_elem(map, key);
1192    rcu_read_unlock();
1193    bpf_enable_instrumentation();
1194    maybe_wait_bpf_programs(map);
1195out:
1196    kfree(key);
1197err_put:
1198    fdput(f);
1199    return err;
1200}
1201
1202/* last field in 'union bpf_attr' used by this command */
1203#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
1204
1205static int map_get_next_key(union bpf_attr *attr)
1206{
1207    void __user *ukey = u64_to_user_ptr(attr->key);
1208    void __user *unext_key = u64_to_user_ptr(attr->next_key);
1209    int ufd = attr->map_fd;
1210    struct bpf_map *map;
1211    void *key, *next_key;
1212    struct fd f;
1213    int err;
1214
1215    if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) {
1216        return -EINVAL;
1217    }
1218
1219    f = fdget(ufd);
1220    map = __bpf_map_get(f);
1221    if (IS_ERR(map)) {
1222        return PTR_ERR(map);
1223    }
1224    if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1225        err = -EPERM;
1226        goto err_put;
1227    }
1228
1229    if (ukey) {
1230        key = __bpf_copy_key(ukey, map->key_size);
1231        if (IS_ERR(key)) {
1232            err = PTR_ERR(key);
1233            goto err_put;
1234        }
1235    } else {
1236        key = NULL;
1237    }
1238
1239    err = -ENOMEM;
1240    next_key = kmalloc(map->key_size, GFP_USER);
1241    if (!next_key) {
1242        goto free_key;
1243    }
1244
1245    if (bpf_map_is_dev_bound(map)) {
1246        err = bpf_map_offload_get_next_key(map, key, next_key);
1247        goto out;
1248    }
1249
1250    rcu_read_lock();
1251    err = map->ops->map_get_next_key(map, key, next_key);
1252    rcu_read_unlock();
1253out:
1254    if (err) {
1255        goto free_next_key;
1256    }
1257
1258    err = -EFAULT;
1259    if (copy_to_user(unext_key, next_key, map->key_size) != 0) {
1260        goto free_next_key;
1261    }
1262
1263    err = 0;
1264
1265free_next_key:
1266    kfree(next_key);
1267free_key:
1268    kfree(key);
1269err_put:
1270    fdput(f);
1271    return err;
1272}
1273
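/* Generic batched delete, shared by map types that do not provide their own:
 * keys are copied from user space and deleted one at a time under
 * rcu_read_lock() with instrumentation disabled, and the number of elements
 * actually processed is reported back through uattr->batch.count.
 */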
1274int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
1275{
1276    void __user *keys = u64_to_user_ptr(attr->batch.keys);
1277    u32 cp, max_count;
1278    int err = 0;
1279    void *key;
1280
1281    if (attr->batch.elem_flags & ~BPF_F_LOCK) {
1282        return -EINVAL;
1283    }
1284
1285    if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1286        return -EINVAL;
1287    }
1288
1289    max_count = attr->batch.count;
1290    if (!max_count) {
1291        return 0;
1292    }
1293
1294    key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1295    if (!key) {
1296        return -ENOMEM;
1297    }
1298
1299    for (cp = 0; cp < max_count; cp++) {
1300        err = -EFAULT;
1301        if (copy_from_user(key, keys + cp * map->key_size, map->key_size)) {
1302            break;
1303        }
1304
1305        if (bpf_map_is_dev_bound(map)) {
1306            err = bpf_map_offload_delete_elem(map, key);
1307            break;
1308        }
1309
1310        bpf_disable_instrumentation();
1311        rcu_read_lock();
1312        err = map->ops->map_delete_elem(map, key);
1313        rcu_read_unlock();
1314        bpf_enable_instrumentation();
1315        maybe_wait_bpf_programs(map);
1316        if (err) {
1317            break;
1318        }
1319    }
1320    if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) {
1321        err = -EFAULT;
1322    }
1323
1324    kfree(key);
1325    return err;
1326}
1327
1328int generic_map_update_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
1329{
1330    void __user *values = u64_to_user_ptr(attr->batch.values);
1331    void __user *keys = u64_to_user_ptr(attr->batch.keys);
1332    u32 value_size, cp, max_count;
1333    int ufd = attr->batch.map_fd;
1334    void *key, *value;
1335    struct fd f;
1336    int err = 0;
1337
1338    if (attr->batch.elem_flags & ~BPF_F_LOCK) {
1339        return -EINVAL;
1340    }
1341
1342    if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1343        return -EINVAL;
1344    }
1345
1346    value_size = bpf_map_value_size(map);
1347
1348    max_count = attr->batch.count;
1349    if (!max_count) {
1350        return 0;
1351    }
1352
1353    key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1354    if (!key) {
1355        return -ENOMEM;
1356    }
1357
1358    value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1359    if (!value) {
1360        kfree(key);
1361        return -ENOMEM;
1362    }
1363
1364    f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
1365    for (cp = 0; cp < max_count; cp++) {
1366        err = -EFAULT;
1367        if (copy_from_user(key, keys + cp * map->key_size, map->key_size) ||
1368            copy_from_user(value, values + cp * value_size, value_size)) {
1369            break;
1370        }
1371
        err = bpf_map_update_value(map, f, key, value, attr->batch.elem_flags);

        if (err) {
            break;
        }
        cond_resched();
    }
1379
1380    if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) {
1381        err = -EFAULT;
1382    }
1383
1384    kfree(value);
1385    kfree(key);
1386    fdput(f);
1387    return err;
1388}
1389
1390#define MAP_LOOKUP_RETRIES 3
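/* generic_map_lookup_batch() walks the map with ->map_get_next_key(); if an
 * element disappears between the key walk and the value copy (the lookup
 * returns -ENOENT), the same position is retried up to MAP_LOOKUP_RETRIES
 * times before the walk gives up with -EINTR.
 */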
1391
1392int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
1393{
1394    void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
1395    void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1396    void __user *values = u64_to_user_ptr(attr->batch.values);
1397    void __user *keys = u64_to_user_ptr(attr->batch.keys);
1398    void *buf, *buf_prevkey, *prev_key, *key, *value;
1399    int err, retry = MAP_LOOKUP_RETRIES;
1400    u32 value_size, cp, max_count;
1401
1402    if (attr->batch.elem_flags & ~BPF_F_LOCK) {
1403        return -EINVAL;
1404    }
1405
1406    if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1407        return -EINVAL;
1408    }
1409
1410    value_size = bpf_map_value_size(map);
1411
1412    max_count = attr->batch.count;
1413    if (!max_count) {
1414        return 0;
1415    }
1416
1417    if (put_user(0, &uattr->batch.count)) {
1418        return -EFAULT;
1419    }
1420
1421    buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1422    if (!buf_prevkey) {
1423        return -ENOMEM;
1424    }
1425
1426    buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
1427    if (!buf) {
1428        kfree(buf_prevkey);
1429        return -ENOMEM;
1430    }
1431
1432    err = -EFAULT;
1433    prev_key = NULL;
1434    if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) {
1435        goto free_buf;
1436    }
1437    key = buf;
1438    value = key + map->key_size;
1439    if (ubatch) {
1440        prev_key = buf_prevkey;
1441    }
1442
1443    for (cp = 0; cp < max_count;) {
1444        rcu_read_lock();
1445        err = map->ops->map_get_next_key(map, prev_key, key);
1446        rcu_read_unlock();
1447        if (err) {
1448            break;
1449        }
1450        err = bpf_map_copy_value(map, key, value, attr->batch.elem_flags);
1451
1452        if (err == -ENOENT) {
1453            if (retry) {
1454                retry--;
1455                continue;
1456            }
1457            err = -EINTR;
1458            break;
1459        }
1460
1461        if (err) {
1462            goto free_buf;
1463        }
1464
1465        if (copy_to_user(keys + cp * map->key_size, key, map->key_size)) {
1466            err = -EFAULT;
1467            goto free_buf;
1468        }
1469        if (copy_to_user(values + cp * value_size, value, value_size)) {
1470            err = -EFAULT;
1471            goto free_buf;
1472        }
1473
1474        if (!prev_key) {
1475            prev_key = buf_prevkey;
1476        }
1477
1478        swap(prev_key, key);
1479        retry = MAP_LOOKUP_RETRIES;
1480        cp++;
1481        cond_resched();
1482    }
1483
1484    if (err == -EFAULT) {
1485        goto free_buf;
1486    }
1487
1488    if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
1489         (cp && copy_to_user(uobatch, prev_key, map->key_size)))) {
1490        err = -EFAULT;
1491    }
1492
1493free_buf:
1494    kfree(buf_prevkey);
1495    kfree(buf);
1496    return err;
1497}
1498
1499#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
1500
1501static int map_lookup_and_delete_elem(union bpf_attr *attr)
1502{
1503    void __user *ukey = u64_to_user_ptr(attr->key);
1504    void __user *uvalue = u64_to_user_ptr(attr->value);
1505    int ufd = attr->map_fd;
1506    struct bpf_map *map;
1507    void *key, *value;
1508    u32 value_size;
1509    struct fd f;
1510    int err;
1511
1512    if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) {
1513        return -EINVAL;
1514    }
1515
1516    f = fdget(ufd);
1517    map = __bpf_map_get(f);
1518    if (IS_ERR(map)) {
1519        return PTR_ERR(map);
1520    }
1521    if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1522        err = -EPERM;
1523        goto err_put;
1524    }
1525
1526    key = __bpf_copy_key(ukey, map->key_size);
1527    if (IS_ERR(key)) {
1528        err = PTR_ERR(key);
1529        goto err_put;
1530    }
1531
1532    value_size = map->value_size;
1533
1534    err = -ENOMEM;
1535    value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1536    if (!value) {
1537        goto free_key;
1538    }
1539
1540    if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) {
1541        err = map->ops->map_pop_elem(map, value);
1542    } else {
1543        err = -ENOTSUPP;
1544    }
1545
1546    if (err) {
1547        goto free_value;
1548    }
1549
1550    if (copy_to_user(uvalue, value, value_size) != 0) {
1551        err = -EFAULT;
1552        goto free_value;
1553    }
1554
1555    err = 0;
1556
1557free_value:
1558    kfree(value);
1559free_key:
1560    kfree(key);
1561err_put:
1562    fdput(f);
1563    return err;
1564}
1565
1566#define BPF_MAP_FREEZE_LAST_FIELD map_fd
1567
1568static int map_freeze(const union bpf_attr *attr)
1569{
1570    int err = 0, ufd = attr->map_fd;
1571    struct bpf_map *map;
1572    struct fd f;
1573
1574    if (CHECK_ATTR(BPF_MAP_FREEZE)) {
1575        return -EINVAL;
1576    }
1577
1578    f = fdget(ufd);
1579    map = __bpf_map_get(f);
1580    if (IS_ERR(map)) {
1581        return PTR_ERR(map);
1582    }
1583
1584    if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1585        fdput(f);
1586        return -ENOTSUPP;
1587    }
1588
1589    mutex_lock(&map->freeze_mutex);
1590
1591    if (map->writecnt) {
1592        err = -EBUSY;
1593        goto err_put;
1594    }
1595    if (READ_ONCE(map->frozen)) {
1596        err = -EBUSY;
1597        goto err_put;
1598    }
1599    if (!bpf_capable()) {
1600        err = -EPERM;
1601        goto err_put;
1602    }
1603
1604    WRITE_ONCE(map->frozen, true);
1605err_put:
1606    mutex_unlock(&map->freeze_mutex);
1607    fdput(f);
1608    return err;
1609}
1610
1611static const struct bpf_prog_ops *const bpf_prog_types[] = {
1612#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) [_id] = &_name##_prog_ops,
1613#define BPF_MAP_TYPE(_id, _ops)
1614#define BPF_LINK_TYPE(_id, _name)
1615#include <linux/bpf_types.h>
1616#undef BPF_PROG_TYPE
1617#undef BPF_MAP_TYPE
1618#undef BPF_LINK_TYPE
1619};
1620
1621static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
1622{
1623    const struct bpf_prog_ops *ops;
1624
1625    if (type >= ARRAY_SIZE(bpf_prog_types)) {
1626        return -EINVAL;
1627    }
1628    type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
1629    ops = bpf_prog_types[type];
1630    if (!ops) {
1631        return -EINVAL;
1632    }
1633
1634    if (!bpf_prog_is_dev_bound(prog->aux)) {
1635        prog->aux->ops = ops;
1636    } else {
1637        prog->aux->ops = &bpf_offload_prog_ops;
1638    }
1639    prog->type = type;
1640    return 0;
1641}
1642
1643enum bpf_audit {
1644    BPF_AUDIT_LOAD,
1645    BPF_AUDIT_UNLOAD,
1646    BPF_AUDIT_MAX,
1647};
1648
1649static const char *const bpf_audit_str[BPF_AUDIT_MAX] = {
1650    [BPF_AUDIT_LOAD] = "LOAD",
1651    [BPF_AUDIT_UNLOAD] = "UNLOAD",
1652};
1653
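/* Emit an AUDIT_BPF record for program load/unload. LOAD events are tied to
 * the current syscall's audit context; UNLOAD events, which can fire from
 * deferred free paths, are logged without one.
 */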
1654static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
1655{
1656    struct audit_context *ctx = NULL;
1657    struct audit_buffer *ab;
1658
1659    if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) {
1660        return;
1661    }
1662    if (audit_enabled == AUDIT_OFF) {
1663        return;
1664    }
1665    if (op == BPF_AUDIT_LOAD) {
1666        ctx = audit_context();
1667    }
1668    ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
1669    if (unlikely(!ab)) {
1670        return;
1671    }
1672    audit_log_format(ab, "prog-id=%u op=%s", prog->aux->id, bpf_audit_str[op]);
1673    audit_log_end(ab);
1674}
1675
1676int __bpf_prog_charge(struct user_struct *user, u32 pages)
1677{
1678    unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1679    unsigned long user_bufs;
1680
1681    if (user) {
1682        user_bufs = atomic_long_add_return(pages, &user->locked_vm);
1683        if (user_bufs > memlock_limit) {
1684            atomic_long_sub(pages, &user->locked_vm);
1685            return -EPERM;
1686        }
1687    }
1688
1689    return 0;
1690}
1691
1692void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
1693{
1694    if (user) {
1695        atomic_long_sub(pages, &user->locked_vm);
1696    }
1697}
1698
1699static int bpf_prog_charge_memlock(struct bpf_prog *prog)
1700{
1701    struct user_struct *user = get_current_user();
1702    int ret;
1703
1704    ret = __bpf_prog_charge(user, prog->pages);
1705    if (ret) {
1706        free_uid(user);
1707        return ret;
1708    }
1709
1710    prog->aux->user = user;
1711    return 0;
1712}
1713
1714static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
1715{
1716    struct user_struct *user = prog->aux->user;
1717
1718    __bpf_prog_uncharge(user, prog->pages);
1719    free_uid(user);
1720}
1721
1722static int bpf_prog_alloc_id(struct bpf_prog *prog)
1723{
1724    int id;
1725
1726    idr_preload(GFP_KERNEL);
1727    spin_lock_bh(&prog_idr_lock);
1728    id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
1729    if (id > 0) {
1730        prog->aux->id = id;
1731    }
1732    spin_unlock_bh(&prog_idr_lock);
1733    idr_preload_end();
1734
1735    /* id is in [1, INT_MAX) */
1736    if (WARN_ON_ONCE(!id)) {
1737        return -ENOSPC;
1738    }
1739
1740    return id > 0 ? 0 : id;
1741}
1742
1743void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
1744{
1745    /* cBPF to eBPF migrations are currently not in the idr store.
1746     * Offloaded programs are removed from the store when their device
1747     * disappears - even if someone grabs an fd to them they are unusable,
1748     * simply waiting for refcnt to drop to be freed.
1749     */
1750    if (!prog->aux->id) {
1751        return;
1752    }
1753
1754    if (do_idr_lock) {
1755        spin_lock_bh(&prog_idr_lock);
1756    } else {
1757        __acquire(&prog_idr_lock);
1758    }
1759
1760    idr_remove(&prog_idr, prog->aux->id);
1761    prog->aux->id = 0;
1762
1763    if (do_idr_lock) {
1764        spin_unlock_bh(&prog_idr_lock);
1765    } else {
1766        __release(&prog_idr_lock);
1767    }
1768}
1769
1770static void _bpf_prog_put_rcu(struct rcu_head *rcu)
1771{
1772    struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
1773
1774    kvfree(aux->func_info);
1775    kfree(aux->func_info_aux);
1776    bpf_prog_uncharge_memlock(aux->prog);
1777    security_bpf_prog_free(aux);
1778    bpf_prog_free(aux->prog);
1779}
1780
1781static void _bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
1782{
1783    bpf_prog_kallsyms_del_all(prog);
1784    btf_put(prog->aux->btf);
1785    bpf_prog_free_linfo(prog);
1786
1787    if (deferred) {
1788        if (prog->aux->sleepable) {
1789            call_rcu_tasks_trace(&prog->aux->rcu, _bpf_prog_put_rcu);
1790        } else {
1791            call_rcu(&prog->aux->rcu, _bpf_prog_put_rcu);
1792        }
1793    } else {
1794        _bpf_prog_put_rcu(&prog->aux->rcu);
1795    }
1796}
1797
1798static void _bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
1799{
1800    if (atomic64_dec_and_test(&prog->aux->refcnt)) {
1801        perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
1802        bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
1803        /* bpf_prog_free_id() must be called first */
1804        bpf_prog_free_id(prog, do_idr_lock);
1805        _bpf_prog_put_noref(prog, true);
1806    }
1807}
1808
1809void bpf_prog_put(struct bpf_prog *prog)
1810{
1811    _bpf_prog_put(prog, true);
1812}
1813EXPORT_SYMBOL_GPL(bpf_prog_put);
1814
1815static int bpf_prog_release(struct inode *inode, struct file *filp)
1816{
1817    struct bpf_prog *prog = filp->private_data;
1818
1819    bpf_prog_put(prog);
1820    return 0;
1821}
1822
1823static void bpf_prog_get_stats(const struct bpf_prog *prog, struct bpf_prog_stats *stats)
1824{
1825    u64 nsecs = 0, cnt = 0;
1826    int cpu;
1827
    for_each_possible_cpu(cpu) {
1830        const struct bpf_prog_stats *st;
1831        unsigned int start;
1832        u64 tnsecs, tcnt;
1833
1834        st = per_cpu_ptr(prog->aux->stats, cpu);
1835        do {
1836            start = u64_stats_fetch_begin_irq(&st->syncp);
1837            tnsecs = st->nsecs;
1838            tcnt = st->cnt;
1839        } while (u64_stats_fetch_retry_irq(&st->syncp, start));
1840        nsecs += tnsecs;
1841        cnt += tcnt;
1842    }
1843    stats->nsecs = nsecs;
1844    stats->cnt = cnt;
1845}
1846
1847#ifdef CONFIG_PROC_FS
1848static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
1849{
1850    const struct bpf_prog *prog = filp->private_data;
    char prog_tag[sizeof(prog->tag) * 2 + 1] = {};
1852    struct bpf_prog_stats stats;
1853
1854    bpf_prog_get_stats(prog, &stats);
1855    bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
1856    seq_printf(m,
1857               "prog_type:\t%u\n"
1858               "prog_jited:\t%u\n"
1859               "prog_tag:\t%s\n"
1860               "memlock:\t%llu\n"
1861               "prog_id:\t%u\n"
1862               "run_time_ns:\t%llu\n"
1863               "run_cnt:\t%llu\n",
1864               prog->type, prog->jited, prog_tag, prog->pages * 1ULL << PAGE_SHIFT, prog->aux->id, stats.nsecs,
1865               stats.cnt);
1866}
1867#endif
1868
1869const struct file_operations bpf_prog_fops = {
1870#ifdef CONFIG_PROC_FS
1871    .show_fdinfo = bpf_prog_show_fdinfo,
1872#endif
1873    .release = bpf_prog_release,
1874    .read = bpf_dummy_read,
1875    .write = bpf_dummy_write,
1876};
1877
1878int bpf_prog_new_fd(struct bpf_prog *prog)
1879{
1880    int ret;
1881
1882    ret = security_bpf_prog(prog);
1883    if (ret < 0) {
1884        return ret;
1885    }
1886
1887    return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
1888}
1889
1890static struct bpf_prog *i_bpf_prog_get(struct fd f)
1891{
1892    if (!f.file) {
1893        return ERR_PTR(-EBADF);
1894    }
1895    if (f.file->f_op != &bpf_prog_fops) {
1896        fdput(f);
1897        return ERR_PTR(-EINVAL);
1898    }
1899
1900    return f.file->private_data;
1901}
1902
1903void bpf_prog_add(struct bpf_prog *prog, int i)
1904{
1905    atomic64_add(i, &prog->aux->refcnt);
1906}
1907EXPORT_SYMBOL_GPL(bpf_prog_add);
1908
1909void bpf_prog_sub(struct bpf_prog *prog, int i)
1910{
    /* Only to be used for undoing previous bpf_prog_add() in some
     * error path. We still know that another entity in our call
     * path holds a reference to the program, thus atomic64_sub() can
     * be safely used in such cases!
     */
1916    WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
1917}
1918EXPORT_SYMBOL_GPL(bpf_prog_sub);
1919
1920void bpf_prog_inc(struct bpf_prog *prog)
1921{
1922    atomic64_inc(&prog->aux->refcnt);
1923}
1924EXPORT_SYMBOL_GPL(bpf_prog_inc);
1925
1926/* prog_idr_lock should have been held */
1927struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
1928{
1929    int refold;
1930
1931    refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
1932    if (!refold) {
1933        return ERR_PTR(-ENOENT);
1934    }
1935
1936    return prog;
1937}
1938EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
1939
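/* Check whether @prog may be used for the requested attachment: the program
 * type must match *@attach_type, and a device-bound program is only allowed
 * when the caller explicitly asked for driver attachment (@attach_drv).
 */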
1940bool bpf_prog_get_ok(struct bpf_prog *prog, enum bpf_prog_type *attach_type, bool attach_drv)
1941{
1942    /* not an attachment, just a refcount inc, always allow */
1943    if (!attach_type) {
1944        return true;
1945    }
1946
1947    if (prog->type != *attach_type) {
1948        return false;
1949    }
1950    if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) {
1951        return false;
1952    }
1953
1954    return true;
1955}
1956
1957static struct bpf_prog *_bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, bool attach_drv)
1958{
1959    struct fd f = fdget(ufd);
1960    struct bpf_prog *prog;
1961
1962    prog = i_bpf_prog_get(f);
1963    if (IS_ERR(prog)) {
1964        return prog;
1965    }
1966    if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
1967        prog = ERR_PTR(-EINVAL);
1968        goto out;
1969    }
1970
1971    bpf_prog_inc(prog);
1972out:
1973    fdput(f);
1974    return prog;
1975}
1976
1977struct bpf_prog *bpf_prog_get(u32 ufd)
1978{
1979    return _bpf_prog_get(ufd, NULL, false);
1980}
1981
1982struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv)
1983{
1984    return _bpf_prog_get(ufd, &type, attach_drv);
1985}
1986EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
1987
1988/* Initially all BPF programs could be loaded w/o specifying
1989 * expected_attach_type. Later for some of them specifying expected_attach_type
1990 * at load time became required so that program could be validated properly.
1991 * Programs of types that are allowed to be loaded both w/ and w/o (for
1992 * backward compatibility) expected_attach_type, should have the default attach
1993 * type assigned to expected_attach_type for the latter case, so that it can be
1994 * validated later at attach time.
1995 *
1996 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
1997 * prog type requires it but has some attach types that have to be backward
1998 * compatible.
1999 */
2000static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
2001{
2002    if (attr->prog_type == BPF_PROG_TYPE_CGROUP_SOCK) {
2003        /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
2004         * exist so checking for non-zero is the way to go here.
2005         */
2006        if (!attr->expected_attach_type) {
2007            attr->expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
2008        }
2009    }
2010}
2011
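/* Validate the load-time combination of program type, expected_attach_type,
 * attach_btf_id and attach_prog_fd. Returns 0 if the combination is allowed,
 * -EINVAL otherwise.
 */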
2012static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type,
2013                                      u32 btf_id, u32 prog_fd)
2014{
2015    if (btf_id) {
2016        if (btf_id > BTF_MAX_TYPE) {
2017            return -EINVAL;
2018        }
2019
2020        switch (prog_type) {
2021            case BPF_PROG_TYPE_TRACING:
2022            case BPF_PROG_TYPE_LSM:
2023            case BPF_PROG_TYPE_STRUCT_OPS:
2024            case BPF_PROG_TYPE_EXT:
2025                break;
2026            default:
2027                return -EINVAL;
2028        }
2029    }
2030
2031    if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && prog_type != BPF_PROG_TYPE_EXT) {
2032        return -EINVAL;
2033    }
2034
2035    switch (prog_type) {
2036        case BPF_PROG_TYPE_CGROUP_SOCK:
2037            switch (expected_attach_type) {
2038                case BPF_CGROUP_INET_SOCK_CREATE:
2039                case BPF_CGROUP_INET_SOCK_RELEASE:
2040                case BPF_CGROUP_INET4_POST_BIND:
2041                case BPF_CGROUP_INET6_POST_BIND:
2042                    return 0;
2043                default:
2044                    return -EINVAL;
2045            }
2046        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2047            switch (expected_attach_type) {
2048                case BPF_CGROUP_INET4_BIND:
2049                case BPF_CGROUP_INET6_BIND:
2050                case BPF_CGROUP_INET4_CONNECT:
2051                case BPF_CGROUP_INET6_CONNECT:
2052                case BPF_CGROUP_INET4_GETPEERNAME:
2053                case BPF_CGROUP_INET6_GETPEERNAME:
2054                case BPF_CGROUP_INET4_GETSOCKNAME:
2055                case BPF_CGROUP_INET6_GETSOCKNAME:
2056                case BPF_CGROUP_UDP4_SENDMSG:
2057                case BPF_CGROUP_UDP6_SENDMSG:
2058                case BPF_CGROUP_UDP4_RECVMSG:
2059                case BPF_CGROUP_UDP6_RECVMSG:
2060                    return 0;
2061                default:
2062                    return -EINVAL;
2063            }
2064        case BPF_PROG_TYPE_CGROUP_SKB:
2065            switch (expected_attach_type) {
2066                case BPF_CGROUP_INET_INGRESS:
2067                case BPF_CGROUP_INET_EGRESS:
2068                    return 0;
2069                default:
2070                    return -EINVAL;
2071            }
2072        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2073            switch (expected_attach_type) {
2074                case BPF_CGROUP_SETSOCKOPT:
2075                case BPF_CGROUP_GETSOCKOPT:
2076                    return 0;
2077                default:
2078                    return -EINVAL;
2079            }
2080        case BPF_PROG_TYPE_SK_LOOKUP:
2081            if (expected_attach_type == BPF_SK_LOOKUP) {
2082                return 0;
2083            }
2084            return -EINVAL;
2085        case BPF_PROG_TYPE_EXT:
2086            if (expected_attach_type) {
2087                return -EINVAL;
2088            }
2089            fallthrough;
2090        default:
2091            return 0;
2092    }
2093}
2094
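/* Program types that can affect packet processing or other network state and
 * therefore require CAP_NET_ADMIN (or CAP_SYS_ADMIN) to load.
 */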
2095static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
2096{
2097    switch (prog_type) {
2098        case BPF_PROG_TYPE_SCHED_CLS:
2099        case BPF_PROG_TYPE_SCHED_ACT:
2100        case BPF_PROG_TYPE_XDP:
2101        case BPF_PROG_TYPE_LWT_IN:
2102        case BPF_PROG_TYPE_LWT_OUT:
2103        case BPF_PROG_TYPE_LWT_XMIT:
2104        case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2105        case BPF_PROG_TYPE_SK_SKB:
2106        case BPF_PROG_TYPE_SK_MSG:
2107        case BPF_PROG_TYPE_LIRC_MODE2:
2108        case BPF_PROG_TYPE_FLOW_DISSECTOR:
2109        case BPF_PROG_TYPE_CGROUP_DEVICE:
2110        case BPF_PROG_TYPE_CGROUP_SOCK:
2111        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2112        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2113        case BPF_PROG_TYPE_CGROUP_SYSCTL:
2114        case BPF_PROG_TYPE_SOCK_OPS:
2115        case BPF_PROG_TYPE_EXT: /* extends any prog */
2116            return true;
2117        case BPF_PROG_TYPE_CGROUP_SKB:
2118            /* always unpriv */
2119        case BPF_PROG_TYPE_SK_REUSEPORT:
2120            /* equivalent to SOCKET_FILTER. need CAP_BPF only */
2121        default:
2122            return false;
2123    }
2124}
2125
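/* Tracing-style program types that can observe kernel internals and therefore
 * require perfmon_capable() to load.
 */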
2126static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
2127{
2128    switch (prog_type) {
2129        case BPF_PROG_TYPE_KPROBE:
2130        case BPF_PROG_TYPE_TRACEPOINT:
2131        case BPF_PROG_TYPE_PERF_EVENT:
2132        case BPF_PROG_TYPE_RAW_TRACEPOINT:
2133        case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2134        case BPF_PROG_TYPE_TRACING:
2135        case BPF_PROG_TYPE_LSM:
2136        case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
2137        case BPF_PROG_TYPE_EXT:        /* extends any prog */
2138            return true;
2139        default:
2140            return false;
2141    }
2142}
2143
2144/* last field in 'union bpf_attr' used by this command */
2145#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
2146
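/* BPF_PROG_LOAD command: validate the attributes and the caller's
 * capabilities, copy the instructions from user space, run the verifier,
 * select the runtime (JIT or interpreter), allocate a program ID and return a
 * new program FD. Once the ID is allocated, any failure is handled through
 * bpf_prog_put() since the program is already publicly visible.
 */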
2147static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
2148{
2149    enum bpf_prog_type type = attr->prog_type;
2150    struct bpf_prog *prog;
2151    int err;
2152    char license[128];
2153    bool is_gpl;
2154
2155    if (CHECK_ATTR(BPF_PROG_LOAD)) {
2156        return -EINVAL;
2157    }
2158
2159    if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT | BPF_F_TEST_STATE_FREQ | BPF_F_SLEEPABLE |
2160                             BPF_F_TEST_RND_HI32)) {
2161        return -EINVAL;
2162    }
2163
2164    if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
2165        !bpf_capable()) {
2166        return -EPERM;
2167    }
2168
2169    /* copy eBPF program license from user space */
2170    if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) {
2171        return -EFAULT;
2172    }
2173    license[sizeof(license) - 1] = 0;
2174
2175    /* eBPF programs must be GPL compatible to use GPL-ed functions */
2176    is_gpl = license_is_gpl_compatible(license);
2177
2178    if (attr->insn_cnt == 0 || attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) {
2179        return -E2BIG;
2180    }
2181    if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && !bpf_capable()) {
2182        return -EPERM;
2183    }
2184
2185    if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) {
2186        return -EPERM;
2187    }
2188    if (is_perfmon_prog_type(type) && !perfmon_capable()) {
2189        return -EPERM;
2190    }
2191
2192    bpf_prog_load_fixup_attach_type(attr);
2193    if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attr->attach_btf_id, attr->attach_prog_fd)) {
2194        return -EINVAL;
2195    }
2196
2197    /* plain bpf_prog allocation */
2198    prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
2199    if (!prog) {
2200        return -ENOMEM;
2201    }
2202
2203    prog->expected_attach_type = attr->expected_attach_type;
2204    prog->aux->attach_btf_id = attr->attach_btf_id;
2205    if (attr->attach_prog_fd) {
2206        struct bpf_prog *dst_prog;
2207
2208        dst_prog = bpf_prog_get(attr->attach_prog_fd);
2209        if (IS_ERR(dst_prog)) {
2210            err = PTR_ERR(dst_prog);
2211            goto free_prog_nouncharge;
2212        }
2213        prog->aux->dst_prog = dst_prog;
2214    }
2215
2216    prog->aux->offload_requested = !!attr->prog_ifindex;
2217    prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
2218
2219    err = security_bpf_prog_alloc(prog->aux);
2220    if (err) {
2221        goto free_prog_nouncharge;
2222    }
2223
2224    err = bpf_prog_charge_memlock(prog);
2225    if (err) {
2226        goto free_prog_sec;
2227    }
2228
2229    prog->len = attr->insn_cnt;
2230
2231    err = -EFAULT;
2232    if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), bpf_prog_insn_size(prog)) != 0) {
2233        goto free_prog;
2234    }
2235
2236    prog->orig_prog = NULL;
2237    prog->jited = 0;
2238
2239    atomic64_set(&prog->aux->refcnt, 1);
2240    prog->gpl_compatible = is_gpl ? 1 : 0;
2241
2242    if (bpf_prog_is_dev_bound(prog->aux)) {
2243        err = bpf_prog_offload_init(prog, attr);
2244        if (err) {
2245            goto free_prog;
2246        }
2247    }
2248
2249    /* find program type: socket_filter vs tracing_filter */
2250    err = find_prog_type(type, prog);
2251    if (err < 0) {
2252        goto free_prog;
2253    }
2254
2255    prog->aux->load_time = ktime_get_boottime_ns();
2256    err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, sizeof(attr->prog_name));
2257    if (err < 0) {
2258        goto free_prog;
2259    }
2260
2261    /* run eBPF verifier */
2262    err = bpf_check(&prog, attr, uattr);
2263    if (err < 0) {
2264        goto free_used_maps;
2265    }
2266
2267    prog = bpf_prog_select_runtime(prog, &err);
2268    if (err < 0) {
2269        goto free_used_maps;
2270    }
2271
2272    err = bpf_prog_alloc_id(prog);
2273    if (err) {
2274        goto free_used_maps;
2275    }
2276
2277    /* Upon success of bpf_prog_alloc_id(), the BPF prog is
2278     * effectively publicly exposed. However, retrieving via
2279     * bpf_prog_get_fd_by_id() will take another reference,
2280     * therefore it cannot be gone underneath us.
2281     *
2282     * Only for the time /after/ successful bpf_prog_new_fd()
2283     * and before returning to userspace, we might just hold
2284     * one reference and any parallel close on that fd could
2285     * rip everything out. Hence, below notifications must
2286     * happen before bpf_prog_new_fd().
2287     *
2288     * Also, any failure handling from this point onwards must
2289     * be using bpf_prog_put() given the program is exposed.
2290     */
2291    bpf_prog_kallsyms_add(prog);
2292    perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
2293    bpf_audit_prog(prog, BPF_AUDIT_LOAD);
2294
2295    err = bpf_prog_new_fd(prog);
2296    if (err < 0) {
2297        bpf_prog_put(prog);
2298    }
2299    return err;
2300
2301free_used_maps:
2302    /* In case we have subprogs, we need to wait for a grace
2303     * period before we can tear down JIT memory since symbols
2304     * are already exposed under kallsyms.
2305     */
2306    _bpf_prog_put_noref(prog, prog->aux->func_cnt);
2307    return err;
2308free_prog:
2309    bpf_prog_uncharge_memlock(prog);
2310free_prog_sec:
2311    security_bpf_prog_free(prog->aux);
2312free_prog_nouncharge:
2313    bpf_prog_free(prog);
2314    return err;
2315}
2316
2317#define BPF_OBJ_LAST_FIELD file_flags
2318
2319static int bpf_obj_pin(const union bpf_attr *attr)
2320{
2321    if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) {
2322        return -EINVAL;
2323    }
2324
2325    return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
2326}
2327
2328static int bpf_obj_get(const union bpf_attr *attr)
2329{
2330    if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || attr->file_flags & ~BPF_OBJ_FLAG_MASK) {
2331        return -EINVAL;
2332    }
2333
2334    return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), attr->file_flags);
2335}
2336
2337void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops,
2338                   struct bpf_prog *prog)
2339{
2340    atomic64_set(&link->refcnt, 1);
2341    link->type = type;
2342    link->id = 0;
2343    link->ops = ops;
2344    link->prog = prog;
2345}
2346
2347static void bpf_link_free_id(int id)
2348{
2349    if (!id) {
2350        return;
2351    }
2352
2353    spin_lock_bh(&link_idr_lock);
2354    idr_remove(&link_idr, id);
2355    spin_unlock_bh(&link_idr_lock);
2356}
2357
2358/* Clean up bpf_link and corresponding anon_inode file and FD. After
2359 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
2360 * anon_inode's release() call. This helper marks bpf_link as
2361 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
2362 * is not decremented, it's the responsibility of the calling code that failed
2363 * to complete bpf_link initialization.
2364 */
2365void bpf_link_cleanup(struct bpf_link_primer *primer)
2366{
2367    primer->link->prog = NULL;
2368    bpf_link_free_id(primer->id);
2369    fput(primer->file);
2370    put_unused_fd(primer->fd);
2371}
2372
2373void bpf_link_inc(struct bpf_link *link)
2374{
2375    atomic64_inc(&link->refcnt);
2376}
2377
2378/* bpf_link_free is guaranteed to be called from process context */
2379static void bpf_link_free(struct bpf_link *link)
2380{
2381    bpf_link_free_id(link->id);
2382    if (link->prog) {
2383        /* detach BPF program, clean up used resources */
2384        link->ops->release(link);
2385        bpf_prog_put(link->prog);
2386    }
2387    /* free bpf_link and its containing memory */
2388    link->ops->dealloc(link);
2389}
2390
2391static void bpf_link_put_deferred(struct work_struct *work)
2392{
2393    struct bpf_link *link = container_of(work, struct bpf_link, work);
2394
2395    bpf_link_free(link);
2396}
2397
2398/* bpf_link_put can be called from atomic context, but ensures that resources
2399 * are freed from process context
2400 */
2401void bpf_link_put(struct bpf_link *link)
2402{
2403    if (!atomic64_dec_and_test(&link->refcnt)) {
2404        return;
2405    }
2406
2407    if (in_atomic()) {
2408        INIT_WORK(&link->work, bpf_link_put_deferred);
2409        schedule_work(&link->work);
2410    } else {
2411        bpf_link_free(link);
2412    }
2413}
2414
2415static int bpf_link_release(struct inode *inode, struct file *filp)
2416{
2417    struct bpf_link *link = filp->private_data;
2418
2419    bpf_link_put(link);
2420    return 0;
2421}
2422
2423#ifdef CONFIG_PROC_FS
2424#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
2425#define BPF_MAP_TYPE(_id, _ops)
2426#define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
2427static const char *bpf_link_type_strs[] = {
2428    [BPF_LINK_TYPE_UNSPEC] = "<invalid>",
2429#include <linux/bpf_types.h>
2430};
2431#undef BPF_PROG_TYPE
2432#undef BPF_MAP_TYPE
2433#undef BPF_LINK_TYPE
2434
2435static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
2436{
2437    const struct bpf_link *link = filp->private_data;
2438    const struct bpf_prog *prog = link->prog;
2439    char prog_tag[sizeof(prog->tag) * 2 + 1] = {};
2440
2441    bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2442    seq_printf(m,
2443               "link_type:\t%s\n"
2444               "link_id:\t%u\n"
2445               "prog_tag:\t%s\n"
2446               "prog_id:\t%u\n",
2447               bpf_link_type_strs[link->type], link->id, prog_tag, prog->aux->id);
2448    if (link->ops->show_fdinfo) {
2449        link->ops->show_fdinfo(link, m);
2450    }
2451}
2452#endif
2453
2454static const struct file_operations bpf_link_fops = {
2455#ifdef CONFIG_PROC_FS
2456    .show_fdinfo = bpf_link_show_fdinfo,
2457#endif
2458    .release = bpf_link_release,
2459    .read = bpf_dummy_read,
2460    .write = bpf_dummy_write,
2461};
2462
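/* Allocate a cyclic ID for @link from link_idr. Returns a positive ID or a
 * negative errno; the caller is responsible for releasing it with
 * bpf_link_free_id() on failure paths.
 */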
2463static int bpf_link_alloc_id(struct bpf_link *link)
2464{
2465    int id;
2466
2467    idr_preload(GFP_KERNEL);
2468    spin_lock_bh(&link_idr_lock);
2469    id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
2470    spin_unlock_bh(&link_idr_lock);
2471    idr_preload_end();
2472
2473    return id;
2474}
2475
2476/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
2477 * reserving unused FD and allocating ID from link_idr. This is to be paired
2478 * with bpf_link_settle() to install FD and ID and expose bpf_link to
2479 * user-space, if bpf_link is successfully attached. If not, bpf_link and
2480 * pre-allocated resources are to be freed with a bpf_link_cleanup() call. All
2481 * the transient state is passed around in struct bpf_link_primer.
2482 * This is the preferred way to create and initialize bpf_link, especially when
2483 * there are complicated and expensive operations in between creating bpf_link
2484 * itself and attaching it to the BPF hook. By using bpf_link_prime() and
2485 * bpf_link_settle(), kernel code using bpf_link doesn't have to perform
2486 * expensive (and potentially failing) roll back operations in the rare case
2487 * that the file, FD, or ID can't be allocated.
2488 */
2489int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
2490{
2491    struct file *file;
2492    int fd, id;
2493
2494    fd = get_unused_fd_flags(O_CLOEXEC);
2495    if (fd < 0) {
2496        return fd;
2497    }
2498
2499    id = bpf_link_alloc_id(link);
2500    if (id < 0) {
2501        put_unused_fd(fd);
2502        return id;
2503    }
2504
2505    file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
2506    if (IS_ERR(file)) {
2507        bpf_link_free_id(id);
2508        put_unused_fd(fd);
2509        return PTR_ERR(file);
2510    }
2511
2512    primer->link = link;
2513    primer->file = file;
2514    primer->fd = fd;
2515    primer->id = id;
2516    return 0;
2517}
2518
2519int bpf_link_settle(struct bpf_link_primer *primer)
2520{
2521    /* make bpf_link fetchable by ID */
2522    spin_lock_bh(&link_idr_lock);
2523    primer->link->id = primer->id;
2524    spin_unlock_bh(&link_idr_lock);
2525    /* make bpf_link fetchable by FD */
2526    fd_install(primer->fd, primer->file);
2527    /* pass through installed FD */
2528    return primer->fd;
2529}
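
/* A minimal sketch of how a link implementation is expected to use the primer
 * API; attach_to_hook() is only a placeholder for whatever hook-specific
 * attach step the link needs (see bpf_raw_tracepoint_open() below for a
 * concrete example):
 *
 *     err = bpf_link_prime(&link->link, &primer);
 *     if (err) {
 *         kfree(link);
 *         return err;
 *     }
 *     err = attach_to_hook(link);
 *     if (err) {
 *         bpf_link_cleanup(&primer);
 *         return err;
 *     }
 *     return bpf_link_settle(&primer);
 */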
2530
2531int bpf_link_new_fd(struct bpf_link *link)
2532{
2533    return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
2534}
2535
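/* Look up the bpf_link behind @ufd and take a new reference on it. Returns
 * ERR_PTR(-EBADF) for a bad fd and ERR_PTR(-EINVAL) if the fd does not refer
 * to a bpf_link file.
 */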
2536struct bpf_link *bpf_link_get_from_fd(u32 ufd)
2537{
2538    struct fd f = fdget(ufd);
2539    struct bpf_link *link;
2540
2541    if (!f.file) {
2542        return ERR_PTR(-EBADF);
2543    }
2544    if (f.file->f_op != &bpf_link_fops) {
2545        fdput(f);
2546        return ERR_PTR(-EINVAL);
2547    }
2548
2549    link = f.file->private_data;
2550    bpf_link_inc(link);
2551    fdput(f);
2552
2553    return link;
2554}
2555
2556struct bpf_tracing_link {
2557    struct bpf_link link;
2558    enum bpf_attach_type attach_type;
2559    struct bpf_trampoline *trampoline;
2560    struct bpf_prog *tgt_prog;
2561};
2562
2563static void bpf_tracing_link_release(struct bpf_link *link)
2564{
2565    struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2566
2567    WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, tr_link->trampoline));
2568
2569    bpf_trampoline_put(tr_link->trampoline);
2570
2571    /* tgt_prog is NULL if target is a kernel function */
2572    if (tr_link->tgt_prog) {
2573        bpf_prog_put(tr_link->tgt_prog);
2574    }
2575}
2576
2577static void bpf_tracing_link_dealloc(struct bpf_link *link)
2578{
2579    struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2580
2581    kfree(tr_link);
2582}
2583
2584static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq)
2585{
2586    struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2587
2588    seq_printf(seq, "attach_type:\t%d\n", tr_link->attach_type);
2589}
2590
2591static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info)
2592{
2593    struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2594
2595    info->tracing.attach_type = tr_link->attach_type;
2596
2597    return 0;
2598}
2599
2600static const struct bpf_link_ops bpf_tracing_link_lops = {
2601    .release = bpf_tracing_link_release,
2602    .dealloc = bpf_tracing_link_dealloc,
2603    .show_fdinfo = bpf_tracing_link_show_fdinfo,
2604    .fill_link_info = bpf_tracing_link_fill_link_info,
2605};
2606
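/* Attach an already-loaded TRACING, EXT or LSM program to its target via a
 * trampoline and expose the attachment as a bpf_tracing_link. The target is
 * either the one saved in prog->aux at load time, or - for BPF_PROG_TYPE_EXT
 * only - a new (tgt_prog_fd, btf_id) pair supplied by the caller.
 */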
2607static int bpf_tracing_prog_attach(struct bpf_prog *prog, int tgt_prog_fd, u32 btf_id)
2608{
2609    struct bpf_link_primer link_primer;
2610    struct bpf_prog *tgt_prog = NULL;
2611    struct bpf_trampoline *tr = NULL;
2612    struct bpf_tracing_link *link;
2613    u64 key = 0;
2614    int err;
2615
2616    switch (prog->type) {
2617        case BPF_PROG_TYPE_TRACING:
2618            if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT &&
2619                prog->expected_attach_type != BPF_MODIFY_RETURN) {
2620                err = -EINVAL;
2621                goto out_put_prog;
2622            }
2623            break;
2624        case BPF_PROG_TYPE_EXT:
2625            if (prog->expected_attach_type != 0) {
2626                err = -EINVAL;
2627                goto out_put_prog;
2628            }
2629            break;
2630        case BPF_PROG_TYPE_LSM:
2631            if (prog->expected_attach_type != BPF_LSM_MAC) {
2632                err = -EINVAL;
2633                goto out_put_prog;
2634            }
2635            break;
2636        default:
2637            err = -EINVAL;
2638            goto out_put_prog;
2639    }
2640
2641    if (!!tgt_prog_fd != !!btf_id) {
2642        err = -EINVAL;
2643        goto out_put_prog;
2644    }
2645
2646    if (tgt_prog_fd) {
2647        /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
2648        if (prog->type != BPF_PROG_TYPE_EXT) {
2649            err = -EINVAL;
2650            goto out_put_prog;
2651        }
2652
2653        tgt_prog = bpf_prog_get(tgt_prog_fd);
2654        if (IS_ERR(tgt_prog)) {
2655            err = PTR_ERR(tgt_prog);
2656            tgt_prog = NULL;
2657            goto out_put_prog;
2658        }
2659
2660        key = bpf_trampoline_compute_key(tgt_prog, btf_id);
2661    }
2662
2663    link = kzalloc(sizeof(*link), GFP_USER);
2664    if (!link) {
2665        err = -ENOMEM;
2666        goto out_put_prog;
2667    }
2668    bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, &bpf_tracing_link_lops, prog);
2669    link->attach_type = prog->expected_attach_type;
2670
2671    mutex_lock(&prog->aux->dst_mutex);
2672
2673    /* There are a few possible cases here:
2674     *
2675     * - if prog->aux->dst_trampoline is set, the program was just loaded
2676     *   and not yet attached to anything, so we can use the values stored
2677     *   in prog->aux
2678     *
2679     * - if prog->aux->dst_trampoline is NULL, the program has already been
2680     *   attached to a target and its initial target was cleared (below)
2681     *
2682     * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
2683     *   target_btf_id using the link_create API.
2684     *
2685     * - if tgt_prog == NULL, this function was called using the old
2686     *   raw_tracepoint_open API, and we need a target from prog->aux
2687     *
2688     * The combination of no saved target in prog->aux, and no target
2689     * specified on load is illegal, and we reject that here.
2690     */
2691    if (!prog->aux->dst_trampoline && !tgt_prog) {
2692        err = -ENOENT;
2693        goto out_unlock;
2694    }
2695
2696    if (!prog->aux->dst_trampoline || (key && key != prog->aux->dst_trampoline->key)) {
2697        /* If there is no saved target, or the specified target is
2698         * different from the destination specified at load time, we
2699         * need a new trampoline and a check for compatibility
2700         */
2701        struct bpf_attach_target_info tgt_info = {};
2702
2703        err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, &tgt_info);
2704        if (err) {
2705            goto out_unlock;
2706        }
2707
2708        tr = bpf_trampoline_get(key, &tgt_info);
2709        if (!tr) {
2710            err = -ENOMEM;
2711            goto out_unlock;
2712        }
2713    } else {
2714        /* The caller didn't specify a target, or the target was the
2715         * same as the destination supplied during program load. This
2716         * means we can reuse the trampoline and reference from program
2717         * load time, and there is no need to allocate a new one. This
2718         * can only happen once for any program, as the saved values in
2719         * prog->aux are cleared below.
2720         */
2721        tr = prog->aux->dst_trampoline;
2722        tgt_prog = prog->aux->dst_prog;
2723    }
2724
2725    err = bpf_link_prime(&link->link, &link_primer);
2726    if (err) {
2727        goto out_unlock;
2728    }
2729
2730    err = bpf_trampoline_link_prog(prog, tr);
2731    if (err) {
2732        bpf_link_cleanup(&link_primer);
2733        link = NULL;
2734        goto out_unlock;
2735    }
2736
2737    link->tgt_prog = tgt_prog;
2738    link->trampoline = tr;
2739
2740    /* Always clear the trampoline and target prog from prog->aux to make
2741     * sure the original attach destination is not kept alive after a
2742     * program is (re-)attached to another target.
2743     */
2744    if (prog->aux->dst_prog && (tgt_prog_fd || tr != prog->aux->dst_trampoline)) {
2745        /* got extra prog ref from syscall, or attaching to different prog */
2746        bpf_prog_put(prog->aux->dst_prog);
2747    }
2748    if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) {
2749        /* we allocated a new trampoline, so free the old one */
2750        bpf_trampoline_put(prog->aux->dst_trampoline);
2751    }
2752
2753    prog->aux->dst_prog = NULL;
2754    prog->aux->dst_trampoline = NULL;
2755    mutex_unlock(&prog->aux->dst_mutex);
2756
2757    return bpf_link_settle(&link_primer);
2758out_unlock:
2759    if (tr && tr != prog->aux->dst_trampoline) {
2760        bpf_trampoline_put(tr);
2761    }
2762    mutex_unlock(&prog->aux->dst_mutex);
2763    kfree(link);
2764out_put_prog:
2765    if (tgt_prog_fd && tgt_prog) {
2766        bpf_prog_put(tgt_prog);
2767    }
2768    return err;
2769}
2770
2771struct bpf_raw_tp_link {
2772    struct bpf_link link;
2773    struct bpf_raw_event_map *btp;
2774};
2775
2776static void bpf_raw_tp_link_release(struct bpf_link *link)
2777{
2778    struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link);
2779
2780    bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
2781    bpf_put_raw_tracepoint(raw_tp->btp);
2782}
2783
2784static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
2785{
2786    struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link);
2787
2788    kfree(raw_tp);
2789}
2790
2791static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq)
2792{
2793    struct bpf_raw_tp_link *raw_tp_link = container_of(link, struct bpf_raw_tp_link, link);
2794
2795    seq_printf(seq, "tp_name:\t%s\n", raw_tp_link->btp->tp->name);
2796}
2797
2798static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info)
2799{
2800    struct bpf_raw_tp_link *raw_tp_link = container_of(link, struct bpf_raw_tp_link, link);
2801    char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
2802    const char *tp_name = raw_tp_link->btp->tp->name;
2803    u32 ulen = info->raw_tracepoint.tp_name_len;
2804    size_t tp_len = strlen(tp_name);
2805
2806    if (!ulen ^ !ubuf) {
2807        return -EINVAL;
2808    }
2809
2810    info->raw_tracepoint.tp_name_len = tp_len + 1;
2811
2812    if (!ubuf) {
2813        return 0;
2814    }
2815
2816    if (ulen >= tp_len + 1) {
2817        if (copy_to_user(ubuf, tp_name, tp_len + 1)) {
2818            return -EFAULT;
2819        }
2820    } else {
2821        char zero = '\0';
2822
2823        if (copy_to_user(ubuf, tp_name, ulen - 1)) {
2824            return -EFAULT;
2825        }
2826        if (put_user(zero, ubuf + ulen - 1)) {
2827            return -EFAULT;
2828        }
2829        return -ENOSPC;
2830    }
2831
2832    return 0;
2833}
2834
2835static const struct bpf_link_ops bpf_raw_tp_link_lops = {
2836    .release = bpf_raw_tp_link_release,
2837    .dealloc = bpf_raw_tp_link_dealloc,
2838    .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
2839    .fill_link_info = bpf_raw_tp_link_fill_link_info,
2840};
2841
2842#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
2843
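/* BPF_RAW_TRACEPOINT_OPEN command: attach a raw tracepoint program (or a
 * BPF_TRACE_RAW_TP tracing program) to its tracepoint and return a new link
 * FD; other TRACING/EXT/LSM programs are handed off to
 * bpf_tracing_prog_attach().
 */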
2844static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
2845{
2846    struct bpf_link_primer link_primer;
2847    struct bpf_raw_tp_link *link;
2848    struct bpf_raw_event_map *btp;
2849    struct bpf_prog *prog;
2850    const char *tp_name;
2851    char buf[128];
2852    int err;
2853
2854    if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) {
2855        return -EINVAL;
2856    }
2857
2858    prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
2859    if (IS_ERR(prog)) {
2860        return PTR_ERR(prog);
2861    }
2862
2863    switch (prog->type) {
2864        case BPF_PROG_TYPE_TRACING:
2865        case BPF_PROG_TYPE_EXT:
2866        case BPF_PROG_TYPE_LSM:
2867            if (attr->raw_tracepoint.name) {
2868                /* The attach point for this category of programs
2869                 * should be specified via btf_id during program load.
2870                 */
2871                err = -EINVAL;
2872                goto out_put_prog;
2873            }
2874            if (prog->type == BPF_PROG_TYPE_TRACING && prog->expected_attach_type == BPF_TRACE_RAW_TP) {
2875                tp_name = prog->aux->attach_func_name;
2876                break;
2877            }
2878            err = bpf_tracing_prog_attach(prog, 0, 0);
2879            if (err >= 0) {
2880                return err;
2881            }
2882            goto out_put_prog;
2883        case BPF_PROG_TYPE_RAW_TRACEPOINT:
2884        case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2885            if (strncpy_from_user(buf, u64_to_user_ptr(attr->raw_tracepoint.name), sizeof(buf) - 1) < 0) {
2886                err = -EFAULT;
2887                goto out_put_prog;
2888            }
2889            buf[sizeof(buf) - 1] = 0;
2890            tp_name = buf;
2891            break;
2892        default:
2893            err = -EINVAL;
2894            goto out_put_prog;
2895    }
2896
2897    btp = bpf_get_raw_tracepoint(tp_name);
2898    if (!btp) {
2899        err = -ENOENT;
2900        goto out_put_prog;
2901    }
2902
2903    link = kzalloc(sizeof(*link), GFP_USER);
2904    if (!link) {
2905        err = -ENOMEM;
2906        goto out_put_btp;
2907    }
2908    bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, &bpf_raw_tp_link_lops, prog);
2909    link->btp = btp;
2910
2911    err = bpf_link_prime(&link->link, &link_primer);
2912    if (err) {
2913        kfree(link);
2914        goto out_put_btp;
2915    }
2916
2917    err = bpf_probe_register(link->btp, prog);
2918    if (err) {
2919        bpf_link_cleanup(&link_primer);
2920        goto out_put_btp;
2921    }
2922
2923    return bpf_link_settle(&link_primer);
2924
2925out_put_btp:
2926    bpf_put_raw_tracepoint(btp);
2927out_put_prog:
2928    bpf_prog_put(prog);
2929    return err;
2930}
2931
2932static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, enum bpf_attach_type attach_type)
2933{
2934    switch (prog->type) {
2935        case BPF_PROG_TYPE_CGROUP_SOCK:
2936        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2937        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2938        case BPF_PROG_TYPE_SK_LOOKUP:
2939            return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
2940        case BPF_PROG_TYPE_CGROUP_SKB:
2941            if (!capable(CAP_NET_ADMIN)) {
2942                /* cg-skb progs can be loaded by unpriv user.
2943                 * check permissions at attach time.
2944                 */
2945                return -EPERM;
2946            }
2947            return prog->enforce_expected_attach_type && prog->expected_attach_type != attach_type ? -EINVAL : 0;
2948        default:
2949            return 0;
2950    }
2951}
2952
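/* Map an attach type from the BPF_PROG_ATTACH/DETACH/QUERY family of commands
 * to the program type expected to be attached there. Returns
 * BPF_PROG_TYPE_UNSPEC for attach types that are not handled through this
 * path.
 */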
2953static enum bpf_prog_type attach_type_to_prog_type(enum bpf_attach_type attach_type)
2954{
2955    switch (attach_type) {
2956        case BPF_CGROUP_INET_INGRESS:
2957        case BPF_CGROUP_INET_EGRESS:
2958            return BPF_PROG_TYPE_CGROUP_SKB;
2959        case BPF_CGROUP_INET_SOCK_CREATE:
2960        case BPF_CGROUP_INET_SOCK_RELEASE:
2961        case BPF_CGROUP_INET4_POST_BIND:
2962        case BPF_CGROUP_INET6_POST_BIND:
2963            return BPF_PROG_TYPE_CGROUP_SOCK;
2964        case BPF_CGROUP_INET4_BIND:
2965        case BPF_CGROUP_INET6_BIND:
2966        case BPF_CGROUP_INET4_CONNECT:
2967        case BPF_CGROUP_INET6_CONNECT:
2968        case BPF_CGROUP_INET4_GETPEERNAME:
2969        case BPF_CGROUP_INET6_GETPEERNAME:
2970        case BPF_CGROUP_INET4_GETSOCKNAME:
2971        case BPF_CGROUP_INET6_GETSOCKNAME:
2972        case BPF_CGROUP_UDP4_SENDMSG:
2973        case BPF_CGROUP_UDP6_SENDMSG:
2974        case BPF_CGROUP_UDP4_RECVMSG:
2975        case BPF_CGROUP_UDP6_RECVMSG:
2976            return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
2977        case BPF_CGROUP_SOCK_OPS:
2978            return BPF_PROG_TYPE_SOCK_OPS;
2979        case BPF_CGROUP_DEVICE:
2980            return BPF_PROG_TYPE_CGROUP_DEVICE;
2981        case BPF_SK_MSG_VERDICT:
2982            return BPF_PROG_TYPE_SK_MSG;
2983        case BPF_SK_SKB_STREAM_PARSER:
2984        case BPF_SK_SKB_STREAM_VERDICT:
2985            return BPF_PROG_TYPE_SK_SKB;
2986        case BPF_LIRC_MODE2:
2987            return BPF_PROG_TYPE_LIRC_MODE2;
2988        case BPF_FLOW_DISSECTOR:
2989            return BPF_PROG_TYPE_FLOW_DISSECTOR;
2990        case BPF_CGROUP_SYSCTL:
2991            return BPF_PROG_TYPE_CGROUP_SYSCTL;
2992        case BPF_CGROUP_GETSOCKOPT:
2993        case BPF_CGROUP_SETSOCKOPT:
2994            return BPF_PROG_TYPE_CGROUP_SOCKOPT;
2995        case BPF_TRACE_ITER:
2996            return BPF_PROG_TYPE_TRACING;
2997        case BPF_SK_LOOKUP:
2998            return BPF_PROG_TYPE_SK_LOOKUP;
2999        case BPF_XDP:
3000            return BPF_PROG_TYPE_XDP;
3001        default:
3002            return BPF_PROG_TYPE_UNSPEC;
3003    }
3004}
3005
3006#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
3007
3008#define BPF_F_ATTACH_MASK (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
3009
3010static int bpf_prog_attach(const union bpf_attr *attr)
3011{
3012    enum bpf_prog_type ptype;
3013    struct bpf_prog *prog;
3014    int ret;
3015
3016    if (CHECK_ATTR(BPF_PROG_ATTACH)) {
3017        return -EINVAL;
3018    }
3019
3020    if (attr->attach_flags & ~BPF_F_ATTACH_MASK) {
3021        return -EINVAL;
3022    }
3023
3024    ptype = attach_type_to_prog_type(attr->attach_type);
3025    if (ptype == BPF_PROG_TYPE_UNSPEC) {
3026        return -EINVAL;
3027    }
3028
3029    prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
3030    if (IS_ERR(prog)) {
3031        return PTR_ERR(prog);
3032    }
3033
3034    if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
3035        bpf_prog_put(prog);
3036        return -EINVAL;
3037    }
3038
3039    switch (ptype) {
3040        case BPF_PROG_TYPE_SK_SKB:
3041        case BPF_PROG_TYPE_SK_MSG:
3042            ret = sock_map_get_from_fd(attr, prog);
3043            break;
3044        case BPF_PROG_TYPE_LIRC_MODE2:
3045            ret = lirc_prog_attach(attr, prog);
3046            break;
3047        case BPF_PROG_TYPE_FLOW_DISSECTOR:
3048            ret = netns_bpf_prog_attach(attr, prog);
3049            break;
3050        case BPF_PROG_TYPE_CGROUP_DEVICE:
3051        case BPF_PROG_TYPE_CGROUP_SKB:
3052        case BPF_PROG_TYPE_CGROUP_SOCK:
3053        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3054        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3055        case BPF_PROG_TYPE_CGROUP_SYSCTL:
3056        case BPF_PROG_TYPE_SOCK_OPS:
3057            ret = cgroup_bpf_prog_attach(attr, ptype, prog);
3058            break;
3059        default:
3060            ret = -EINVAL;
3061    }
3062
3063    if (ret) {
3064        bpf_prog_put(prog);
3065    }
3066    return ret;
3067}
3068
3069#define BPF_PROG_DETACH_LAST_FIELD attach_type
3070
3071static int bpf_prog_detach(const union bpf_attr *attr)
3072{
3073    enum bpf_prog_type ptype;
3074
3075    if (CHECK_ATTR(BPF_PROG_DETACH)) {
3076        return -EINVAL;
3077    }
3078
3079    ptype = attach_type_to_prog_type(attr->attach_type);
3080
3081    switch (ptype) {
3082        case BPF_PROG_TYPE_SK_MSG:
3083        case BPF_PROG_TYPE_SK_SKB:
3084            return sock_map_prog_detach(attr, ptype);
3085        case BPF_PROG_TYPE_LIRC_MODE2:
3086            return lirc_prog_detach(attr);
3087        case BPF_PROG_TYPE_FLOW_DISSECTOR:
3088            return netns_bpf_prog_detach(attr, ptype);
3089        case BPF_PROG_TYPE_CGROUP_DEVICE:
3090        case BPF_PROG_TYPE_CGROUP_SKB:
3091        case BPF_PROG_TYPE_CGROUP_SOCK:
3092        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3093        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3094        case BPF_PROG_TYPE_CGROUP_SYSCTL:
3095        case BPF_PROG_TYPE_SOCK_OPS:
3096            return cgroup_bpf_prog_detach(attr, ptype);
3097        default:
3098            return -EINVAL;
3099    }
3100}
3101
3102#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
3103
3104static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
3105{
3106    if (!capable(CAP_NET_ADMIN)) {
3107        return -EPERM;
3108    }
3109    if (CHECK_ATTR(BPF_PROG_QUERY)) {
3110        return -EINVAL;
3111    }
3112    if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) {
3113        return -EINVAL;
3114    }
3115
3116    switch (attr->query.attach_type) {
3117        case BPF_CGROUP_INET_INGRESS:
3118        case BPF_CGROUP_INET_EGRESS:
3119        case BPF_CGROUP_INET_SOCK_CREATE:
3120        case BPF_CGROUP_INET_SOCK_RELEASE:
3121        case BPF_CGROUP_INET4_BIND:
3122        case BPF_CGROUP_INET6_BIND:
3123        case BPF_CGROUP_INET4_POST_BIND:
3124        case BPF_CGROUP_INET6_POST_BIND:
3125        case BPF_CGROUP_INET4_CONNECT:
3126        case BPF_CGROUP_INET6_CONNECT:
3127        case BPF_CGROUP_INET4_GETPEERNAME:
3128        case BPF_CGROUP_INET6_GETPEERNAME:
3129        case BPF_CGROUP_INET4_GETSOCKNAME:
3130        case BPF_CGROUP_INET6_GETSOCKNAME:
3131        case BPF_CGROUP_UDP4_SENDMSG:
3132        case BPF_CGROUP_UDP6_SENDMSG:
3133        case BPF_CGROUP_UDP4_RECVMSG:
3134        case BPF_CGROUP_UDP6_RECVMSG:
3135        case BPF_CGROUP_SOCK_OPS:
3136        case BPF_CGROUP_DEVICE:
3137        case BPF_CGROUP_SYSCTL:
3138        case BPF_CGROUP_GETSOCKOPT:
3139        case BPF_CGROUP_SETSOCKOPT:
3140            return cgroup_bpf_prog_query(attr, uattr);
3141        case BPF_LIRC_MODE2:
3142            return lirc_prog_query(attr, uattr);
3143        case BPF_FLOW_DISSECTOR:
3144        case BPF_SK_LOOKUP:
3145            return netns_bpf_prog_query(attr, uattr);
3146        default:
3147            return -EINVAL;
3148    }
3149}
3150
3151#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
3152
3153static int bpf_prog_test_run(const union bpf_attr *attr, union bpf_attr __user *uattr)
3154{
3155    struct bpf_prog *prog;
3156    int ret = -ENOTSUPP;
3157
3158    if (CHECK_ATTR(BPF_PROG_TEST_RUN)) {
3159        return -EINVAL;
3160    }
3161
3162    if ((attr->test.ctx_size_in && !attr->test.ctx_in) || (!attr->test.ctx_size_in && attr->test.ctx_in)) {
3163        return -EINVAL;
3164    }
3165
3166    if ((attr->test.ctx_size_out && !attr->test.ctx_out) || (!attr->test.ctx_size_out && attr->test.ctx_out)) {
3167        return -EINVAL;
3168    }
3169
3170    prog = bpf_prog_get(attr->test.prog_fd);
3171    if (IS_ERR(prog)) {
3172        return PTR_ERR(prog);
3173    }
3174
3175    if (prog->aux->ops->test_run) {
3176        ret = prog->aux->ops->test_run(prog, attr, uattr);
3177    }
3178
3179    bpf_prog_put(prog);
3180    return ret;
3181}
3182
3183#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
3184
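/* Common implementation of the *_GET_NEXT_ID commands: find the smallest ID
 * greater than attr->start_id in @idr and report it through uattr->next_id.
 * Returns -ENOENT when there is no such object.
 */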
3185static int bpf_obj_get_next_id(const union bpf_attr *attr, union bpf_attr __user *uattr, struct idr *idr,
3186                               spinlock_t *lock)
3187{
3188    u32 next_id = attr->start_id;
3189    int err = 0;
3190
3191    if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) {
3192        return -EINVAL;
3193    }
3194
3195    if (!capable(CAP_SYS_ADMIN)) {
3196        return -EPERM;
3197    }
3198
3199    next_id++;
3200    spin_lock_bh(lock);
3201    if (!idr_get_next(idr, &next_id)) {
3202        err = -ENOENT;
3203    }
3204    spin_unlock_bh(lock);
3205
3206    if (!err) {
3207        err = put_user(next_id, &uattr->next_id);
3208    }
3209
3210    return err;
3211}
3212
3213struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
3214{
3215    struct bpf_map *map;
3216
3217    spin_lock_bh(&map_idr_lock);
3218
3219    while (1) {
3220        map = idr_get_next(&map_idr, id);
3221        if (map) {
3222            map = _bpf_map_inc_not_zero(map, false);
3223            if (IS_ERR(map)) {
3224                (*id)++;
3225                continue;
3226            }
3227        }
3228        break;
3229    }
3230    spin_unlock_bh(&map_idr_lock);
3231
3232    return map;
3233}
3234
3235struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
3236{
3237    struct bpf_prog *prog;
3238
3239    spin_lock_bh(&prog_idr_lock);
3240    while (1) {
3241        prog = idr_get_next(&prog_idr, id);
3242        if (prog) {
3243            prog = bpf_prog_inc_not_zero(prog);
3244            if (IS_ERR(prog)) {
3245                (*id)++;
3246                continue;
3247            }
3248        }
3249        break;
3250    }
3251    spin_unlock_bh(&prog_idr_lock);
3252
3253    return prog;
3254}
3255
3256#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
3257
3258struct bpf_prog *bpf_prog_by_id(u32 id)
3259{
3260    struct bpf_prog *prog;
3261
3262    if (!id) {
3263        return ERR_PTR(-ENOENT);
3264    }
3265
3266    spin_lock_bh(&prog_idr_lock);
3267    prog = idr_find(&prog_idr, id);
3268    if (prog) {
3269        prog = bpf_prog_inc_not_zero(prog);
3270    } else {
3271        prog = ERR_PTR(-ENOENT);
3272    }
3273    spin_unlock_bh(&prog_idr_lock);
3274    return prog;
3275}
3276
3277static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
3278{
3279    struct bpf_prog *prog;
3280    u32 id = attr->prog_id;
3281    int fd;
3282
3283    if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) {
3284        return -EINVAL;
3285    }
3286
3287    if (!capable(CAP_SYS_ADMIN)) {
3288        return -EPERM;
3289    }
3290
3291    prog = bpf_prog_by_id(id);
3292    if (IS_ERR(prog)) {
3293        return PTR_ERR(prog);
3294    }
3295
3296    fd = bpf_prog_new_fd(prog);
3297    if (fd < 0) {
3298        bpf_prog_put(prog);
3299    }
3300
3301    return fd;
3302}
3303
3304#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
3305
3306static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
3307{
3308    struct bpf_map *map;
3309    u32 id = attr->map_id;
3310    int f_flags;
3311    int fd;
3312
3313    if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || attr->open_flags & ~BPF_OBJ_FLAG_MASK) {
3314        return -EINVAL;
3315    }
3316
3317    if (!capable(CAP_SYS_ADMIN)) {
3318        return -EPERM;
3319    }
3320
3321    f_flags = bpf_get_file_flag(attr->open_flags);
3322    if (f_flags < 0) {
3323        return f_flags;
3324    }
3325
3326    spin_lock_bh(&map_idr_lock);
3327    map = idr_find(&map_idr, id);
3328    if (map) {
3329        map = _bpf_map_inc_not_zero(map, true);
3330    } else {
3331        map = ERR_PTR(-ENOENT);
3332    }
3333    spin_unlock_bh(&map_idr_lock);
3334
3335    if (IS_ERR(map)) {
3336        return PTR_ERR(map);
3337    }
3338
3339    fd = bpf_map_new_fd(map, f_flags);
3340    if (fd < 0) {
3341        bpf_map_put_with_uref(map);
3342    }
3343
3344    return fd;
3345}
3346
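/* Given an address embedded in a BPF_LD_IMM64 instruction, find the used map
 * it refers to - either the map itself or an address inside the map's value
 * area - and report the pseudo source register type and value offset so the
 * instruction dump can be rewritten in terms of map IDs.
 */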
3347static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, unsigned long addr, u32 *off, u32 *type)
3348{
3349    const struct bpf_map *map;
3350    int i;
3351
3352    mutex_lock(&prog->aux->used_maps_mutex);
3353    for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
3354        map = prog->aux->used_maps[i];
3355        if (map == (void *)addr) {
3356            *type = BPF_PSEUDO_MAP_FD;
3357            goto out;
3358        }
3359        if (!map->ops->map_direct_value_meta) {
3360            continue;
3361        }
3362        if (!map->ops->map_direct_value_meta(map, addr, off)) {
3363            *type = BPF_PSEUDO_MAP_VALUE;
3364            goto out;
3365        }
3366    }
3367    map = NULL;
3368
3369out:
3370    mutex_unlock(&prog->aux->used_maps_mutex);
3371    return map;
3372}
3373
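/* Build a sanitized copy of the program's instructions for dumping to user
 * space: undo the tail-call and probe-memory rewrites done during
 * verification, zero raw call immediates for callers that may not see kernel
 * addresses, and convert map pointer immediates back to map IDs.
 */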
3374static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, const struct cred *f_cred)
3375{
3376    const struct bpf_map *map;
3377    struct bpf_insn *insns;
3378    u32 off, type;
3379    u64 imm;
3380    u8 code;
3381    int i;
3382
3383    insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), GFP_USER);
3384    if (!insns) {
3385        return insns;
3386    }
3387
3388    for (i = 0; i < prog->len; i++) {
3389        code = insns[i].code;
3390
3391        if (code == (BPF_JMP | BPF_TAIL_CALL)) {
3392            insns[i].code = BPF_JMP | BPF_CALL;
3393            insns[i].imm = BPF_FUNC_tail_call;
3394            /* fall-through */
3395        }
3396        if (code == (BPF_JMP | BPF_CALL) || code == (BPF_JMP | BPF_CALL_ARGS)) {
3397            if (code == (BPF_JMP | BPF_CALL_ARGS)) {
3398                insns[i].code = BPF_JMP | BPF_CALL;
3399            }
3400            if (!bpf_dump_raw_ok(f_cred)) {
3401                insns[i].imm = 0;
3402            }
3403            continue;
3404        }
3405        if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
3406            insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
3407            continue;
3408        }
3409
3410        if (code != (BPF_LD | BPF_IMM | BPF_DW)) {
3411            continue;
3412        }
3413
3414        imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
3415        map = bpf_map_from_imm(prog, imm, &off, &type);
3416        if (map) {
3417            insns[i].src_reg = type;
3418            insns[i].imm = map->id;
3419            insns[i + 1].imm = off;
3420            continue;
3421        }
3422    }
3423
3424    return insns;
3425}
3426
3427static int set_info_rec_size(struct bpf_prog_info *info)
3428{
3429    /*
3430     * Ensure info.*_rec_size is the same as kernel expected size
3431     *
3432     * or
3433     *
3434     * Only allow zero *_rec_size if both _rec_size and _cnt are
3435     * zero.  In this case, the kernel will set the expected
3436     * _rec_size back to the info.
3437     */
3438
3439    if ((info->nr_func_info || info->func_info_rec_size) && info->func_info_rec_size != sizeof(struct bpf_func_info)) {
3440        return -EINVAL;
3441    }
3442
3443    if ((info->nr_line_info || info->line_info_rec_size) && info->line_info_rec_size != sizeof(struct bpf_line_info)) {
3444        return -EINVAL;
3445    }
3446
3447    if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
3448        info->jited_line_info_rec_size != sizeof(__u64)) {
3449        return -EINVAL;
3450    }
3451
3452    info->func_info_rec_size = sizeof(struct bpf_func_info);
3453    info->line_info_rec_size = sizeof(struct bpf_line_info);
3454    info->jited_line_info_rec_size = sizeof(__u64);
3455
3456    return 0;
3457}
3458
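/* BPF_OBJ_GET_INFO_BY_FD for programs: fill struct bpf_prog_info and copy as
 * much of it as the user buffer can hold. Translated and JITed images, ksym
 * addresses and related details are only exposed to sufficiently privileged
 * callers (bpf_capable(), plus bpf_dump_raw_ok() where raw addresses are
 * involved).
 */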
3459static int bpf_prog_get_info_by_fd(struct file *file, struct bpf_prog *prog, const union bpf_attr *attr,
3460                                   union bpf_attr __user *uattr)
3461{
3462    struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3463    struct bpf_prog_info info;
3464    u32 info_len = attr->info.info_len;
3465    struct bpf_prog_stats stats;
3466    char __user *uinsns;
3467    u32 ulen;
3468    int err;
3469
3470    err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3471    if (err) {
3472        return err;
3473    }
3474    info_len = min_t(u32, sizeof(info), info_len);
3475
3476    memset(&info, 0, sizeof(info));
3477    if (copy_from_user(&info, uinfo, info_len)) {
3478        return -EFAULT;
3479    }
3480
3481    info.type = prog->type;
3482    info.id = prog->aux->id;
3483    info.load_time = prog->aux->load_time;
3484    info.created_by_uid = from_kuid_munged(current_user_ns(), prog->aux->user->uid);
3485    info.gpl_compatible = prog->gpl_compatible;
3486
3487    memcpy(info.tag, prog->tag, sizeof(prog->tag));
3488    memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
3489
3490    mutex_lock(&prog->aux->used_maps_mutex);
3491    ulen = info.nr_map_ids;
3492    info.nr_map_ids = prog->aux->used_map_cnt;
3493    ulen = min_t(u32, info.nr_map_ids, ulen);
3494    if (ulen) {
3495        u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
3496        u32 i;
3497
3498        for (i = 0; i < ulen; i++) {
3499            if (put_user(prog->aux->used_maps[i]->id, &user_map_ids[i])) {
3500                mutex_unlock(&prog->aux->used_maps_mutex);
3501                return -EFAULT;
3502            }
3503        }
3504    }
3505    mutex_unlock(&prog->aux->used_maps_mutex);
3506
3507    err = set_info_rec_size(&info);
3508    if (err) {
3509        return err;
3510    }
3511
3512    bpf_prog_get_stats(prog, &stats);
3513    info.run_time_ns = stats.nsecs;
3514    info.run_cnt = stats.cnt;
3515
3516    if (!bpf_capable()) {
3517        info.jited_prog_len = 0;
3518        info.xlated_prog_len = 0;
3519        info.nr_jited_ksyms = 0;
3520        info.nr_jited_func_lens = 0;
3521        info.nr_func_info = 0;
3522        info.nr_line_info = 0;
3523        info.nr_jited_line_info = 0;
3524        goto done;
3525    }
3526
3527    ulen = info.xlated_prog_len;
3528    info.xlated_prog_len = bpf_prog_insn_size(prog);
3529    if (info.xlated_prog_len && ulen) {
3530        struct bpf_insn *insns_sanitized;
3531        bool fault;
3532
3533        if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
3534            info.xlated_prog_insns = 0;
3535            goto done;
3536        }
3537        insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
3538        if (!insns_sanitized) {
3539            return -ENOMEM;
3540        }
3541        uinsns = u64_to_user_ptr(info.xlated_prog_insns);
3542        ulen = min_t(u32, info.xlated_prog_len, ulen);
3543        fault = copy_to_user(uinsns, insns_sanitized, ulen);
3544        kfree(insns_sanitized);
3545        if (fault) {
3546            return -EFAULT;
3547        }
3548    }
3549
3550    if (bpf_prog_is_dev_bound(prog->aux)) {
3551        err = bpf_prog_offload_info_fill(&info, prog);
3552        if (err) {
3553            return err;
3554        }
3555        goto done;
3556    }
3557
3558    /* NOTE: the following code is supposed to be skipped for offload.
3559     * bpf_prog_offload_info_fill() is the place to fill similar fields
3560     * for offload.
3561     */
3562    ulen = info.jited_prog_len;
3563    if (prog->aux->func_cnt) {
3564        u32 i;
3565
3566        info.jited_prog_len = 0;
3567        for (i = 0; i < prog->aux->func_cnt; i++) {
3568            info.jited_prog_len += prog->aux->func[i]->jited_len;
3569        }
3570    } else {
3571        info.jited_prog_len = prog->jited_len;
3572    }
3573
3574    if (info.jited_prog_len && ulen) {
3575        if (bpf_dump_raw_ok(file->f_cred)) {
3576            uinsns = u64_to_user_ptr(info.jited_prog_insns);
3577            ulen = min_t(u32, info.jited_prog_len, ulen);
3578
3579            /* for multi-function programs, copy the JITed
3580             * instructions for all the functions
3581             */
3582            if (prog->aux->func_cnt) {
3583                u32 len, free, i;
3584                u8 *img;
3585
3586                free = ulen;
3587                for (i = 0; i < prog->aux->func_cnt; i++) {
3588                    len = prog->aux->func[i]->jited_len;
3589                    len = min_t(u32, len, free);
3590                    img = (u8 *)prog->aux->func[i]->bpf_func;
3591                    if (copy_to_user(uinsns, img, len)) {
3592                        return -EFAULT;
3593                    }
3594                    uinsns += len;
3595                    free -= len;
3596                    if (!free) {
3597                        break;
3598                    }
3599                }
3600            } else {
3601                if (copy_to_user(uinsns, prog->bpf_func, ulen)) {
3602                    return -EFAULT;
3603                }
3604            }
3605        } else {
3606            info.jited_prog_insns = 0;
3607        }
3608    }
3609
3610    ulen = info.nr_jited_ksyms;
3611    info.nr_jited_ksyms = prog->aux->func_cnt ?: 1;
3612    if (ulen) {
3613        if (bpf_dump_raw_ok(file->f_cred)) {
3614            unsigned long ksym_addr;
3615            u64 __user *user_ksyms;
3616            u32 i;
3617
3618            /* copy the address of the kernel symbol
3619             * corresponding to each function
3620             */
3621            ulen = min_t(u32, info.nr_jited_ksyms, ulen);
3622            user_ksyms = u64_to_user_ptr(info.jited_ksyms);
3623            if (prog->aux->func_cnt) {
3624                for (i = 0; i < ulen; i++) {
3625                    ksym_addr = (unsigned long)prog->aux->func[i]->bpf_func;
3626                    if (put_user((u64)ksym_addr, &user_ksyms[i])) {
3627                        return -EFAULT;
3628                    }
3629                }
3630            } else {
3631                ksym_addr = (unsigned long)prog->bpf_func;
3632                if (put_user((u64)ksym_addr, &user_ksyms[0])) {
3633                    return -EFAULT;
3634                }
3635            }
3636        } else {
3637            info.jited_ksyms = 0;
3638        }
3639    }
3640
3641    ulen = info.nr_jited_func_lens;
3642    info.nr_jited_func_lens = prog->aux->func_cnt ?: 1;
3643    if (ulen) {
3644        if (bpf_dump_raw_ok(file->f_cred)) {
3645            u32 __user *user_lens;
3646            u32 func_len, i;
3647
3648            /* copy the JITed image lengths for each function */
3649            ulen = min_t(u32, info.nr_jited_func_lens, ulen);
3650            user_lens = u64_to_user_ptr(info.jited_func_lens);
3651            if (prog->aux->func_cnt) {
3652                for (i = 0; i < ulen; i++) {
3653                    func_len = prog->aux->func[i]->jited_len;
3654                    if (put_user(func_len, &user_lens[i])) {
3655                        return -EFAULT;
3656                    }
3657                }
3658            } else {
3659                func_len = prog->jited_len;
3660                if (put_user(func_len, &user_lens[0])) {
3661                    return -EFAULT;
3662                }
3663            }
3664        } else {
3665            info.jited_func_lens = 0;
3666        }
3667    }
3668
3669    if (prog->aux->btf) {
3670        info.btf_id = btf_id(prog->aux->btf);
3671    }
3672
3673    ulen = info.nr_func_info;
3674    info.nr_func_info = prog->aux->func_info_cnt;
3675    if (info.nr_func_info && ulen) {
3676        char __user *user_finfo;
3677
3678        user_finfo = u64_to_user_ptr(info.func_info);
3679        ulen = min_t(u32, info.nr_func_info, ulen);
3680        if (copy_to_user(user_finfo, prog->aux->func_info, info.func_info_rec_size * ulen)) {
3681            return -EFAULT;
3682        }
3683    }
3684
3685    ulen = info.nr_line_info;
3686    info.nr_line_info = prog->aux->nr_linfo;
3687    if (info.nr_line_info && ulen) {
3688        __u8 __user *user_linfo;
3689
3690        user_linfo = u64_to_user_ptr(info.line_info);
3691        ulen = min_t(u32, info.nr_line_info, ulen);
3692        if (copy_to_user(user_linfo, prog->aux->linfo, info.line_info_rec_size * ulen)) {
3693            return -EFAULT;
3694        }
3695    }
3696
3697    ulen = info.nr_jited_line_info;
3698    if (prog->aux->jited_linfo) {
3699        info.nr_jited_line_info = prog->aux->nr_linfo;
3700    } else {
3701        info.nr_jited_line_info = 0;
3702    }
3703    if (info.nr_jited_line_info && ulen) {
3704        if (bpf_dump_raw_ok(file->f_cred)) {
3705            __u64 __user *user_linfo;
3706            u32 i;
3707
3708            user_linfo = u64_to_user_ptr(info.jited_line_info);
3709            ulen = min_t(u32, info.nr_jited_line_info, ulen);
3710            for (i = 0; i < ulen; i++) {
3711                if (put_user((__u64)(long)prog->aux->jited_linfo[i], &user_linfo[i])) {
3712                    return -EFAULT;
3713                }
3714            }
3715        } else {
3716            info.jited_line_info = 0;
3717        }
3718    }
3719
3720    ulen = info.nr_prog_tags;
3721    info.nr_prog_tags = prog->aux->func_cnt ?: 1;
3722    if (ulen) {
3723        __u8 __user(*user_prog_tags)[BPF_TAG_SIZE];
3724        u32 i;
3725
3726        user_prog_tags = u64_to_user_ptr(info.prog_tags);
3727        ulen = min_t(u32, info.nr_prog_tags, ulen);
3728        if (prog->aux->func_cnt) {
3729            for (i = 0; i < ulen; i++) {
3730                if (copy_to_user(user_prog_tags[i], prog->aux->func[i]->tag, BPF_TAG_SIZE)) {
3731                    return -EFAULT;
3732                }
3733            }
3734        } else {
3735            if (copy_to_user(user_prog_tags[0], prog->tag, BPF_TAG_SIZE)) {
3736                return -EFAULT;
3737            }
3738        }
3739    }
3740
3741done:
3742    if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) {
3743        return -EFAULT;
3744    }
3745
3746    return 0;
3747}
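/* Usage note (illustrative userspace sketch, not part of this file): the
 * prog-info handler above writes the *actual* counts/lengths back into the
 * info struct, so callers typically issue the command twice - once to learn
 * the sizes, then again with buffers attached. Assumes the <linux/bpf.h>
 * UAPI definitions and a raw bpf(2) syscall, with prog_fd valid:
 *
 *	struct bpf_prog_info pi = {};
 *	union bpf_attr a = {};
 *
 *	a.info.bpf_fd = prog_fd;
 *	a.info.info_len = sizeof(pi);
 *	a.info.info = (__u64)(unsigned long)&pi;
 *	syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &a, sizeof(a));
 *	// pi.jited_prog_len, pi.nr_jited_ksyms, ... now hold the real sizes;
 *	// allocate buffers, point pi.jited_prog_insns etc. at them, set the
 *	// length/count fields and call BPF_OBJ_GET_INFO_BY_FD again.
 */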
3748
3749static int bpf_map_get_info_by_fd(struct file *file, struct bpf_map *map, const union bpf_attr *attr,
3750                                  union bpf_attr __user *uattr)
3751{
3752    struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3753    struct bpf_map_info info;
3754    u32 info_len = attr->info.info_len;
3755    int err;
3756
3757    err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3758    if (err) {
3759        return err;
3760    }
3761    info_len = min_t(u32, sizeof(info), info_len);
3762
3763    memset(&info, 0, sizeof(info));
3764    info.type = map->map_type;
3765    info.id = map->id;
3766    info.key_size = map->key_size;
3767    info.value_size = map->value_size;
3768    info.max_entries = map->max_entries;
3769    info.map_flags = map->map_flags;
3770    memcpy(info.name, map->name, sizeof(map->name));
3771
3772    if (map->btf) {
3773        info.btf_id = btf_id(map->btf);
3774        info.btf_key_type_id = map->btf_key_type_id;
3775        info.btf_value_type_id = map->btf_value_type_id;
3776    }
3777    info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
3778
3779    if (bpf_map_is_dev_bound(map)) {
3780        err = bpf_map_offload_info_fill(&info, map);
3781        if (err) {
3782            return err;
3783        }
3784    }
3785
3786    if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) {
3787        return -EFAULT;
3788    }
3789
3790    return 0;
3791}
3792
3793static int bpf_btf_get_info_by_fd(struct file *file, struct btf *btf, const union bpf_attr *attr,
3794                                  union bpf_attr __user *uattr)
3795{
3796    struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3797    u32 info_len = attr->info.info_len;
3798    int err;
3799
3800    err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
3801    if (err) {
3802        return err;
3803    }
3804
3805    return btf_get_info_by_fd(btf, attr, uattr);
3806}
3807
3808static int bpf_link_get_info_by_fd(struct file *file, struct bpf_link *link, const union bpf_attr *attr,
3809                                   union bpf_attr __user *uattr)
3810{
3811    struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3812    struct bpf_link_info info;
3813    u32 info_len = attr->info.info_len;
3814    int err;
3815
3816    err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3817    if (err) {
3818        return err;
3819    }
3820    info_len = min_t(u32, sizeof(info), info_len);
3821
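    /* Unlike prog/map info, the link info struct is read from userspace
     * first: some link types (e.g. raw tracepoints) carry user-supplied
     * buffer pointers that fill_link_info() writes the attach target's
     * name into.
     */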
3822    memset(&info, 0, sizeof(info));
3823    if (copy_from_user(&info, uinfo, info_len)) {
3824        return -EFAULT;
3825    }
3826
3827    info.type = link->type;
3828    info.id = link->id;
3829    info.prog_id = link->prog->aux->id;
3830
3831    if (link->ops->fill_link_info) {
3832        err = link->ops->fill_link_info(link, &info);
3833        if (err) {
3834            return err;
3835        }
3836    }
3837
3838    if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) {
3839        return -EFAULT;
3840    }
3841
3842    return 0;
3843}
3844
3845#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
3846
3847static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, union bpf_attr __user *uattr)
3848{
3849    int ufd = attr->info.bpf_fd;
3850    struct fd f;
3851    int err;
3852
3853    if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) {
3854        return -EINVAL;
3855    }
3856
3857    f = fdget(ufd);
3858    if (!f.file) {
3859        return -EBADFD;
3860    }
3861
3862    if (f.file->f_op == &bpf_prog_fops) {
3863        err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3864    } else if (f.file->f_op == &bpf_map_fops) {
3865        err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3866    } else if (f.file->f_op == &btf_fops) {
3867        err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3868    } else if (f.file->f_op == &bpf_link_fops) {
3869        err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3870    } else {
3871        err = -EINVAL;
3872    }
3873
3874    fdput(f);
3875    return err;
3876}
3877
3878#define BPF_BTF_LOAD_LAST_FIELD btf_log_level
3879
3880static int bpf_btf_load(const union bpf_attr *attr)
3881{
3882    if (CHECK_ATTR(BPF_BTF_LOAD)) {
3883        return -EINVAL;
3884    }
3885
3886    if (!bpf_capable()) {
3887        return -EPERM;
3888    }
3889
3890    return btf_new_fd(attr);
3891}
3892
3893#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
3894
3895static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
3896{
3897    if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) {
3898        return -EINVAL;
3899    }
3900
3901    if (!capable(CAP_SYS_ADMIN)) {
3902        return -EPERM;
3903    }
3904
3905    return btf_get_fd_by_id(attr->btf_id);
3906}
3907
3908static int bpf_task_fd_query_copy(const union bpf_attr *attr, union bpf_attr __user *uattr, u32 prog_id, u32 fd_type,
3909                                  const char *buf, u64 probe_offset, u64 probe_addr)
3910{
3911    char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
3912    u32 len = buf ? strlen(buf) : 0, input_len;
3913    int err = 0;
3914
3915    if (put_user(len, &uattr->task_fd_query.buf_len)) {
3916        return -EFAULT;
3917    }
3918    input_len = attr->task_fd_query.buf_len;
3919    if (input_len && ubuf) {
3920        if (!len) {
3921            /* nothing to copy; just NUL-terminate ubuf */
3922            char zero = '\0';
3923
3924            if (put_user(zero, ubuf)) {
3925                return -EFAULT;
3926            }
3927        } else if (input_len >= len + 1) {
3928            /* ubuf can hold the string with NULL terminator */
3929            if (copy_to_user(ubuf, buf, len + 1)) {
3930                return -EFAULT;
3931            }
3932        } else {
3933            /* ubuf cannot hold the full string with its NUL terminator,
3934             * so do a partial copy, NUL-terminate it and report -ENOSPC.
3935             */
3936            char zero = '\0';
3937
3938            err = -ENOSPC;
3939            if (copy_to_user(ubuf, buf, input_len - 1)) {
3940                return -EFAULT;
3941            }
3942            if (put_user(zero, ubuf + input_len - 1)) {
3943                return -EFAULT;
3944            }
3945        }
3946    }
3947
3948    if (put_user(prog_id, &uattr->task_fd_query.prog_id) || put_user(fd_type, &uattr->task_fd_query.fd_type) ||
3949        put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
3950        put_user(probe_addr, &uattr->task_fd_query.probe_addr)) {
3951        return -EFAULT;
3952    }
3953
3954    return err;
3955}
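/* Note on the copy helper above: the needed string length (excluding the
 * terminating NUL) is always reported back in task_fd_query.buf_len; at most
 * buf_len input bytes are copied, and a truncated copy still succeeds while
 * returning -ENOSPC so callers can retry with a larger buffer.
 */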
3956
3957#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
3958
3959static int bpf_task_fd_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
3960{
3961    pid_t pid = attr->task_fd_query.pid;
3962    u32 fd = attr->task_fd_query.fd;
3963    const struct perf_event *event;
3964    struct files_struct *files;
3965    struct task_struct *task;
3966    struct file *file;
3967    int err;
3968
3969    if (CHECK_ATTR(BPF_TASK_FD_QUERY)) {
3970        return -EINVAL;
3971    }
3972
3973    if (!capable(CAP_SYS_ADMIN)) {
3974        return -EPERM;
3975    }
3976
3977    if (attr->task_fd_query.flags != 0) {
3978        return -EINVAL;
3979    }
3980
3981    task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
3982    if (!task) {
3983        return -ENOENT;
3984    }
3985
3986    files = get_files_struct(task);
3987    put_task_struct(task);
3988    if (!files) {
3989        return -ENOENT;
3990    }
3991
3992    err = 0;
3993    spin_lock(&files->file_lock);
3994    file = fcheck_files(files, fd);
3995    if (!file) {
3996        err = -EBADF;
3997    } else {
3998        get_file(file);
3999    }
4000    spin_unlock(&files->file_lock);
4001    put_files_struct(files);
4002
4003    if (err) {
4004        goto out;
4005    }
4006
4007    if (file->f_op == &bpf_link_fops) {
4008        struct bpf_link *link = file->private_data;
4009
4010        if (link->ops == &bpf_raw_tp_link_lops) {
4011            struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link);
4012            struct bpf_raw_event_map *btp = raw_tp->btp;
4013
4014            err = bpf_task_fd_query_copy(attr, uattr, raw_tp->link.prog->aux->id, BPF_FD_TYPE_RAW_TRACEPOINT,
4015                                         btp->tp->name, 0, 0);
4016            goto put_file;
4017        }
4018        goto out_not_supp;
4019    }
4020
4021    event = perf_get_event(file);
4022    if (!IS_ERR(event)) {
4023        u64 probe_offset, probe_addr;
4024        u32 prog_id, fd_type;
4025        const char *buf;
4026
4027        err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, &probe_addr);
4028        if (!err) {
4029            err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, probe_offset, probe_addr);
4030        }
4031        goto put_file;
4032    }
4033
4034out_not_supp:
4035    err = -ENOTSUPP;
4036put_file:
4037    fput(file);
4038out:
4039    return err;
4040}
4041
4042#define BPF_MAP_BATCH_LAST_FIELD batch.flags
4043
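/* BPF_DO_BATCH expands inside bpf_map_do_batch() below and deliberately
 * relies on its local variables (map, attr, uattr, err) and its err_put
 * label; it reports -ENOTSUPP when the map type does not implement the
 * requested batch operation.
 */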
4044#define BPF_DO_BATCH(fn)                                                                                               \
4045    do {                                                                                                               \
4046        if (!(fn)) {                                                                                                   \
4047            err = -ENOTSUPP;                                                                                           \
4048            goto err_put;                                                                                              \
4049        }                                                                                                              \
4050        err = fn(map, attr, uattr);                                                                                    \
4051    } while (0)
4052
4053static int bpf_map_do_batch(const union bpf_attr *attr, union bpf_attr __user *uattr, int cmd)
4054{
4055    struct bpf_map *map;
4056    int err, ufd;
4057    struct fd f;
4058
4059    if (CHECK_ATTR(BPF_MAP_BATCH)) {
4060        return -EINVAL;
4061    }
4062
4063    ufd = attr->batch.map_fd;
4064    f = fdget(ufd);
4065    map = __bpf_map_get(f);
4066    if (IS_ERR(map)) {
4067        return PTR_ERR(map);
4068    }
4069
4070    if ((cmd == BPF_MAP_LOOKUP_BATCH || cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
4071        !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
4072        err = -EPERM;
4073        goto err_put;
4074    }
4075
4076    if (cmd != BPF_MAP_LOOKUP_BATCH && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
4077        err = -EPERM;
4078        goto err_put;
4079    }
4080
4081    if (cmd == BPF_MAP_LOOKUP_BATCH) {
4082        BPF_DO_BATCH(map->ops->map_lookup_batch);
4083    } else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) {
4084        BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
4085    } else if (cmd == BPF_MAP_UPDATE_BATCH) {
4086        BPF_DO_BATCH(map->ops->map_update_batch);
4087    } else {
4088        BPF_DO_BATCH(map->ops->map_delete_batch);
4089    }
4090
4091err_put:
4092    fdput(f);
4093    return err;
4094}
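/* Illustrative userspace sketch (not part of this file) of one batched
 * lookup pass; field names come from the batch member of union bpf_attr,
 * and keys/vals/tok are assumed to be buffers sized for the map in use:
 *
 *	union bpf_attr a = {};
 *
 *	a.batch.map_fd = map_fd;
 *	a.batch.keys = (__u64)(unsigned long)keys;      // room for a.batch.count keys
 *	a.batch.values = (__u64)(unsigned long)vals;    // room for a.batch.count values
 *	a.batch.count = n;
 *	a.batch.in_batch = 0;                           // NULL: start from the beginning
 *	a.batch.out_batch = (__u64)(unsigned long)tok;  // opaque, map-specific resume token
 *	syscall(__NR_bpf, BPF_MAP_LOOKUP_BATCH, &a, sizeof(a));
 *	// on return a.batch.count holds the number of elements copied; feed tok
 *	// back through in_batch to continue the walk.
 */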
4095
4096static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
4097{
4098    if (attr->link_create.attach_type != prog->expected_attach_type) {
4099        return -EINVAL;
4100    }
4101
4102    if (prog->expected_attach_type == BPF_TRACE_ITER) {
4103        return bpf_iter_link_attach(attr, prog);
4104    } else if (prog->type == BPF_PROG_TYPE_EXT) {
4105        return bpf_tracing_prog_attach(prog, attr->link_create.target_fd, attr->link_create.target_btf_id);
4106    }
4107    return -EINVAL;
4108}
4109
4110#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
4111static int link_create(union bpf_attr *attr)
4112{
4113    enum bpf_prog_type ptype;
4114    struct bpf_prog *prog;
4115    int ret;
4116
4117    if (CHECK_ATTR(BPF_LINK_CREATE)) {
4118        return -EINVAL;
4119    }
4120
4121    prog = bpf_prog_get(attr->link_create.prog_fd);
4122    if (IS_ERR(prog)) {
4123        return PTR_ERR(prog);
4124    }
4125
4126    ret = bpf_prog_attach_check_attach_type(prog, attr->link_create.attach_type);
4127    if (ret) {
4128        goto out;
4129    }
4130
4131    if (prog->type == BPF_PROG_TYPE_EXT) {
4132        ret = tracing_bpf_link_attach(attr, prog);
4133        goto out;
4134    }
4135
4136    ptype = attach_type_to_prog_type(attr->link_create.attach_type);
4137    if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
4138        ret = -EINVAL;
4139        goto out;
4140    }
4141
4142    switch (ptype) {
4143        case BPF_PROG_TYPE_CGROUP_SKB:
4144        case BPF_PROG_TYPE_CGROUP_SOCK:
4145        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4146        case BPF_PROG_TYPE_SOCK_OPS:
4147        case BPF_PROG_TYPE_CGROUP_DEVICE:
4148        case BPF_PROG_TYPE_CGROUP_SYSCTL:
4149        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4150            ret = cgroup_bpf_link_attach(attr, prog);
4151            break;
4152        case BPF_PROG_TYPE_TRACING:
4153            ret = tracing_bpf_link_attach(attr, prog);
4154            break;
4155        case BPF_PROG_TYPE_FLOW_DISSECTOR:
4156        case BPF_PROG_TYPE_SK_LOOKUP:
4157            ret = netns_bpf_link_create(attr, prog);
4158            break;
4159#ifdef CONFIG_NET
4160        case BPF_PROG_TYPE_XDP:
4161            ret = bpf_xdp_link_attach(attr, prog);
4162            break;
4163#endif
4164        default:
4165            ret = -EINVAL;
4166    }
4167
4168out:
4169    if (ret < 0) {
4170        bpf_prog_put(prog);
4171    }
4172    return ret;
4173}
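/* Illustrative userspace sketch (not part of this file): creating a cgroup
 * link for an already-loaded BPF_PROG_TYPE_CGROUP_SKB program; prog_fd and
 * cgroup_fd are assumed to be valid fds obtained elsewhere:
 *
 *	union bpf_attr a = {};
 *
 *	a.link_create.prog_fd = prog_fd;
 *	a.link_create.target_fd = cgroup_fd;
 *	a.link_create.attach_type = BPF_CGROUP_INET_INGRESS;
 *	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &a, sizeof(a));
 *	// the attachment lives as long as link_fd (or a pinned link) does.
 */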
4174
4175#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
4176
4177static int link_update(union bpf_attr *attr)
4178{
4179    struct bpf_prog *old_prog = NULL, *new_prog;
4180    struct bpf_link *link;
4181    u32 flags;
4182    int ret;
4183
4184    if (CHECK_ATTR(BPF_LINK_UPDATE)) {
4185        return -EINVAL;
4186    }
4187
4188    flags = attr->link_update.flags;
4189    if (flags & ~BPF_F_REPLACE) {
4190        return -EINVAL;
4191    }
4192
4193    link = bpf_link_get_from_fd(attr->link_update.link_fd);
4194    if (IS_ERR(link)) {
4195        return PTR_ERR(link);
4196    }
4197
4198    new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
4199    if (IS_ERR(new_prog)) {
4200        ret = PTR_ERR(new_prog);
4201        goto out_put_link;
4202    }
4203
4204    if (flags & BPF_F_REPLACE) {
4205        old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
4206        if (IS_ERR(old_prog)) {
4207            ret = PTR_ERR(old_prog);
4208            old_prog = NULL;
4209            goto out_put_progs;
4210        }
4211    } else if (attr->link_update.old_prog_fd) {
4212        ret = -EINVAL;
4213        goto out_put_progs;
4214    }
4215
4216    if (link->ops->update_prog) {
4217        ret = link->ops->update_prog(link, new_prog, old_prog);
4218    } else {
4219        ret = -EINVAL;
4220    }
4221
4222out_put_progs:
4223    if (old_prog) {
4224        bpf_prog_put(old_prog);
4225    }
4226    if (ret) {
4227        bpf_prog_put(new_prog);
4228    }
4229out_put_link:
4230    bpf_link_put(link);
4231    return ret;
4232}
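/* Illustrative userspace sketch (not part of this file): atomically swapping
 * the program behind an existing link, optionally insisting on which program
 * is being replaced:
 *
 *	union bpf_attr a = {};
 *
 *	a.link_update.link_fd = link_fd;
 *	a.link_update.new_prog_fd = new_prog_fd;
 *	a.link_update.old_prog_fd = old_prog_fd;   // only honoured with BPF_F_REPLACE
 *	a.link_update.flags = BPF_F_REPLACE;
 *	syscall(__NR_bpf, BPF_LINK_UPDATE, &a, sizeof(a));
 */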
4233
4234#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
4235
4236static int link_detach(union bpf_attr *attr)
4237{
4238    struct bpf_link *link;
4239    int ret;
4240
4241    if (CHECK_ATTR(BPF_LINK_DETACH)) {
4242        return -EINVAL;
4243    }
4244
4245    link = bpf_link_get_from_fd(attr->link_detach.link_fd);
4246    if (IS_ERR(link)) {
4247        return PTR_ERR(link);
4248    }
4249
4250    if (link->ops->detach) {
4251        ret = link->ops->detach(link);
4252    } else {
4253        ret = -EOPNOTSUPP;
4254    }
4255
4256    bpf_link_put(link);
4257    return ret;
4258}
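/* Note on link_detach() above: a forced detach only severs the attachment;
 * the link object itself (and its id/fd) stays alive until the last
 * reference is dropped via bpf_link_put(), at which point normal link
 * teardown runs.
 */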
4259
4260static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
4261{
4262    return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
4263}
4264
4265struct bpf_link *bpf_link_by_id(u32 id)
4266{
4267    struct bpf_link *link;
4268
4269    if (!id) {
4270        return ERR_PTR(-ENOENT);
4271    }
4272
4273    spin_lock_bh(&link_idr_lock);
4274    /* before link is "settled", ID is 0, pretend it doesn't exist yet */
4275    link = idr_find(&link_idr, id);
4276    if (link) {
4277        if (link->id) {
4278            link = bpf_link_inc_not_zero(link);
4279        } else {
4280            link = ERR_PTR(-EAGAIN);
4281        }
4282    } else {
4283        link = ERR_PTR(-ENOENT);
4284    }
4285    spin_unlock_bh(&link_idr_lock);
4286    return link;
4287}
4288
4289#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
4290
4291static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
4292{
4293    struct bpf_link *link;
4294    u32 id = attr->link_id;
4295    int fd;
4296
4297    if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) {
4298        return -EINVAL;
4299    }
4300
4301    if (!capable(CAP_SYS_ADMIN)) {
4302        return -EPERM;
4303    }
4304
4305    link = bpf_link_by_id(id);
4306    if (IS_ERR(link)) {
4307        return PTR_ERR(link);
4308    }
4309
4310    fd = bpf_link_new_fd(link);
4311    if (fd < 0) {
4312        bpf_link_put(link);
4313    }
4314
4315    return fd;
4316}
4317
4318DEFINE_MUTEX(bpf_stats_enabled_mutex);
4319
4320static int bpf_stats_release(struct inode *inode, struct file *file)
4321{
4322    mutex_lock(&bpf_stats_enabled_mutex);
4323    static_key_slow_dec(&bpf_stats_enabled_key.key);
4324    mutex_unlock(&bpf_stats_enabled_mutex);
4325    return 0;
4326}
4327
4328static const struct file_operations bpf_stats_fops = {
4329    .release = bpf_stats_release,
4330};
4331
4332static int bpf_enable_runtime_stats(void)
4333{
4334    int fd;
4335
4336    mutex_lock(&bpf_stats_enabled_mutex);
4337
4338    /* Cap the reference count well below INT_MAX so it cannot overflow */
4339    if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
4340        mutex_unlock(&bpf_stats_enabled_mutex);
4341        return -EBUSY;
4342    }
4343
4344    fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
4345    if (fd >= 0) {
4346        static_key_slow_inc(&bpf_stats_enabled_key.key);
4347    }
4348
4349    mutex_unlock(&bpf_stats_enabled_mutex);
4350    return fd;
4351}
4352
4353#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
4354
4355static int bpf_enable_stats(union bpf_attr *attr)
4356{
4357    if (CHECK_ATTR(BPF_ENABLE_STATS)) {
4358        return -EINVAL;
4359    }
4360
4361    if (!capable(CAP_SYS_ADMIN)) {
4362        return -EPERM;
4363    }
4364
4365    switch (attr->enable_stats.type) {
4366        case BPF_STATS_RUN_TIME:
4367            return bpf_enable_runtime_stats();
4368        default:
4369            break;
4370    }
4371    return -EINVAL;
4372}
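/* Illustrative userspace sketch (not part of this file): run-time statistics
 * stay enabled for as long as at least one fd returned by BPF_ENABLE_STATS is
 * held open; closing it drops the static-key reference in bpf_stats_release():
 *
 *	union bpf_attr a = {};
 *
 *	a.enable_stats.type = BPF_STATS_RUN_TIME;
 *	stats_fd = syscall(__NR_bpf, BPF_ENABLE_STATS, &a, sizeof(a));
 *	// ... run the workload, read run_time_ns/run_cnt from prog info ...
 *	close(stats_fd);
 */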
4373
4374#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
4375
4376static int bpf_iter_create(union bpf_attr *attr)
4377{
4378    struct bpf_link *link;
4379    int err;
4380
4381    if (CHECK_ATTR(BPF_ITER_CREATE)) {
4382        return -EINVAL;
4383    }
4384
4385    if (attr->iter_create.flags) {
4386        return -EINVAL;
4387    }
4388
4389    link = bpf_link_get_from_fd(attr->iter_create.link_fd);
4390    if (IS_ERR(link)) {
4391        return PTR_ERR(link);
4392    }
4393
4394    err = bpf_iter_new_fd(link);
4395    bpf_link_put(link);
4396
4397    return err;
4398}
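/* Illustrative userspace sketch (not part of this file): an iterator program
 * is first attached as a link (BPF_LINK_CREATE on a BPF_TRACE_ITER program),
 * then BPF_ITER_CREATE turns that link into a readable fd:
 *
 *	union bpf_attr a = {};
 *
 *	a.iter_create.link_fd = iter_link_fd;
 *	iter_fd = syscall(__NR_bpf, BPF_ITER_CREATE, &a, sizeof(a));
 *	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
 *		; // consume the stream produced by the iterator program
 */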
4399
4400#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
4401
4402static int bpf_prog_bind_map(union bpf_attr *attr)
4403{
4404    struct bpf_prog *prog;
4405    struct bpf_map *map;
4406    struct bpf_map **used_maps_old, **used_maps_new;
4407    int i, ret = 0;
4408
4409    if (CHECK_ATTR(BPF_PROG_BIND_MAP)) {
4410        return -EINVAL;
4411    }
4412
4413    if (attr->prog_bind_map.flags) {
4414        return -EINVAL;
4415    }
4416
4417    prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
4418    if (IS_ERR(prog)) {
4419        return PTR_ERR(prog);
4420    }
4421
4422    map = bpf_map_get(attr->prog_bind_map.map_fd);
4423    if (IS_ERR(map)) {
4424        ret = PTR_ERR(map);
4425        goto out_prog_put;
4426    }
4427
4428    mutex_lock(&prog->aux->used_maps_mutex);
4429
4430    used_maps_old = prog->aux->used_maps;
4431
4432    for (i = 0; i < prog->aux->used_map_cnt; i++) {
4433        if (used_maps_old[i] == map) {
4434            bpf_map_put(map);
4435            goto out_unlock;
4436        }
4437    }
4438
4439    used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, sizeof(used_maps_new[0]), GFP_KERNEL);
4440    if (!used_maps_new) {
4441        ret = -ENOMEM;
4442        goto out_unlock;
4443    }
4444
4445    memcpy(used_maps_new, used_maps_old, sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
4446    used_maps_new[prog->aux->used_map_cnt] = map;
4447
4448    prog->aux->used_map_cnt++;
4449    prog->aux->used_maps = used_maps_new;
4450
4451    kfree(used_maps_old);
4452
4453out_unlock:
4454    mutex_unlock(&prog->aux->used_maps_mutex);
4455
4456    if (ret) {
4457        bpf_map_put(map);
4458    }
4459out_prog_put:
4460    bpf_prog_put(prog);
4461    return ret;
4462}
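/* Note on bpf_prog_bind_map() above: binding ties the map's lifetime to the
 * program even when the program's instructions never reference it (useful
 * e.g. for metadata maps). Illustrative userspace sketch (not part of this
 * file), assuming prog_fd and map_fd are valid:
 *
 *	union bpf_attr a = {};
 *
 *	a.prog_bind_map.prog_fd = prog_fd;
 *	a.prog_bind_map.map_fd = map_fd;
 *	syscall(__NR_bpf, BPF_PROG_BIND_MAP, &a, sizeof(a));
 */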
4463
4464SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
4465{
4466    union bpf_attr attr;
4467    int err;
4468
4469    if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) {
4470        return -EPERM;
4471    }
4472
4473    err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
4474    if (err) {
4475        return err;
4476    }
4477    size = min_t(u32, size, sizeof(attr));
4478
4479    /* copy attributes from user space, may be less than sizeof(bpf_attr) */
4480    memset(&attr, 0, sizeof(attr));
4481    if (copy_from_user(&attr, uattr, size) != 0) {
4482        return -EFAULT;
4483    }
4484
4485    err = security_bpf(cmd, &attr, size);
4486    if (err < 0) {
4487        return err;
4488    }
4489
4490    switch (cmd) {
4491        case BPF_MAP_CREATE:
4492            err = map_create(&attr);
4493            break;
4494        case BPF_MAP_LOOKUP_ELEM:
4495            err = map_lookup_elem(&attr);
4496            break;
4497        case BPF_MAP_UPDATE_ELEM:
4498            err = map_update_elem(&attr);
4499            break;
4500        case BPF_MAP_DELETE_ELEM:
4501            err = map_delete_elem(&attr);
4502            break;
4503        case BPF_MAP_GET_NEXT_KEY:
4504            err = map_get_next_key(&attr);
4505            break;
4506        case BPF_MAP_FREEZE:
4507            err = map_freeze(&attr);
4508            break;
4509        case BPF_PROG_LOAD:
4510            err = bpf_prog_load(&attr, uattr);
4511            break;
4512        case BPF_OBJ_PIN:
4513            err = bpf_obj_pin(&attr);
4514            break;
4515        case BPF_OBJ_GET:
4516            err = bpf_obj_get(&attr);
4517            break;
4518        case BPF_PROG_ATTACH:
4519            err = bpf_prog_attach(&attr);
4520            break;
4521        case BPF_PROG_DETACH:
4522            err = bpf_prog_detach(&attr);
4523            break;
4524        case BPF_PROG_QUERY:
4525            err = bpf_prog_query(&attr, uattr);
4526            break;
4527        case BPF_PROG_TEST_RUN:
4528            err = bpf_prog_test_run(&attr, uattr);
4529            break;
4530        case BPF_PROG_GET_NEXT_ID:
4531            err = bpf_obj_get_next_id(&attr, uattr, &prog_idr, &prog_idr_lock);
4532            break;
4533        case BPF_MAP_GET_NEXT_ID:
4534            err = bpf_obj_get_next_id(&attr, uattr, &map_idr, &map_idr_lock);
4535            break;
4536        case BPF_BTF_GET_NEXT_ID:
4537            err = bpf_obj_get_next_id(&attr, uattr, &btf_idr, &btf_idr_lock);
4538            break;
4539        case BPF_PROG_GET_FD_BY_ID:
4540            err = bpf_prog_get_fd_by_id(&attr);
4541            break;
4542        case BPF_MAP_GET_FD_BY_ID:
4543            err = bpf_map_get_fd_by_id(&attr);
4544            break;
4545        case BPF_OBJ_GET_INFO_BY_FD:
4546            err = bpf_obj_get_info_by_fd(&attr, uattr);
4547            break;
4548        case BPF_RAW_TRACEPOINT_OPEN:
4549            err = bpf_raw_tracepoint_open(&attr);
4550            break;
4551        case BPF_BTF_LOAD:
4552            err = bpf_btf_load(&attr);
4553            break;
4554        case BPF_BTF_GET_FD_BY_ID:
4555            err = bpf_btf_get_fd_by_id(&attr);
4556            break;
4557        case BPF_TASK_FD_QUERY:
4558            err = bpf_task_fd_query(&attr, uattr);
4559            break;
4560        case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
4561            err = map_lookup_and_delete_elem(&attr);
4562            break;
4563        case BPF_MAP_LOOKUP_BATCH:
4564            err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
4565            break;
4566        case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
4567            err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_AND_DELETE_BATCH);
4568            break;
4569        case BPF_MAP_UPDATE_BATCH:
4570            err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
4571            break;
4572        case BPF_MAP_DELETE_BATCH:
4573            err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
4574            break;
4575        case BPF_LINK_CREATE:
4576            err = link_create(&attr);
4577            break;
4578        case BPF_LINK_UPDATE:
4579            err = link_update(&attr);
4580            break;
4581        case BPF_LINK_GET_FD_BY_ID:
4582            err = bpf_link_get_fd_by_id(&attr);
4583            break;
4584        case BPF_LINK_GET_NEXT_ID:
4585            err = bpf_obj_get_next_id(&attr, uattr, &link_idr, &link_idr_lock);
4586            break;
4587        case BPF_ENABLE_STATS:
4588            err = bpf_enable_stats(&attr);
4589            break;
4590        case BPF_ITER_CREATE:
4591            err = bpf_iter_create(&attr);
4592            break;
4593        case BPF_LINK_DETACH:
4594            err = link_detach(&attr);
4595            break;
4596        case BPF_PROG_BIND_MAP:
4597            err = bpf_prog_bind_map(&attr);
4598            break;
4599        default:
4600            err = -EINVAL;
4601            break;
4602    }
4603
4604    return err;
4605}
4606