1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  */
4 #include <linux/bpf.h>
5 #include <linux/bpf_trace.h>
6 #include <linux/bpf_lirc.h>
7 #include <linux/bpf_verifier.h>
8 #include <linux/btf.h>
9 #include <linux/syscalls.h>
10 #include <linux/slab.h>
11 #include <linux/sched/signal.h>
12 #include <linux/vmalloc.h>
13 #include <linux/mmzone.h>
14 #include <linux/anon_inodes.h>
15 #include <linux/fdtable.h>
16 #include <linux/file.h>
17 #include <linux/fs.h>
18 #include <linux/license.h>
19 #include <linux/filter.h>
20 #include <linux/version.h>
21 #include <linux/kernel.h>
22 #include <linux/idr.h>
23 #include <linux/cred.h>
24 #include <linux/timekeeping.h>
25 #include <linux/ctype.h>
26 #include <linux/nospec.h>
27 #include <linux/audit.h>
28 #include <uapi/linux/btf.h>
29 #include <linux/pgtable.h>
30 #include <linux/bpf_lsm.h>
31 #include <linux/poll.h>
32 #include <linux/bpf-netns.h>
33 #include <linux/rcupdate_trace.h>
34 
35 #define IS_FD_ARRAY(map)                                                                                               \
36     ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||               \
37      (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
38 #define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
39 #define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
40 #define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || IS_FD_HASH(map))
41 
42 #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
43 
44 DEFINE_PER_CPU(int, bpf_prog_active);
45 static DEFINE_IDR(prog_idr);
46 static DEFINE_SPINLOCK(prog_idr_lock);
47 static DEFINE_IDR(map_idr);
48 static DEFINE_SPINLOCK(map_idr_lock);
49 static DEFINE_IDR(link_idr);
50 static DEFINE_SPINLOCK(link_idr_lock);
51 
52 int sysctl_unprivileged_bpf_disabled __read_mostly = IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
53 
54 static const struct bpf_map_ops *const bpf_map_types[] = {
55 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
56 #define BPF_MAP_TYPE(_id, _ops) [_id] = &(_ops),
57 #define BPF_LINK_TYPE(_id, _name)
58 #include <linux/bpf_types.h>
59 #undef BPF_PROG_TYPE
60 #undef BPF_MAP_TYPE
61 #undef BPF_LINK_TYPE
62 };
63 
64 /*
65  * If we're handed a bigger struct than we know of, ensure all the unknown bits
66  * are 0 - i.e. new user-space does not rely on any kernel feature extensions
67  * we don't know about yet.
68  *
69  * There is a ToCToU between this function call and the following
70  * copy_from_user() call. However, this is not a concern since this function is
71  * meant to be a future-proofing of bits.
72  */
73 int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size, size_t actual_size)
74 {
75     unsigned char __user *addr = uaddr + expected_size;
76     int res;
77 
78     if (unlikely(actual_size > PAGE_SIZE)) { /* silly large */
79         return -E2BIG;
80     }
81 
82     if (actual_size <= expected_size) {
83         return 0;
84     }
85 
86     res = check_zeroed_user(addr, actual_size - expected_size);
87     if (res < 0) {
88         return res;
89     }
90     return res ? 0 : -E2BIG;
91 }
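
/* Usage sketch (illustrative, not lifted from this file): a command that
 * accepts a user-supplied struct which may grow over time first rejects
 * non-zero tail bytes, then copies only the bytes both sides understand.
 * The names uinfo/info/info_len below are placeholders.
 *
 *     err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
 *     if (err) {
 *         return err;
 *     }
 *     info_len = min_t(u32, sizeof(info), info_len);
 *     memset(&info, 0, sizeof(info));
 *     if (copy_from_user(&info, uinfo, info_len)) {
 *         return -EFAULT;
 *     }
 */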
92 
93 const struct bpf_map_ops bpf_map_offload_ops = {
94     .map_meta_equal = bpf_map_meta_equal,
95     .map_alloc = bpf_map_offload_map_alloc,
96     .map_free = bpf_map_offload_map_free,
97     .map_check_btf = map_check_no_btf,
98 };
99 
100 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
101 {
102     const struct bpf_map_ops *ops;
103     u32 type = attr->map_type;
104     struct bpf_map *map;
105     int err;
106 
107     if (type >= ARRAY_SIZE(bpf_map_types)) {
108         return ERR_PTR(-EINVAL);
109     }
110     type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
111     ops = bpf_map_types[type];
112     if (!ops) {
113         return ERR_PTR(-EINVAL);
114     }
115 
116     if (ops->map_alloc_check) {
117         err = ops->map_alloc_check(attr);
118         if (err) {
119             return ERR_PTR(err);
120         }
121     }
122     if (attr->map_ifindex) {
123         ops = &bpf_map_offload_ops;
124     }
125     map = ops->map_alloc(attr);
126     if (IS_ERR(map)) {
127         return map;
128     }
129     map->ops = ops;
130     map->map_type = type;
131     return map;
132 }
133 
134 static u32 bpf_map_value_size(struct bpf_map *map)
135 {
136     if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
137         map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
138         return round_up(map->value_size, 8) * num_possible_cpus();
139     } else if (IS_FD_MAP(map)) {
140         return sizeof(u32);
141     } else {
142         return map->value_size;
143     }
144 }
145 
146 static void maybe_wait_bpf_programs(struct bpf_map *map)
147 {
148     /* Wait for any running BPF programs to complete so that
149      * userspace, when we return to it, knows that all programs
150      * that could be running use the new map value.
151      */
152     if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
153         synchronize_rcu();
154     }
155 }
156 
157 static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, void *value, __u64 flags)
158 {
159     int err;
160 
161     /* Need to create a kthread, thus must support schedule */
162     if (bpf_map_is_dev_bound(map)) {
163         return bpf_map_offload_update_elem(map, key, value, flags);
164     } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
165         return map->ops->map_update_elem(map, key, value, flags);
166     } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || map->map_type == BPF_MAP_TYPE_SOCKMAP) {
167         return sock_map_update_elem_sys(map, key, value, flags);
168     } else if (IS_FD_PROG_ARRAY(map)) {
169         return bpf_fd_array_map_update_elem(map, f.file, key, value, flags);
170     }
171 
172     bpf_disable_instrumentation();
173     if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
174         err = bpf_percpu_hash_update(map, key, value, flags);
175     } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
176         err = bpf_percpu_array_update(map, key, value, flags);
177     } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
178         err = bpf_percpu_cgroup_storage_update(map, key, value, flags);
179     } else if (IS_FD_ARRAY(map)) {
180         rcu_read_lock();
181         err = bpf_fd_array_map_update_elem(map, f.file, key, value, flags);
182         rcu_read_unlock();
183     } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
184         rcu_read_lock();
185         err = bpf_fd_htab_map_update_elem(map, f.file, key, value, flags);
186         rcu_read_unlock();
187     } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
188         /* rcu_read_lock() is not needed */
189         err = bpf_fd_reuseport_array_update_elem(map, key, value, flags);
190     } else if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) {
191         err = map->ops->map_push_elem(map, value, flags);
192     } else {
193         rcu_read_lock();
194         err = map->ops->map_update_elem(map, key, value, flags);
195         rcu_read_unlock();
196     }
197     bpf_enable_instrumentation();
198     maybe_wait_bpf_programs(map);
199 
200     return err;
201 }
202 
203 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, __u64 flags)
204 {
205     void *ptr;
206     int err;
207 
208     if (bpf_map_is_dev_bound(map)) {
209         return bpf_map_offload_lookup_elem(map, key, value);
210     }
211 
212     bpf_disable_instrumentation();
213     if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
214         err = bpf_percpu_hash_copy(map, key, value);
215     } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
216         err = bpf_percpu_array_copy(map, key, value);
217     } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
218         err = bpf_percpu_cgroup_storage_copy(map, key, value);
219     } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
220         err = bpf_stackmap_copy(map, key, value);
221     } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
222         err = bpf_fd_array_map_lookup_elem(map, key, value);
223     } else if (IS_FD_HASH(map)) {
224         err = bpf_fd_htab_map_lookup_elem(map, key, value);
225     } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
226         err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
227     } else if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) {
228         err = map->ops->map_peek_elem(map, value);
229     } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
230         /* struct_ops map requires directly updating "value" */
231         err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
232     } else {
233         rcu_read_lock();
234         if (map->ops->map_lookup_elem_sys_only) {
235             ptr = map->ops->map_lookup_elem_sys_only(map, key);
236         } else {
237             ptr = map->ops->map_lookup_elem(map, key);
238         }
239         if (IS_ERR(ptr)) {
240             err = PTR_ERR(ptr);
241         } else if (!ptr) {
242             err = -ENOENT;
243         } else {
244             err = 0;
245             if (flags & BPF_F_LOCK) {
246                 /* lock 'ptr' and copy everything but lock */
247                 copy_map_value_locked(map, value, ptr, true);
248             } else {
249                 copy_map_value(map, value, ptr);
250             }
251             /* mask lock, since value wasn't zero inited */
252             check_and_init_map_lock(map, value);
253         }
254         rcu_read_unlock();
255     }
256 
257     bpf_enable_instrumentation();
258     maybe_wait_bpf_programs(map);
259 
260     return err;
261 }
262 
263 static void *_bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
264 {
265     /* We really just want to fail instead of triggering OOM killer
266      * under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
267      * which is used for lower order allocation requests.
268      *
269      * It has been observed that higher order allocation requests done by
270      * vmalloc with __GFP_NORETRY being set might fail due to not trying
271      * to reclaim memory from the page cache, thus we set
272      * __GFP_RETRY_MAYFAIL to avoid such situations.
273      */
274 
275     const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO;
276     unsigned int flags = 0;
277     unsigned long align = 1;
278     void *area;
279 
280     if (size >= SIZE_MAX) {
281         return NULL;
282     }
283 
284     /* kmalloc()'ed memory can't be mmap()'ed */
285     if (mmapable) {
286         BUG_ON(!PAGE_ALIGNED(size));
287         align = SHMLBA;
288         flags = VM_USERMAP;
289     } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
290         area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, numa_node);
291         if (area != NULL) {
292             return area;
293         }
294     }
295 
296     return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL,
297                                 PAGE_KERNEL, flags, numa_node, __builtin_return_address(0));
298 }
299 
300 void *bpf_map_area_alloc(u64 size, int numa_node)
301 {
302     return _bpf_map_area_alloc(size, numa_node, false);
303 }
304 
305 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
306 {
307     return _bpf_map_area_alloc(size, numa_node, true);
308 }
309 
310 void bpf_map_area_free(void *area)
311 {
312     kvfree(area);
313 }
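
/* Typical pairing in a map implementation (sketch, placeholder names):
 * because the allocator transparently falls back from kmalloc to vmalloc,
 * the same bpf_map_area_free() (kvfree) handles either backing store.
 *
 *     data = bpf_map_area_alloc((u64)max_entries * elem_size, numa_node);
 *     if (!data) {
 *         return ERR_PTR(-ENOMEM);
 *     }
 *     ...
 *     bpf_map_area_free(data);
 */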
314 
315 static u32 bpf_map_flags_retain_permanent(u32 flags)
316 {
317     /* Some map creation flags are not tied to the map object but
318      * rather to the map fd instead, so they have no meaning upon
319      * map object inspection since multiple file descriptors with
320      * different (access) properties can exist here. Thus, given
321      * this has zero meaning for the map itself, lets clear these
322      * from here.
323      */
324     return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY);
325 }
326 
327 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
328 {
329     map->map_type = attr->map_type;
330     map->key_size = attr->key_size;
331     map->value_size = attr->value_size;
332     map->max_entries = attr->max_entries;
333     map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
334     map->numa_node = bpf_map_attr_numa_node(attr);
335 }
336 
337 static int bpf_charge_memlock(struct user_struct *user, u32 pages)
338 {
339     unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
340     if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
341         atomic_long_sub(pages, &user->locked_vm);
342         return -EPERM;
343     }
344     return 0;
345 }
346 
347 static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
348 {
349     if (user) {
350         atomic_long_sub(pages, &user->locked_vm);
351     }
352 }
353 
354 int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
355 {
356     u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
357     struct user_struct *user;
358     int ret;
359 
360     if (size >= U32_MAX - PAGE_SIZE) {
361         return -E2BIG;
362     }
363 
364     user = get_current_user();
365     ret = bpf_charge_memlock(user, pages);
366     if (ret) {
367         free_uid(user);
368         return ret;
369     }
370 
371     mem->pages = pages;
372     mem->user = user;
373 
374     return 0;
375 }
376 
377 void bpf_map_charge_finish(struct bpf_map_memory *mem)
378 {
379     bpf_uncharge_memlock(mem->user, mem->pages);
380     free_uid(mem->user);
381 }
382 
383 void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src)
384 {
385     *dst = *src;
386 
387     /* Make sure src will not be used for the redundant uncharging. */
388     memset(src, 0, sizeof(struct bpf_map_memory));
389 }
390 
391 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
392 {
393     int ret;
394 
395     ret = bpf_charge_memlock(map->memory.user, pages);
396     if (ret) {
397         return ret;
398     }
399     map->memory.pages += pages;
400     return ret;
401 }
402 
403 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
404 {
405     bpf_uncharge_memlock(map->memory.user, pages);
406     map->memory.pages -= pages;
407 }
408 
409 static int bpf_map_alloc_id(struct bpf_map *map)
410 {
411     int id;
412 
413     idr_preload(GFP_KERNEL);
414     spin_lock_bh(&map_idr_lock);
415     id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
416     if (id > 0) {
417         map->id = id;
418     }
419     spin_unlock_bh(&map_idr_lock);
420     idr_preload_end();
421 
422     if (WARN_ON_ONCE(!id)) {
423         return -ENOSPC;
424     }
425 
426     return id > 0 ? 0 : id;
427 }
428 
429 void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
430 {
431     unsigned long flags;
432 
433     /* Offloaded maps are removed from the IDR store when their device
434      * disappears - even if someone holds an fd to them they are unusable,
435      * the memory is gone, all ops will fail; they are simply waiting for
436      * refcnt to drop to be freed.
437      */
438     if (!map->id) {
439         return;
440     }
441 
442     if (do_idr_lock) {
443         spin_lock_irqsave(&map_idr_lock, flags);
444     } else {
445         __acquire(&map_idr_lock);
446     }
447 
448     idr_remove(&map_idr, map->id);
449     map->id = 0;
450 
451     if (do_idr_lock) {
452         spin_unlock_irqrestore(&map_idr_lock, flags);
453     } else {
454         __release(&map_idr_lock);
455     }
456 }
457 
458 /* called from workqueue */
459 static void bpf_map_free_deferred(struct work_struct *work)
460 {
461     struct bpf_map *map = container_of(work, struct bpf_map, work);
462     struct bpf_map_memory mem;
463 
464     bpf_map_charge_move(&mem, &map->memory);
465     security_bpf_map_free(map);
466     /* implementation dependent freeing */
467     map->ops->map_free(map);
468     bpf_map_charge_finish(&mem);
469 }
470 
471 static void bpf_map_put_uref(struct bpf_map *map)
472 {
473     if (atomic64_dec_and_test(&map->usercnt)) {
474         if (map->ops->map_release_uref) {
475             map->ops->map_release_uref(map);
476         }
477     }
478 }
479 
480 /* decrement map refcnt and schedule it for freeing via workqueue
481  * (underlying map implementation ops->map_free() might sleep)
482  */
483 static void _bpf_map_put(struct bpf_map *map, bool do_idr_lock)
484 {
485     if (atomic64_dec_and_test(&map->refcnt)) {
486         /* bpf_map_free_id() must be called first */
487         bpf_map_free_id(map, do_idr_lock);
488         btf_put(map->btf);
489         INIT_WORK(&map->work, bpf_map_free_deferred);
490         schedule_work(&map->work);
491     }
492 }
493 
494 void bpf_map_put(struct bpf_map *map)
495 {
496     _bpf_map_put(map, true);
497 }
498 EXPORT_SYMBOL_GPL(bpf_map_put);
499 
500 void bpf_map_put_with_uref(struct bpf_map *map)
501 {
502     bpf_map_put_uref(map);
503     bpf_map_put(map);
504 }
505 
506 static int bpf_map_release(struct inode *inode, struct file *filp)
507 {
508     struct bpf_map *map = filp->private_data;
509 
510     if (map->ops->map_release) {
511         map->ops->map_release(map, filp);
512     }
513 
514     bpf_map_put_with_uref(map);
515     return 0;
516 }
517 
518 static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
519 {
520     fmode_t mode = f.file->f_mode;
521 
522     /* Our file permissions may have been overridden by global
523      * map permissions facing syscall side.
524      */
525     if (READ_ONCE(map->frozen)) {
526         mode &= ~FMODE_CAN_WRITE;
527     }
528     return mode;
529 }
530 
531 #ifdef CONFIG_PROC_FS
532 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
533 {
534     const struct bpf_map *map = filp->private_data;
535     const struct bpf_array *array;
536     u32 type = 0, jited = 0;
537 
538     if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
539         array = container_of(map, struct bpf_array, map);
540         spin_lock(&array->aux->owner.lock);
541         type = array->aux->owner.type;
542         jited = array->aux->owner.jited;
543         spin_unlock(&array->aux->owner.lock);
544     }
545 
546     seq_printf(m,
547                "map_type:\t%u\n"
548                "key_size:\t%u\n"
549                "value_size:\t%u\n"
550                "max_entries:\t%u\n"
551                "map_flags:\t%#x\n"
552                "memlock:\t%llu\n"
553                "map_id:\t%u\n"
554                "frozen:\t%u\n",
555                map->map_type, map->key_size, map->value_size, map->max_entries, map->map_flags,
556                map->memory.pages * 1ULL << PAGE_SHIFT, map->id, READ_ONCE(map->frozen));
557     if (type) {
558         seq_printf(m, "owner_prog_type:\t%u\n", type);
559         seq_printf(m, "owner_jited:\t%u\n", jited);
560     }
561 }
562 #endif
563 
564 static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
565 {
566     /* We need this handler such that alloc_file() enables
567      * f_mode with FMODE_CAN_READ.
568      */
569     return -EINVAL;
570 }
571 
572 static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, size_t siz, loff_t *ppos)
573 {
574     /* We need this handler such that alloc_file() enables
575      * f_mode with FMODE_CAN_WRITE.
576      */
577     return -EINVAL;
578 }
579 
580 /* called for any extra memory-mapped regions (except initial) */
581 static void bpf_map_mmap_open(struct vm_area_struct *vma)
582 {
583     struct bpf_map *map = vma->vm_file->private_data;
584 
585     if (vma->vm_flags & VM_MAYWRITE) {
586         mutex_lock(&map->freeze_mutex);
587         map->writecnt++;
588         mutex_unlock(&map->freeze_mutex);
589     }
590 }
591 
592 /* called for all unmapped memory regions (including the initial one) */
593 static void bpf_map_mmap_close(struct vm_area_struct *vma)
594 {
595     struct bpf_map *map = vma->vm_file->private_data;
596 
597     if (vma->vm_flags & VM_MAYWRITE) {
598         mutex_lock(&map->freeze_mutex);
599         map->writecnt--;
600         mutex_unlock(&map->freeze_mutex);
601     }
602 }
603 
604 static const struct vm_operations_struct bpf_map_default_vmops = {
605     .open = bpf_map_mmap_open,
606     .close = bpf_map_mmap_close,
607 };
608 
609 static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
610 {
611     struct bpf_map *map = filp->private_data;
612     int err;
613 
614     if (!map->ops->map_mmap || map_value_has_spin_lock(map)) {
615         return -ENOTSUPP;
616     }
617 
618     if (!(vma->vm_flags & VM_SHARED)) {
619         return -EINVAL;
620     }
621 
622     mutex_lock(&map->freeze_mutex);
623 
624     if (vma->vm_flags & VM_WRITE) {
625         if (map->frozen) {
626             err = -EPERM;
627             goto out;
628         }
629         /* map is meant to be read-only, so do not allow mapping as
630          * writable, because it's possible to leak a writable page
631          * reference and allow user-space to still modify it after
632          * freezing, while the verifier will assume contents do not change
633          */
634         if (map->map_flags & BPF_F_RDONLY_PROG) {
635             err = -EACCES;
636             goto out;
637         }
638     }
639 
640     /* set default open/close callbacks */
641     vma->vm_ops = &bpf_map_default_vmops;
642     vma->vm_private_data = map;
643     vma->vm_flags &= ~VM_MAYEXEC;
644     if (!(vma->vm_flags & VM_WRITE)) {
645         /* disallow re-mapping with PROT_WRITE */
646         vma->vm_flags &= ~VM_MAYWRITE;
647     }
648 
649     err = map->ops->map_mmap(map, vma);
650     if (err) {
651         goto out;
652     }
653 
654     if (vma->vm_flags & VM_MAYWRITE) {
655         map->writecnt++;
656     }
657 out:
658     mutex_unlock(&map->freeze_mutex);
659     return err;
660 }
661 
662 static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
663 {
664     struct bpf_map *map = filp->private_data;
665 
666     if (map->ops->map_poll) {
667         return map->ops->map_poll(map, filp, pts);
668     }
669 
670     return EPOLLERR;
671 }
672 
673 const struct file_operations bpf_map_fops = {
674 #ifdef CONFIG_PROC_FS
675     .show_fdinfo = bpf_map_show_fdinfo,
676 #endif
677     .release = bpf_map_release,
678     .read = bpf_dummy_read,
679     .write = bpf_dummy_write,
680     .mmap = bpf_map_mmap,
681     .poll = bpf_map_poll,
682 };
683 
684 int bpf_map_new_fd(struct bpf_map *map, int flags)
685 {
686     int ret;
687 
688     ret = security_bpf_map(map, OPEN_FMODE(flags));
689     if (ret < 0) {
690         return ret;
691     }
692 
693     return anon_inode_getfd("bpf-map", &bpf_map_fops, map, flags | O_CLOEXEC);
694 }
695 
696 int bpf_get_file_flag(int flags)
697 {
698     if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) {
699         return -EINVAL;
700     }
701     if (flags & BPF_F_RDONLY) {
702         return O_RDONLY;
703     }
704     if (flags & BPF_F_WRONLY) {
705         return O_WRONLY;
706     }
707     return O_RDWR;
708 }
709 
710 /* helper macro to check that unused fields 'union bpf_attr' are zero */
711 #define CHECK_ATTR(CMD)                                                                                                \
712     memchr_inv((void *)&attr->CMD##_LAST_FIELD + sizeof(attr->CMD##_LAST_FIELD), 0,                                    \
713                sizeof(*attr) - offsetof(union bpf_attr, CMD##_LAST_FIELD) - sizeof(attr->CMD##_LAST_FIELD)) != NULL
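
/* Usage pattern (illustrative): each syscall command #defines the last
 * bpf_attr field it consumes and bails out if anything beyond it is set,
 * as the command handlers below do:
 *
 *     #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 *     ...
 *     if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) {
 *         return -EINVAL;
 *     }
 */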
714 
715 /* dst and src must have at least "size" number of bytes.
716  * Return strlen on success and < 0 on error.
717  */
718 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size)
719 {
720     const char *end = src + size;
721     const char *orig_src = src;
722 
723     memset(dst, 0, size);
724     /* Copy all isalnum(), '_' and '.' chars. */
725     while (src < end && *src) {
726         if (!isalnum(*src) && *src != '_' && *src != '.') {
727             return -EINVAL;
728         }
729         *dst++ = *src++;
730     }
731 
732     /* No '\0' found in "size" number of bytes */
733     if (src == end) {
734         return -EINVAL;
735     }
736 
737     return src - orig_src;
738 }
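
/* Usage sketch (buffer name is a placeholder): map_create() below copies
 * the user-supplied name into the fixed-size map->name buffer this way.
 *
 *     char name[BPF_OBJ_NAME_LEN];
 *     int len = bpf_obj_name_cpy(name, attr->map_name, sizeof(name));
 *     if (len < 0) {
 *         return len;   // rejected: no '\0' within size or invalid chars
 *     }
 */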
739 
740 int map_check_no_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type,
741                      const struct btf_type *value_type)
742 {
743     return -ENOTSUPP;
744 }
745 
746 static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id)
747 {
748     const struct btf_type *key_type, *value_type;
749     u32 key_size, value_size;
750     int ret = 0;
751 
752     /* Some maps allow key to be unspecified. */
753     if (btf_key_id) {
754         key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
755         if (!key_type || key_size != map->key_size) {
756             return -EINVAL;
757         }
758     } else {
759         key_type = btf_type_by_id(btf, 0);
760         if (!map->ops->map_check_btf) {
761             return -EINVAL;
762         }
763     }
764 
765     value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
766     if (!value_type || value_size != map->value_size) {
767         return -EINVAL;
768     }
769 
770     map->spin_lock_off = btf_find_spin_lock(btf, value_type);
771 
772     if (map_value_has_spin_lock(map)) {
773         if (map->map_flags & BPF_F_RDONLY_PROG) {
774             return -EACCES;
775         }
776         if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY &&
777             map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
778             map->map_type != BPF_MAP_TYPE_INODE_STORAGE) {
779             return -ENOTSUPP;
780         }
781         if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) {
782             WARN_ONCE(1, "verifier bug spin_lock_off %d value_size %d\n", map->spin_lock_off, map->value_size);
783             return -EFAULT;
784         }
785     }
786 
787     if (map->ops->map_check_btf) {
788         ret = map->ops->map_check_btf(map, btf, key_type, value_type);
789     }
790 
791     return ret;
792 }
793 
794 #define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
795 /* called via syscall */
796 static int map_create(union bpf_attr *attr)
797 {
798     int numa_node = bpf_map_attr_numa_node(attr);
799     struct bpf_map_memory mem;
800     struct bpf_map *map;
801     int f_flags;
802     int err;
803 
804     err = CHECK_ATTR(BPF_MAP_CREATE);
805     if (err) {
806         return -EINVAL;
807     }
808 
809     if (attr->btf_vmlinux_value_type_id) {
810         if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || attr->btf_key_type_id || attr->btf_value_type_id) {
811             return -EINVAL;
812         }
813     } else if (attr->btf_key_type_id && !attr->btf_value_type_id) {
814         return -EINVAL;
815     }
816 
817     f_flags = bpf_get_file_flag(attr->map_flags);
818     if (f_flags < 0) {
819         return f_flags;
820     }
821 
822     if (numa_node != NUMA_NO_NODE && ((unsigned int)numa_node >= nr_node_ids || !node_online(numa_node))) {
823         return -EINVAL;
824     }
825 
826     /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
827     map = find_and_alloc_map(attr);
828     if (IS_ERR(map)) {
829         return PTR_ERR(map);
830     }
831 
832     err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name));
833     if (err < 0) {
834         goto free_map;
835     }
836 
837     atomic64_set(&map->refcnt, 1);
838     atomic64_set(&map->usercnt, 1);
839     mutex_init(&map->freeze_mutex);
840 
841     map->spin_lock_off = -EINVAL;
842     if (attr->btf_key_type_id || attr->btf_value_type_id ||
843         /* Even when the map's value is a kernel struct,
844          * the bpf_prog.o must have BTF to begin with
845          * to figure out the corresponding kernel
846          * counterpart.  Thus, attr->btf_fd has
847          * to be valid also.
848          */
849         attr->btf_vmlinux_value_type_id) {
850         struct btf *btf;
851 
852         btf = btf_get_by_fd(attr->btf_fd);
853         if (IS_ERR(btf)) {
854             err = PTR_ERR(btf);
855             goto free_map;
856         }
857         map->btf = btf;
858 
859         if (attr->btf_value_type_id) {
860             err = map_check_btf(map, btf, attr->btf_key_type_id, attr->btf_value_type_id);
861             if (err) {
862                 goto free_map;
863             }
864         }
865 
866         map->btf_key_type_id = attr->btf_key_type_id;
867         map->btf_value_type_id = attr->btf_value_type_id;
868         map->btf_vmlinux_value_type_id = attr->btf_vmlinux_value_type_id;
869     }
870 
871     err = security_bpf_map_alloc(map);
872     if (err) {
873         goto free_map;
874     }
875 
876     err = bpf_map_alloc_id(map);
877     if (err) {
878         goto free_map_sec;
879     }
880 
881     err = bpf_map_new_fd(map, f_flags);
882     if (err < 0) {
883         /* failed to allocate fd.
884          * bpf_map_put_with_uref() is needed because the above
885          * bpf_map_alloc_id() has published the map
886          * to the userspace and the userspace may
887          * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
888          */
889         bpf_map_put_with_uref(map);
890         return err;
891     }
892 
893     return err;
894 
895 free_map_sec:
896     security_bpf_map_free(map);
897 free_map:
898     btf_put(map->btf);
899     bpf_map_charge_move(&mem, &map->memory);
900     map->ops->map_free(map);
901     bpf_map_charge_finish(&mem);
902     return err;
903 }
904 
905 /* if error is returned, fd is released.
906  * On success caller should complete fd access with matching fdput()
907  */
908 struct bpf_map *__bpf_map_get(struct fd f)
909 {
910     if (!f.file) {
911         return ERR_PTR(-EBADF);
912     }
913     if (f.file->f_op != &bpf_map_fops) {
914         fdput(f);
915         return ERR_PTR(-EINVAL);
916     }
917 
918     return f.file->private_data;
919 }
920 
921 void bpf_map_inc(struct bpf_map *map)
922 {
923     atomic64_inc(&map->refcnt);
924 }
925 EXPORT_SYMBOL_GPL(bpf_map_inc);
926 
927 void bpf_map_inc_with_uref(struct bpf_map *map)
928 {
929     atomic64_inc(&map->refcnt);
930     atomic64_inc(&map->usercnt);
931 }
932 EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
933 
934 struct bpf_map *bpf_map_get(u32 ufd)
935 {
936     struct fd f = fdget(ufd);
937     struct bpf_map *map;
938 
939     map = __bpf_map_get(f);
940     if (IS_ERR(map)) {
941         return map;
942     }
943 
944     bpf_map_inc(map);
945     fdput(f);
946 
947     return map;
948 }
949 
950 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
951 {
952     struct fd f = fdget(ufd);
953     struct bpf_map *map;
954 
955     map = __bpf_map_get(f);
956     if (IS_ERR(map)) {
957         return map;
958     }
959 
960     bpf_map_inc_with_uref(map);
961     fdput(f);
962 
963     return map;
964 }
965 
966 /* map_idr_lock should have been held */
967 static struct bpf_map *_bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
968 {
969     int refold;
970 
971     refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
972     if (!refold) {
973         return ERR_PTR(-ENOENT);
974     }
975     if (uref) {
976         atomic64_inc(&map->usercnt);
977     }
978 
979     return map;
980 }
981 
982 struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
983 {
984     spin_lock_bh(&map_idr_lock);
985     map = _bpf_map_inc_not_zero(map, false);
986     spin_unlock_bh(&map_idr_lock);
987 
988     return map;
989 }
990 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
991 
992 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
993 {
994     return -ENOTSUPP;
995 }
996 
997 static void *__bpf_copy_key(void __user *ukey, u64 key_size)
998 {
999     if (key_size) {
1000         return memdup_user(ukey, key_size);
1001     }
1002 
1003     if (ukey) {
1004         return ERR_PTR(-EINVAL);
1005     }
1006 
1007     return NULL;
1008 }
1009 
1010 /* last field in 'union bpf_attr' used by this command */
1011 #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
1012 
1013 static int map_lookup_elem(union bpf_attr *attr)
1014 {
1015     void __user *ukey = u64_to_user_ptr(attr->key);
1016     void __user *uvalue = u64_to_user_ptr(attr->value);
1017     int ufd = attr->map_fd;
1018     struct bpf_map *map;
1019     void *key, *value;
1020     u32 value_size;
1021     struct fd f;
1022     int err;
1023 
1024     if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) {
1025         return -EINVAL;
1026     }
1027 
1028     if (attr->flags & ~BPF_F_LOCK) {
1029         return -EINVAL;
1030     }
1031 
1032     f = fdget(ufd);
1033     map = __bpf_map_get(f);
1034     if (IS_ERR(map)) {
1035         return PTR_ERR(map);
1036     }
1037     if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1038         err = -EPERM;
1039         goto err_put;
1040     }
1041 
1042     if ((attr->flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1043         err = -EINVAL;
1044         goto err_put;
1045     }
1046 
1047     key = __bpf_copy_key(ukey, map->key_size);
1048     if (IS_ERR(key)) {
1049         err = PTR_ERR(key);
1050         goto err_put;
1051     }
1052 
1053     value_size = bpf_map_value_size(map);
1054 
1055     err = -ENOMEM;
1056     value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1057     if (!value) {
1058         goto free_key;
1059     }
1060 
1061     err = bpf_map_copy_value(map, key, value, attr->flags);
1062     if (err) {
1063         goto free_value;
1064     }
1065 
1066     err = -EFAULT;
1067     if (copy_to_user(uvalue, value, value_size) != 0) {
1068         goto free_value;
1069     }
1070 
1071     err = 0;
1072 
1073 free_value:
1074     kfree(value);
1075 free_key:
1076     kfree(key);
1077 err_put:
1078     fdput(f);
1079     return err;
1080 }
1081 
1082 #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
1083 
1084 static int map_update_elem(union bpf_attr *attr)
1085 {
1086     void __user *ukey = u64_to_user_ptr(attr->key);
1087     void __user *uvalue = u64_to_user_ptr(attr->value);
1088     int ufd = attr->map_fd;
1089     struct bpf_map *map;
1090     void *key, *value;
1091     u32 value_size;
1092     struct fd f;
1093     int err;
1094 
1095     if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) {
1096         return -EINVAL;
1097     }
1098 
1099     f = fdget(ufd);
1100     map = __bpf_map_get(f);
1101     if (IS_ERR(map)) {
1102         return PTR_ERR(map);
1103     }
1104     if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1105         err = -EPERM;
1106         goto err_put;
1107     }
1108 
1109     if ((attr->flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1110         err = -EINVAL;
1111         goto err_put;
1112     }
1113 
1114     key = __bpf_copy_key(ukey, map->key_size);
1115     if (IS_ERR(key)) {
1116         err = PTR_ERR(key);
1117         goto err_put;
1118     }
1119 
1120     if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
1121         map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
1122         value_size = round_up(map->value_size, 8) * num_possible_cpus();
1123     } else {
1124         value_size = map->value_size;
1125     }
1126 
1127     err = -ENOMEM;
1128     value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1129     if (!value) {
1130         goto free_key;
1131     }
1132 
1133     err = -EFAULT;
1134     if (copy_from_user(value, uvalue, value_size) != 0) {
1135         goto free_value;
1136     }
1137 
1138     err = bpf_map_update_value(map, f, key, value, attr->flags);
1139 
1140 free_value:
1141     kfree(value);
1142 free_key:
1143     kfree(key);
1144 err_put:
1145     fdput(f);
1146     return err;
1147 }
1148 
1149 #define BPF_MAP_DELETE_ELEM_LAST_FIELD key
1150 
1151 static int map_delete_elem(union bpf_attr *attr)
1152 {
1153     void __user *ukey = u64_to_user_ptr(attr->key);
1154     int ufd = attr->map_fd;
1155     struct bpf_map *map;
1156     struct fd f;
1157     void *key;
1158     int err;
1159 
1160     if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) {
1161         return -EINVAL;
1162     }
1163 
1164     f = fdget(ufd);
1165     map = __bpf_map_get(f);
1166     if (IS_ERR(map)) {
1167         return PTR_ERR(map);
1168     }
1169     if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1170         err = -EPERM;
1171         goto err_put;
1172     }
1173 
1174     key = __bpf_copy_key(ukey, map->key_size);
1175     if (IS_ERR(key)) {
1176         err = PTR_ERR(key);
1177         goto err_put;
1178     }
1179 
1180     if (bpf_map_is_dev_bound(map)) {
1181         err = bpf_map_offload_delete_elem(map, key);
1182         goto out;
1183     } else if (IS_FD_PROG_ARRAY(map) || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1184         /* These maps require sleepable context */
1185         err = map->ops->map_delete_elem(map, key);
1186         goto out;
1187     }
1188 
1189     bpf_disable_instrumentation();
1190     rcu_read_lock();
1191     err = map->ops->map_delete_elem(map, key);
1192     rcu_read_unlock();
1193     bpf_enable_instrumentation();
1194     maybe_wait_bpf_programs(map);
1195 out:
1196     kfree(key);
1197 err_put:
1198     fdput(f);
1199     return err;
1200 }
1201 
1202 /* last field in 'union bpf_attr' used by this command */
1203 #define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
1204 
1205 static int map_get_next_key(union bpf_attr *attr)
1206 {
1207     void __user *ukey = u64_to_user_ptr(attr->key);
1208     void __user *unext_key = u64_to_user_ptr(attr->next_key);
1209     int ufd = attr->map_fd;
1210     struct bpf_map *map;
1211     void *key, *next_key;
1212     struct fd f;
1213     int err;
1214 
1215     if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) {
1216         return -EINVAL;
1217     }
1218 
1219     f = fdget(ufd);
1220     map = __bpf_map_get(f);
1221     if (IS_ERR(map)) {
1222         return PTR_ERR(map);
1223     }
1224     if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
1225         err = -EPERM;
1226         goto err_put;
1227     }
1228 
1229     if (ukey) {
1230         key = __bpf_copy_key(ukey, map->key_size);
1231         if (IS_ERR(key)) {
1232             err = PTR_ERR(key);
1233             goto err_put;
1234         }
1235     } else {
1236         key = NULL;
1237     }
1238 
1239     err = -ENOMEM;
1240     next_key = kmalloc(map->key_size, GFP_USER);
1241     if (!next_key) {
1242         goto free_key;
1243     }
1244 
1245     if (bpf_map_is_dev_bound(map)) {
1246         err = bpf_map_offload_get_next_key(map, key, next_key);
1247         goto out;
1248     }
1249 
1250     rcu_read_lock();
1251     err = map->ops->map_get_next_key(map, key, next_key);
1252     rcu_read_unlock();
1253 out:
1254     if (err) {
1255         goto free_next_key;
1256     }
1257 
1258     err = -EFAULT;
1259     if (copy_to_user(unext_key, next_key, map->key_size) != 0) {
1260         goto free_next_key;
1261     }
1262 
1263     err = 0;
1264 
1265 free_next_key:
1266     kfree(next_key);
1267 free_key:
1268     kfree(key);
1269 err_put:
1270     fdput(f);
1271     return err;
1272 }
1273 
1274 int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
1275 {
1276     void __user *keys = u64_to_user_ptr(attr->batch.keys);
1277     u32 cp, max_count;
1278     int err = 0;
1279     void *key;
1280 
1281     if (attr->batch.elem_flags & ~BPF_F_LOCK) {
1282         return -EINVAL;
1283     }
1284 
1285     if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1286         return -EINVAL;
1287     }
1288 
1289     max_count = attr->batch.count;
1290     if (!max_count) {
1291         return 0;
1292     }
1293 
1294     key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1295     if (!key) {
1296         return -ENOMEM;
1297     }
1298 
1299     for (cp = 0; cp < max_count; cp++) {
1300         err = -EFAULT;
1301         if (copy_from_user(key, keys + cp * map->key_size, map->key_size)) {
1302             break;
1303         }
1304 
1305         if (bpf_map_is_dev_bound(map)) {
1306             err = bpf_map_offload_delete_elem(map, key);
1307             break;
1308         }
1309 
1310         bpf_disable_instrumentation();
1311         rcu_read_lock();
1312         err = map->ops->map_delete_elem(map, key);
1313         rcu_read_unlock();
1314         bpf_enable_instrumentation();
1315         maybe_wait_bpf_programs(map);
1316         if (err) {
1317             break;
1318         }
1319     }
1320     if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) {
1321         err = -EFAULT;
1322     }
1323 
1324     kfree(key);
1325     return err;
1326 }
1327 
1328 int generic_map_update_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
1329 {
1330     void __user *values = u64_to_user_ptr(attr->batch.values);
1331     void __user *keys = u64_to_user_ptr(attr->batch.keys);
1332     u32 value_size, cp, max_count;
1333     int ufd = attr->batch.map_fd;
1334     void *key, *value;
1335     struct fd f;
1336     int err = 0;
1337 
1338     if (attr->batch.elem_flags & ~BPF_F_LOCK) {
1339         return -EINVAL;
1340     }
1341 
1342     if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1343         return -EINVAL;
1344     }
1345 
1346     value_size = bpf_map_value_size(map);
1347 
1348     max_count = attr->batch.count;
1349     if (!max_count) {
1350         return 0;
1351     }
1352 
1353     key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1354     if (!key) {
1355         return -ENOMEM;
1356     }
1357 
1358     value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1359     if (!value) {
1360         kfree(key);
1361         return -ENOMEM;
1362     }
1363 
1364     f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
1365     for (cp = 0; cp < max_count; cp++) {
1366         err = -EFAULT;
1367         if (copy_from_user(key, keys + cp * map->key_size, map->key_size) ||
1368             copy_from_user(value, values + cp * value_size, value_size)) {
1369             break;
1370         }
1371 
1372         err = bpf_map_update_value(map, f, key, value, attr->batch.elem_flags);
1373 
1374         if (err) {
1375             break;
1376         }
1377         cond_resched();
1378     }
1379 
1380     if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) {
1381         err = -EFAULT;
1382     }
1383 
1384     kfree(value);
1385     kfree(key);
1386     fdput(f);
1387     return err;
1388 }
1389 
1390 #define MAP_LOOKUP_RETRIES 3
1391 
1392 int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
1393 {
1394     void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
1395     void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
1396     void __user *values = u64_to_user_ptr(attr->batch.values);
1397     void __user *keys = u64_to_user_ptr(attr->batch.keys);
1398     void *buf, *buf_prevkey, *prev_key, *key, *value;
1399     int err, retry = MAP_LOOKUP_RETRIES;
1400     u32 value_size, cp, max_count;
1401 
1402     if (attr->batch.elem_flags & ~BPF_F_LOCK) {
1403         return -EINVAL;
1404     }
1405 
1406     if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
1407         return -EINVAL;
1408     }
1409 
1410     value_size = bpf_map_value_size(map);
1411 
1412     max_count = attr->batch.count;
1413     if (!max_count) {
1414         return 0;
1415     }
1416 
1417     if (put_user(0, &uattr->batch.count)) {
1418         return -EFAULT;
1419     }
1420 
1421     buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
1422     if (!buf_prevkey) {
1423         return -ENOMEM;
1424     }
1425 
1426     buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
1427     if (!buf) {
1428         kfree(buf_prevkey);
1429         return -ENOMEM;
1430     }
1431 
1432     err = -EFAULT;
1433     prev_key = NULL;
1434     if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) {
1435         goto free_buf;
1436     }
1437     key = buf;
1438     value = key + map->key_size;
1439     if (ubatch) {
1440         prev_key = buf_prevkey;
1441     }
1442 
1443     for (cp = 0; cp < max_count;) {
1444         rcu_read_lock();
1445         err = map->ops->map_get_next_key(map, prev_key, key);
1446         rcu_read_unlock();
1447         if (err) {
1448             break;
1449         }
1450         err = bpf_map_copy_value(map, key, value, attr->batch.elem_flags);
1451 
1452         if (err == -ENOENT) {
1453             if (retry) {
1454                 retry--;
1455                 continue;
1456             }
1457             err = -EINTR;
1458             break;
1459         }
1460 
1461         if (err) {
1462             goto free_buf;
1463         }
1464 
1465         if (copy_to_user(keys + cp * map->key_size, key, map->key_size)) {
1466             err = -EFAULT;
1467             goto free_buf;
1468         }
1469         if (copy_to_user(values + cp * value_size, value, value_size)) {
1470             err = -EFAULT;
1471             goto free_buf;
1472         }
1473 
1474         if (!prev_key) {
1475             prev_key = buf_prevkey;
1476         }
1477 
1478         swap(prev_key, key);
1479         retry = MAP_LOOKUP_RETRIES;
1480         cp++;
1481         cond_resched();
1482     }
1483 
1484     if (err == -EFAULT) {
1485         goto free_buf;
1486     }
1487 
1488     if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
1489          (cp && copy_to_user(uobatch, prev_key, map->key_size)))) {
1490         err = -EFAULT;
1491     }
1492 
1493 free_buf:
1494     kfree(buf_prevkey);
1495     kfree(buf);
1496     return err;
1497 }
1498 
1499 #define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
1500 
1501 static int map_lookup_and_delete_elem(union bpf_attr *attr)
1502 {
1503     void __user *ukey = u64_to_user_ptr(attr->key);
1504     void __user *uvalue = u64_to_user_ptr(attr->value);
1505     int ufd = attr->map_fd;
1506     struct bpf_map *map;
1507     void *key, *value;
1508     u32 value_size;
1509     struct fd f;
1510     int err;
1511 
1512     if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) {
1513         return -EINVAL;
1514     }
1515 
1516     f = fdget(ufd);
1517     map = __bpf_map_get(f);
1518     if (IS_ERR(map)) {
1519         return PTR_ERR(map);
1520     }
1521     if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
1522         err = -EPERM;
1523         goto err_put;
1524     }
1525 
1526     key = __bpf_copy_key(ukey, map->key_size);
1527     if (IS_ERR(key)) {
1528         err = PTR_ERR(key);
1529         goto err_put;
1530     }
1531 
1532     value_size = map->value_size;
1533 
1534     err = -ENOMEM;
1535     value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
1536     if (!value) {
1537         goto free_key;
1538     }
1539 
1540     if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) {
1541         err = map->ops->map_pop_elem(map, value);
1542     } else {
1543         err = -ENOTSUPP;
1544     }
1545 
1546     if (err) {
1547         goto free_value;
1548     }
1549 
1550     if (copy_to_user(uvalue, value, value_size) != 0) {
1551         err = -EFAULT;
1552         goto free_value;
1553     }
1554 
1555     err = 0;
1556 
1557 free_value:
1558     kfree(value);
1559 free_key:
1560     kfree(key);
1561 err_put:
1562     fdput(f);
1563     return err;
1564 }
1565 
1566 #define BPF_MAP_FREEZE_LAST_FIELD map_fd
1567 
1568 static int map_freeze(const union bpf_attr *attr)
1569 {
1570     int err = 0, ufd = attr->map_fd;
1571     struct bpf_map *map;
1572     struct fd f;
1573 
1574     if (CHECK_ATTR(BPF_MAP_FREEZE)) {
1575         return -EINVAL;
1576     }
1577 
1578     f = fdget(ufd);
1579     map = __bpf_map_get(f);
1580     if (IS_ERR(map)) {
1581         return PTR_ERR(map);
1582     }
1583 
1584     if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
1585         fdput(f);
1586         return -ENOTSUPP;
1587     }
1588 
1589     mutex_lock(&map->freeze_mutex);
1590 
1591     if (map->writecnt) {
1592         err = -EBUSY;
1593         goto err_put;
1594     }
1595     if (READ_ONCE(map->frozen)) {
1596         err = -EBUSY;
1597         goto err_put;
1598     }
1599     if (!bpf_capable()) {
1600         err = -EPERM;
1601         goto err_put;
1602     }
1603 
1604     WRITE_ONCE(map->frozen, true);
1605 err_put:
1606     mutex_unlock(&map->freeze_mutex);
1607     fdput(f);
1608     return err;
1609 }
1610 
1611 static const struct bpf_prog_ops *const bpf_prog_types[] = {
1612 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) [_id] = &_name##_prog_ops,
1613 #define BPF_MAP_TYPE(_id, _ops)
1614 #define BPF_LINK_TYPE(_id, _name)
1615 #include <linux/bpf_types.h>
1616 #undef BPF_PROG_TYPE
1617 #undef BPF_MAP_TYPE
1618 #undef BPF_LINK_TYPE
1619 };
1620 
1621 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
1622 {
1623     const struct bpf_prog_ops *ops;
1624 
1625     if (type >= ARRAY_SIZE(bpf_prog_types)) {
1626         return -EINVAL;
1627     }
1628     type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
1629     ops = bpf_prog_types[type];
1630     if (!ops) {
1631         return -EINVAL;
1632     }
1633 
1634     if (!bpf_prog_is_dev_bound(prog->aux)) {
1635         prog->aux->ops = ops;
1636     } else {
1637         prog->aux->ops = &bpf_offload_prog_ops;
1638     }
1639     prog->type = type;
1640     return 0;
1641 }
1642 
1643 enum bpf_audit {
1644     BPF_AUDIT_LOAD,
1645     BPF_AUDIT_UNLOAD,
1646     BPF_AUDIT_MAX,
1647 };
1648 
1649 static const char *const bpf_audit_str[BPF_AUDIT_MAX] = {
1650     [BPF_AUDIT_LOAD] = "LOAD",
1651     [BPF_AUDIT_UNLOAD] = "UNLOAD",
1652 };
1653 
1654 static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
1655 {
1656     struct audit_context *ctx = NULL;
1657     struct audit_buffer *ab;
1658 
1659     if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) {
1660         return;
1661     }
1662     if (audit_enabled == AUDIT_OFF) {
1663         return;
1664     }
1665     if (op == BPF_AUDIT_LOAD) {
1666         ctx = audit_context();
1667     }
1668     ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
1669     if (unlikely(!ab)) {
1670         return;
1671     }
1672     audit_log_format(ab, "prog-id=%u op=%s", prog->aux->id, bpf_audit_str[op]);
1673     audit_log_end(ab);
1674 }
1675 
1676 int __bpf_prog_charge(struct user_struct *user, u32 pages)
1677 {
1678     unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1679     unsigned long user_bufs;
1680 
1681     if (user) {
1682         user_bufs = atomic_long_add_return(pages, &user->locked_vm);
1683         if (user_bufs > memlock_limit) {
1684             atomic_long_sub(pages, &user->locked_vm);
1685             return -EPERM;
1686         }
1687     }
1688 
1689     return 0;
1690 }
1691 
1692 void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
1693 {
1694     if (user) {
1695         atomic_long_sub(pages, &user->locked_vm);
1696     }
1697 }
1698 
1699 static int bpf_prog_charge_memlock(struct bpf_prog *prog)
1700 {
1701     struct user_struct *user = get_current_user();
1702     int ret;
1703 
1704     ret = __bpf_prog_charge(user, prog->pages);
1705     if (ret) {
1706         free_uid(user);
1707         return ret;
1708     }
1709 
1710     prog->aux->user = user;
1711     return 0;
1712 }
1713 
1714 static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
1715 {
1716     struct user_struct *user = prog->aux->user;
1717 
1718     __bpf_prog_uncharge(user, prog->pages);
1719     free_uid(user);
1720 }
1721 
1722 static int bpf_prog_alloc_id(struct bpf_prog *prog)
1723 {
1724     int id;
1725 
1726     idr_preload(GFP_KERNEL);
1727     spin_lock_bh(&prog_idr_lock);
1728     id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
1729     if (id > 0) {
1730         prog->aux->id = id;
1731     }
1732     spin_unlock_bh(&prog_idr_lock);
1733     idr_preload_end();
1734 
1735     /* id is in [1, INT_MAX) */
1736     if (WARN_ON_ONCE(!id)) {
1737         return -ENOSPC;
1738     }
1739 
1740     return id > 0 ? 0 : id;
1741 }
1742 
1743 void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
1744 {
1745     /* cBPF to eBPF migrations are currently not in the idr store.
1746      * Offloaded programs are removed from the store when their device
1747      * disappears - even if someone grabs an fd to them they are unusable,
1748      * simply waiting for refcnt to drop to be freed.
1749      */
1750     if (!prog->aux->id) {
1751         return;
1752     }
1753 
1754     if (do_idr_lock) {
1755         spin_lock_bh(&prog_idr_lock);
1756     } else {
1757         __acquire(&prog_idr_lock);
1758     }
1759 
1760     idr_remove(&prog_idr, prog->aux->id);
1761     prog->aux->id = 0;
1762 
1763     if (do_idr_lock) {
1764         spin_unlock_bh(&prog_idr_lock);
1765     } else {
1766         __release(&prog_idr_lock);
1767     }
1768 }
1769 
1770 static void _bpf_prog_put_rcu(struct rcu_head *rcu)
1771 {
1772     struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
1773 
1774     kvfree(aux->func_info);
1775     kfree(aux->func_info_aux);
1776     bpf_prog_uncharge_memlock(aux->prog);
1777     security_bpf_prog_free(aux);
1778     bpf_prog_free(aux->prog);
1779 }
1780 
1781 static void _bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
1782 {
1783     bpf_prog_kallsyms_del_all(prog);
1784     btf_put(prog->aux->btf);
1785     bpf_prog_free_linfo(prog);
1786 
1787     if (deferred) {
1788         if (prog->aux->sleepable) {
1789             call_rcu_tasks_trace(&prog->aux->rcu, _bpf_prog_put_rcu);
1790         } else {
1791             call_rcu(&prog->aux->rcu, _bpf_prog_put_rcu);
1792         }
1793     } else {
1794         _bpf_prog_put_rcu(&prog->aux->rcu);
1795     }
1796 }
1797 
1798 static void _bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
1799 {
1800     if (atomic64_dec_and_test(&prog->aux->refcnt)) {
1801         perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
1802         bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
1803         /* bpf_prog_free_id() must be called first */
1804         bpf_prog_free_id(prog, do_idr_lock);
1805         _bpf_prog_put_noref(prog, true);
1806     }
1807 }
1808 
1809 void bpf_prog_put(struct bpf_prog *prog)
1810 {
1811     _bpf_prog_put(prog, true);
1812 }
1813 EXPORT_SYMBOL_GPL(bpf_prog_put);
1814 
1815 static int bpf_prog_release(struct inode *inode, struct file *filp)
1816 {
1817     struct bpf_prog *prog = filp->private_data;
1818 
1819     bpf_prog_put(prog);
1820     return 0;
1821 }
1822 
1823 static void bpf_prog_get_stats(const struct bpf_prog *prog, struct bpf_prog_stats *stats)
1824 {
1825     u64 nsecs = 0, cnt = 0;
1826     int cpu;
1827 
1828     for_each_possible_cpu(cpu)
1829     {
1830         const struct bpf_prog_stats *st;
1831         unsigned int start;
1832         u64 tnsecs, tcnt;
1833 
1834         st = per_cpu_ptr(prog->aux->stats, cpu);
1835         do {
1836             start = u64_stats_fetch_begin_irq(&st->syncp);
1837             tnsecs = st->nsecs;
1838             tcnt = st->cnt;
1839         } while (u64_stats_fetch_retry_irq(&st->syncp, start));
1840         nsecs += tnsecs;
1841         cnt += tcnt;
1842     }
1843     stats->nsecs = nsecs;
1844     stats->cnt = cnt;
1845 }
1846 
1847 #ifdef CONFIG_PROC_FS
1848 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
1849 {
1850     const struct bpf_prog *prog = filp->private_data;
1851     char prog_tag[sizeof(prog->tag) * 2 + 1] = {};
1852     struct bpf_prog_stats stats;
1853 
1854     bpf_prog_get_stats(prog, &stats);
1855     bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
1856     seq_printf(m,
1857                "prog_type:\t%u\n"
1858                "prog_jited:\t%u\n"
1859                "prog_tag:\t%s\n"
1860                "memlock:\t%llu\n"
1861                "prog_id:\t%u\n"
1862                "run_time_ns:\t%llu\n"
1863                "run_cnt:\t%llu\n",
1864                prog->type, prog->jited, prog_tag, prog->pages * 1ULL << PAGE_SHIFT, prog->aux->id, stats.nsecs,
1865                stats.cnt);
1866 }
1867 #endif
1868 
1869 const struct file_operations bpf_prog_fops = {
1870 #ifdef CONFIG_PROC_FS
1871     .show_fdinfo = bpf_prog_show_fdinfo,
1872 #endif
1873     .release = bpf_prog_release,
1874     .read = bpf_dummy_read,
1875     .write = bpf_dummy_write,
1876 };
1877 
1878 int bpf_prog_new_fd(struct bpf_prog *prog)
1879 {
1880     int ret;
1881 
1882     ret = security_bpf_prog(prog);
1883     if (ret < 0) {
1884         return ret;
1885     }
1886 
1887     return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
1888 }
1889 
1890 static struct bpf_prog *i_bpf_prog_get(struct fd f)
1891 {
1892     if (!f.file) {
1893         return ERR_PTR(-EBADF);
1894     }
1895     if (f.file->f_op != &bpf_prog_fops) {
1896         fdput(f);
1897         return ERR_PTR(-EINVAL);
1898     }
1899 
1900     return f.file->private_data;
1901 }
1902 
1903 void bpf_prog_add(struct bpf_prog *prog, int i)
1904 {
1905     atomic64_add(i, &prog->aux->refcnt);
1906 }
1907 EXPORT_SYMBOL_GPL(bpf_prog_add);
1908 
1909 void bpf_prog_sub(struct bpf_prog *prog, int i)
1910 {
1911     /* Only to be used for undoing previous bpf_prog_add() in some
1912      * error path. We still know that another entity in our call
1913      * path holds a reference to the program, thus atomic_sub() can
1914      * be safely used in such cases!
1915      */
1916     WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
1917 }
1918 EXPORT_SYMBOL_GPL(bpf_prog_sub);
1919 
1920 void bpf_prog_inc(struct bpf_prog *prog)
1921 {
1922     atomic64_inc(&prog->aux->refcnt);
1923 }
1924 EXPORT_SYMBOL_GPL(bpf_prog_inc);
1925 
1926 /* prog_idr_lock should have been held */
1927 struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
1928 {
1929     int refold;
1930 
1931     refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
1932     if (!refold) {
1933         return ERR_PTR(-ENOENT);
1934     }
1935 
1936     return prog;
1937 }
1938 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
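/*
 * A minimal usage sketch for the locking rule stated above: the refcount bump
 * must happen while prog_idr_lock is held so the prog cannot be freed between
 * the idr lookup and the increment. The canonical in-tree user is
 * bpf_prog_by_id() further below; a caller would look like:
 *
 *     spin_lock_bh(&prog_idr_lock);
 *     prog = idr_find(&prog_idr, id);
 *     if (prog)
 *         prog = bpf_prog_inc_not_zero(prog);   // ERR_PTR(-ENOENT) if refcnt hit 0
 *     else
 *         prog = ERR_PTR(-ENOENT);
 *     spin_unlock_bh(&prog_idr_lock);
 */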
1939 
1940 bool bpf_prog_get_ok(struct bpf_prog *prog, enum bpf_prog_type *attach_type, bool attach_drv)
1941 {
1942     /* not an attachment, just a refcount inc, always allow */
1943     if (!attach_type) {
1944         return true;
1945     }
1946 
1947     if (prog->type != *attach_type) {
1948         return false;
1949     }
1950     if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) {
1951         return false;
1952     }
1953 
1954     return true;
1955 }
1956 
1957 static struct bpf_prog *_bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, bool attach_drv)
1958 {
1959     struct fd f = fdget(ufd);
1960     struct bpf_prog *prog;
1961 
1962     prog = i_bpf_prog_get(f);
1963     if (IS_ERR(prog)) {
1964         return prog;
1965     }
1966     if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
1967         prog = ERR_PTR(-EINVAL);
1968         goto out;
1969     }
1970 
1971     bpf_prog_inc(prog);
1972 out:
1973     fdput(f);
1974     return prog;
1975 }
1976 
1977 struct bpf_prog *bpf_prog_get(u32 ufd)
1978 {
1979     return _bpf_prog_get(ufd, NULL, false);
1980 }
1981 
1982 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv)
1983 {
1984     return _bpf_prog_get(ufd, &type, attach_drv);
1985 }
1986 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
1987 
1988 /* Initially all BPF programs could be loaded w/o specifying
1989  * expected_attach_type. Later, for some of them, specifying expected_attach_type
1990  * at load time became required so that the program could be validated properly.
1991  * Programs of types that are allowed to be loaded both w/ and w/o (for
1992  * backward compatibility) expected_attach_type, should have the default attach
1993  * type assigned to expected_attach_type for the latter case, so that it can be
1994  * validated later at attach time.
1995  *
1996  * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if
1997  * prog type requires it but has some attach types that have to be backward
1998  * compatible.
1999  */
2000 static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
2001 {
2002     if (attr->prog_type == BPF_PROG_TYPE_CGROUP_SOCK) {
2003         /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't
2004          * exist so checking for non-zero is the way to go here.
2005          */
2006         if (!attr->expected_attach_type) {
2007             attr->expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
2008         }
2009     }
2010 }
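/*
 * User-space sketch of the backward-compat case handled above (field names are
 * from uapi/linux/bpf.h; the insns/license setup is elided): a
 * BPF_PROG_TYPE_CGROUP_SOCK program loaded with expected_attach_type left at
 * zero is treated as if BPF_CGROUP_INET_SOCK_CREATE had been requested.
 *
 *     union bpf_attr attr = {};
 *
 *     attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
 *     attr.expected_attach_type = 0;   // fixed up to BPF_CGROUP_INET_SOCK_CREATE
 *     ...
 *     prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */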
2011 
2012 static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type,
2013                                       u32 btf_id, u32 prog_fd)
2014 {
2015     if (btf_id) {
2016         if (btf_id > BTF_MAX_TYPE) {
2017             return -EINVAL;
2018         }
2019 
2020         switch (prog_type) {
2021             case BPF_PROG_TYPE_TRACING:
2022             case BPF_PROG_TYPE_LSM:
2023             case BPF_PROG_TYPE_STRUCT_OPS:
2024             case BPF_PROG_TYPE_EXT:
2025                 break;
2026             default:
2027                 return -EINVAL;
2028         }
2029     }
2030 
2031     if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && prog_type != BPF_PROG_TYPE_EXT) {
2032         return -EINVAL;
2033     }
2034 
2035     switch (prog_type) {
2036         case BPF_PROG_TYPE_CGROUP_SOCK:
2037             switch (expected_attach_type) {
2038                 case BPF_CGROUP_INET_SOCK_CREATE:
2039                 case BPF_CGROUP_INET_SOCK_RELEASE:
2040                 case BPF_CGROUP_INET4_POST_BIND:
2041                 case BPF_CGROUP_INET6_POST_BIND:
2042                     return 0;
2043                 default:
2044                     return -EINVAL;
2045             }
2046         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2047             switch (expected_attach_type) {
2048                 case BPF_CGROUP_INET4_BIND:
2049                 case BPF_CGROUP_INET6_BIND:
2050                 case BPF_CGROUP_INET4_CONNECT:
2051                 case BPF_CGROUP_INET6_CONNECT:
2052                 case BPF_CGROUP_INET4_GETPEERNAME:
2053                 case BPF_CGROUP_INET6_GETPEERNAME:
2054                 case BPF_CGROUP_INET4_GETSOCKNAME:
2055                 case BPF_CGROUP_INET6_GETSOCKNAME:
2056                 case BPF_CGROUP_UDP4_SENDMSG:
2057                 case BPF_CGROUP_UDP6_SENDMSG:
2058                 case BPF_CGROUP_UDP4_RECVMSG:
2059                 case BPF_CGROUP_UDP6_RECVMSG:
2060                     return 0;
2061                 default:
2062                     return -EINVAL;
2063             }
2064         case BPF_PROG_TYPE_CGROUP_SKB:
2065             switch (expected_attach_type) {
2066                 case BPF_CGROUP_INET_INGRESS:
2067                 case BPF_CGROUP_INET_EGRESS:
2068                     return 0;
2069                 default:
2070                     return -EINVAL;
2071             }
2072         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2073             switch (expected_attach_type) {
2074                 case BPF_CGROUP_SETSOCKOPT:
2075                 case BPF_CGROUP_GETSOCKOPT:
2076                     return 0;
2077                 default:
2078                     return -EINVAL;
2079             }
2080         case BPF_PROG_TYPE_SK_LOOKUP:
2081             if (expected_attach_type == BPF_SK_LOOKUP) {
2082                 return 0;
2083             }
2084             return -EINVAL;
2085         case BPF_PROG_TYPE_EXT:
2086             if (expected_attach_type) {
2087                 return -EINVAL;
2088             }
2089             fallthrough;
2090         default:
2091             return 0;
2092     }
2093 }
2094 
2095 static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
2096 {
2097     switch (prog_type) {
2098         case BPF_PROG_TYPE_SCHED_CLS:
2099         case BPF_PROG_TYPE_SCHED_ACT:
2100         case BPF_PROG_TYPE_XDP:
2101         case BPF_PROG_TYPE_LWT_IN:
2102         case BPF_PROG_TYPE_LWT_OUT:
2103         case BPF_PROG_TYPE_LWT_XMIT:
2104         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2105         case BPF_PROG_TYPE_SK_SKB:
2106         case BPF_PROG_TYPE_SK_MSG:
2107         case BPF_PROG_TYPE_LIRC_MODE2:
2108         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2109         case BPF_PROG_TYPE_CGROUP_DEVICE:
2110         case BPF_PROG_TYPE_CGROUP_SOCK:
2111         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2112         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2113         case BPF_PROG_TYPE_CGROUP_SYSCTL:
2114         case BPF_PROG_TYPE_SOCK_OPS:
2115         case BPF_PROG_TYPE_EXT: /* extends any prog */
2116             return true;
2117         case BPF_PROG_TYPE_CGROUP_SKB:
2118             /* always unpriv */
2119         case BPF_PROG_TYPE_SK_REUSEPORT:
2120             /* equivalent to SOCKET_FILTER. need CAP_BPF only */
2121         default:
2122             return false;
2123     }
2124 }
2125 
2126 static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
2127 {
2128     switch (prog_type) {
2129         case BPF_PROG_TYPE_KPROBE:
2130         case BPF_PROG_TYPE_TRACEPOINT:
2131         case BPF_PROG_TYPE_PERF_EVENT:
2132         case BPF_PROG_TYPE_RAW_TRACEPOINT:
2133         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2134         case BPF_PROG_TYPE_TRACING:
2135         case BPF_PROG_TYPE_LSM:
2136         case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
2137         case BPF_PROG_TYPE_EXT:        /* extends any prog */
2138             return true;
2139         default:
2140             return false;
2141     }
2142 }
2143 
2144 /* last field in 'union bpf_attr' used by this command */
2145 #define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
2146 
2147 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
2148 {
2149     enum bpf_prog_type type = attr->prog_type;
2150     struct bpf_prog *prog;
2151     int err;
2152     char license[128];
2153     bool is_gpl;
2154 
2155     if (CHECK_ATTR(BPF_PROG_LOAD)) {
2156         return -EINVAL;
2157     }
2158 
2159     if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT | BPF_F_TEST_STATE_FREQ | BPF_F_SLEEPABLE |
2160                              BPF_F_TEST_RND_HI32)) {
2161         return -EINVAL;
2162     }
2163 
2164     if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
2165         !bpf_capable()) {
2166         return -EPERM;
2167     }
2168 
2169     /* copy eBPF program license from user space */
2170     if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) {
2171         return -EFAULT;
2172     }
2173     license[sizeof(license) - 1] = 0;
2174 
2175     /* eBPF programs must be GPL compatible to use GPL-ed functions */
2176     is_gpl = license_is_gpl_compatible(license);
2177 
2178     if (attr->insn_cnt == 0 || attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) {
2179         return -E2BIG;
2180     }
2181     if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && !bpf_capable()) {
2182         return -EPERM;
2183     }
2184 
2185     if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) {
2186         return -EPERM;
2187     }
2188     if (is_perfmon_prog_type(type) && !perfmon_capable()) {
2189         return -EPERM;
2190     }
2191 
2192     bpf_prog_load_fixup_attach_type(attr);
2193     if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attr->attach_btf_id, attr->attach_prog_fd)) {
2194         return -EINVAL;
2195     }
2196 
2197     /* plain bpf_prog allocation */
2198     prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
2199     if (!prog) {
2200         return -ENOMEM;
2201     }
2202 
2203     prog->expected_attach_type = attr->expected_attach_type;
2204     prog->aux->attach_btf_id = attr->attach_btf_id;
2205     if (attr->attach_prog_fd) {
2206         struct bpf_prog *dst_prog;
2207 
2208         dst_prog = bpf_prog_get(attr->attach_prog_fd);
2209         if (IS_ERR(dst_prog)) {
2210             err = PTR_ERR(dst_prog);
2211             goto free_prog_nouncharge;
2212         }
2213         prog->aux->dst_prog = dst_prog;
2214     }
2215 
2216     prog->aux->offload_requested = !!attr->prog_ifindex;
2217     prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
2218 
2219     err = security_bpf_prog_alloc(prog->aux);
2220     if (err) {
2221         goto free_prog_nouncharge;
2222     }
2223 
2224     err = bpf_prog_charge_memlock(prog);
2225     if (err) {
2226         goto free_prog_sec;
2227     }
2228 
2229     prog->len = attr->insn_cnt;
2230 
2231     err = -EFAULT;
2232     if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), bpf_prog_insn_size(prog)) != 0) {
2233         goto free_prog;
2234     }
2235 
2236     prog->orig_prog = NULL;
2237     prog->jited = 0;
2238 
2239     atomic64_set(&prog->aux->refcnt, 1);
2240     prog->gpl_compatible = is_gpl ? 1 : 0;
2241 
2242     if (bpf_prog_is_dev_bound(prog->aux)) {
2243         err = bpf_prog_offload_init(prog, attr);
2244         if (err) {
2245             goto free_prog;
2246         }
2247     }
2248 
2249     /* find program type: socket_filter vs tracing_filter */
2250     err = find_prog_type(type, prog);
2251     if (err < 0) {
2252         goto free_prog;
2253     }
2254 
2255     prog->aux->load_time = ktime_get_boottime_ns();
2256     err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, sizeof(attr->prog_name));
2257     if (err < 0) {
2258         goto free_prog;
2259     }
2260 
2261     /* run eBPF verifier */
2262     err = bpf_check(&prog, attr, uattr);
2263     if (err < 0) {
2264         goto free_used_maps;
2265     }
2266 
2267     prog = bpf_prog_select_runtime(prog, &err);
2268     if (err < 0) {
2269         goto free_used_maps;
2270     }
2271 
2272     err = bpf_prog_alloc_id(prog);
2273     if (err) {
2274         goto free_used_maps;
2275     }
2276 
2277     /* Upon success of bpf_prog_alloc_id(), the BPF prog is
2278      * effectively publicly exposed. However, retrieving via
2279      * bpf_prog_get_fd_by_id() will take another reference,
2280      * therefore it cannot be gone underneath us.
2281      *
2282      * Only for the time /after/ successful bpf_prog_new_fd()
2283      * and before returning to userspace, we might just hold
2284      * one reference and any parallel close on that fd could
2285      * rip everything out. Hence, below notifications must
2286      * happen before bpf_prog_new_fd().
2287      *
2288      * Also, any failure handling from this point onwards must
2289      * be using bpf_prog_put() given the program is exposed.
2290      */
2291     bpf_prog_kallsyms_add(prog);
2292     perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
2293     bpf_audit_prog(prog, BPF_AUDIT_LOAD);
2294 
2295     err = bpf_prog_new_fd(prog);
2296     if (err < 0) {
2297         bpf_prog_put(prog);
2298     }
2299     return err;
2300 
2301 free_used_maps:
2302     /* In case we have subprogs, we need to wait for a grace
2303      * period before we can tear down JIT memory since symbols
2304      * are already exposed under kallsyms.
2305      */
2306     _bpf_prog_put_noref(prog, prog->aux->func_cnt);
2307     return err;
2308 free_prog:
2309     bpf_prog_uncharge_memlock(prog);
2310 free_prog_sec:
2311     security_bpf_prog_free(prog->aux);
2312 free_prog_nouncharge:
2313     bpf_prog_free(prog);
2314     return err;
2315 }
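/*
 * Error-unwinding summary for bpf_prog_load() above. The free_prog* labels
 * fall through to one another; free_used_maps hands everything to
 * _bpf_prog_put_noref() instead. Once bpf_prog_alloc_id() has succeeded the
 * prog is publicly visible and only bpf_prog_put() may be used:
 *
 *     dst_prog lookup / security_bpf_prog_alloc() fails    -> free_prog_nouncharge
 *     bpf_prog_charge_memlock() fails                      -> free_prog_sec
 *     insn copy / offload init / type or name setup fails  -> free_prog
 *     bpf_check() / select_runtime() / alloc_id() fails    -> free_used_maps
 *     bpf_prog_new_fd() fails (prog already exposed)       -> bpf_prog_put()
 */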
2316 
2317 #define BPF_OBJ_LAST_FIELD file_flags
2318 
2319 static int bpf_obj_pin(const union bpf_attr *attr)
2320 {
2321     if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) {
2322         return -EINVAL;
2323     }
2324 
2325     return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
2326 }
2327 
2328 static int bpf_obj_get(const union bpf_attr *attr)
2329 {
2330     if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || attr->file_flags & ~BPF_OBJ_FLAG_MASK) {
2331         return -EINVAL;
2332     }
2333 
2334     return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), attr->file_flags);
2335 }
2336 
2337 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops,
2338                    struct bpf_prog *prog)
2339 {
2340     atomic64_set(&link->refcnt, 1);
2341     link->type = type;
2342     link->id = 0;
2343     link->ops = ops;
2344     link->prog = prog;
2345 }
2346 
2347 static void bpf_link_free_id(int id)
2348 {
2349     if (!id) {
2350         return;
2351     }
2352 
2353     spin_lock_bh(&link_idr_lock);
2354     idr_remove(&link_idr, id);
2355     spin_unlock_bh(&link_idr_lock);
2356 }
2357 
2358 /* Clean up bpf_link and corresponding anon_inode file and FD. After
2359  * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
2360  * anon_inode's release() call. This helper marks bpf_link as
2361  * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
2362  * is not decremented, it's the responsibility of the calling code that failed
2363  * to complete bpf_link initialization.
2364  */
2365 void bpf_link_cleanup(struct bpf_link_primer *primer)
2366 {
2367     primer->link->prog = NULL;
2368     bpf_link_free_id(primer->id);
2369     fput(primer->file);
2370     put_unused_fd(primer->fd);
2371 }
2372 
2373 void bpf_link_inc(struct bpf_link *link)
2374 {
2375     atomic64_inc(&link->refcnt);
2376 }
2377 
2378 /* bpf_link_free is guaranteed to be called from process context */
2379 static void bpf_link_free(struct bpf_link *link)
2380 {
2381     bpf_link_free_id(link->id);
2382     if (link->prog) {
2383         /* detach BPF program, clean up used resources */
2384         link->ops->release(link);
2385         bpf_prog_put(link->prog);
2386     }
2387     /* free bpf_link and its containing memory */
2388     link->ops->dealloc(link);
2389 }
2390 
2391 static void bpf_link_put_deferred(struct work_struct *work)
2392 {
2393     struct bpf_link *link = container_of(work, struct bpf_link, work);
2394 
2395     bpf_link_free(link);
2396 }
2397 
2398 /* bpf_link_put can be called from atomic context, but ensures that resources
2399  * are freed from process context
2400  */
2401 void bpf_link_put(struct bpf_link *link)
2402 {
2403     if (!atomic64_dec_and_test(&link->refcnt)) {
2404         return;
2405     }
2406 
2407     if (in_atomic()) {
2408         INIT_WORK(&link->work, bpf_link_put_deferred);
2409         schedule_work(&link->work);
2410     } else {
2411         bpf_link_free(link);
2412     }
2413 }
2414 
2415 static int bpf_link_release(struct inode *inode, struct file *filp)
2416 {
2417     struct bpf_link *link = filp->private_data;
2418 
2419     bpf_link_put(link);
2420     return 0;
2421 }
2422 
2423 #ifdef CONFIG_PROC_FS
2424 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
2425 #define BPF_MAP_TYPE(_id, _ops)
2426 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
2427 static const char *bpf_link_type_strs[] = {
2428     [BPF_LINK_TYPE_UNSPEC] = "<invalid>",
2429 #include <linux/bpf_types.h>
2430 };
2431 #undef BPF_PROG_TYPE
2432 #undef BPF_MAP_TYPE
2433 #undef BPF_LINK_TYPE
2434 
2435 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
2436 {
2437     const struct bpf_link *link = filp->private_data;
2438     const struct bpf_prog *prog = link->prog;
2439     char prog_tag[sizeof(prog->tag) * 2 + 1] = {};
2440 
2441     bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
2442     seq_printf(m,
2443                "link_type:\t%s\n"
2444                "link_id:\t%u\n"
2445                "prog_tag:\t%s\n"
2446                "prog_id:\t%u\n",
2447                bpf_link_type_strs[link->type], link->id, prog_tag, prog->aux->id);
2448     if (link->ops->show_fdinfo) {
2449         link->ops->show_fdinfo(link, m);
2450     }
2451 }
2452 #endif
2453 
2454 static const struct file_operations bpf_link_fops = {
2455 #ifdef CONFIG_PROC_FS
2456     .show_fdinfo = bpf_link_show_fdinfo,
2457 #endif
2458     .release = bpf_link_release,
2459     .read = bpf_dummy_read,
2460     .write = bpf_dummy_write,
2461 };
2462 
2463 static int bpf_link_alloc_id(struct bpf_link *link)
2464 {
2465     int id;
2466 
2467     idr_preload(GFP_KERNEL);
2468     spin_lock_bh(&link_idr_lock);
2469     id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
2470     spin_unlock_bh(&link_idr_lock);
2471     idr_preload_end();
2472 
2473     return id;
2474 }
2475 
2476 /* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
2477  * reserving unused FD and allocating ID from link_idr. This is to be paired
2478  * with bpf_link_settle() to install FD and ID and expose bpf_link to
2479  * user-space, if bpf_link is successfully attached. If not, bpf_link and
2480  * pre-allocated resources are to be freed with a bpf_link_cleanup() call. All
2481  * the transient state is passed around in struct bpf_link_primer.
2482  * This is the preferred way to create and initialize bpf_link, especially when
2483  * there are complicated and expensive operations in between creating bpf_link
2484  * itself and attaching it to a BPF hook. By using bpf_link_prime() and
2485  * bpf_link_settle(), kernel code using bpf_link doesn't have to perform
2486  * expensive (and potentially failing) roll-back operations in the rare case
2487  * that the file, FD, or ID can't be allocated.
2488  */
2489 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
2490 {
2491     struct file *file;
2492     int fd, id;
2493 
2494     fd = get_unused_fd_flags(O_CLOEXEC);
2495     if (fd < 0) {
2496         return fd;
2497     }
2498 
2499     id = bpf_link_alloc_id(link);
2500     if (id < 0) {
2501         put_unused_fd(fd);
2502         return id;
2503     }
2504 
2505     file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
2506     if (IS_ERR(file)) {
2507         bpf_link_free_id(id);
2508         put_unused_fd(fd);
2509         return PTR_ERR(file);
2510     }
2511 
2512     primer->link = link;
2513     primer->file = file;
2514     primer->fd = fd;
2515     primer->id = id;
2516     return 0;
2517 }
2518 
2519 int bpf_link_settle(struct bpf_link_primer *primer)
2520 {
2521     /* make bpf_link fetchable by ID */
2522     spin_lock_bh(&link_idr_lock);
2523     primer->link->id = primer->id;
2524     spin_unlock_bh(&link_idr_lock);
2525     /* make bpf_link fetchable by FD */
2526     fd_install(primer->fd, primer->file);
2527     /* pass through installed FD */
2528     return primer->fd;
2529 }
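/*
 * A minimal sketch of the prime/settle pattern described above, as it would
 * appear in a hypothetical link-attach function. mylink, my_link_ops and
 * my_attach_to_hook() are placeholders; the real users in this file (e.g.
 * bpf_tracing_prog_attach(), bpf_raw_tracepoint_open()) follow this shape:
 *
 *     struct bpf_link_primer primer;
 *     int err;
 *
 *     bpf_link_init(&mylink->link, BPF_LINK_TYPE_UNSPEC, &my_link_ops, prog);
 *     err = bpf_link_prime(&mylink->link, &primer);
 *     if (err) {
 *         kfree(mylink);               // not yet exposed, plain kfree is fine
 *         return err;
 *     }
 *
 *     err = my_attach_to_hook(mylink); // expensive, may fail
 *     if (err) {
 *         bpf_link_cleanup(&primer);   // undoes file/FD/ID, not the prog ref
 *         return err;
 *     }
 *
 *     return bpf_link_settle(&primer); // installs FD and ID, returns the FD
 */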
2530 
2531 int bpf_link_new_fd(struct bpf_link *link)
2532 {
2533     return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
2534 }
2535 
2536 struct bpf_link *bpf_link_get_from_fd(u32 ufd)
2537 {
2538     struct fd f = fdget(ufd);
2539     struct bpf_link *link;
2540 
2541     if (!f.file) {
2542         return ERR_PTR(-EBADF);
2543     }
2544     if (f.file->f_op != &bpf_link_fops) {
2545         fdput(f);
2546         return ERR_PTR(-EINVAL);
2547     }
2548 
2549     link = f.file->private_data;
2550     bpf_link_inc(link);
2551     fdput(f);
2552 
2553     return link;
2554 }
2555 
2556 struct bpf_tracing_link {
2557     struct bpf_link link;
2558     enum bpf_attach_type attach_type;
2559     struct bpf_trampoline *trampoline;
2560     struct bpf_prog *tgt_prog;
2561 };
2562 
2563 static void bpf_tracing_link_release(struct bpf_link *link)
2564 {
2565     struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2566 
2567     WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, tr_link->trampoline));
2568 
2569     bpf_trampoline_put(tr_link->trampoline);
2570 
2571     /* tgt_prog is NULL if target is a kernel function */
2572     if (tr_link->tgt_prog) {
2573         bpf_prog_put(tr_link->tgt_prog);
2574     }
2575 }
2576 
2577 static void bpf_tracing_link_dealloc(struct bpf_link *link)
2578 {
2579     struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2580 
2581     kfree(tr_link);
2582 }
2583 
2584 static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq)
2585 {
2586     struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2587 
2588     seq_printf(seq, "attach_type:\t%d\n", tr_link->attach_type);
2589 }
2590 
2591 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info)
2592 {
2593     struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link);
2594 
2595     info->tracing.attach_type = tr_link->attach_type;
2596 
2597     return 0;
2598 }
2599 
2600 static const struct bpf_link_ops bpf_tracing_link_lops = {
2601     .release = bpf_tracing_link_release,
2602     .dealloc = bpf_tracing_link_dealloc,
2603     .show_fdinfo = bpf_tracing_link_show_fdinfo,
2604     .fill_link_info = bpf_tracing_link_fill_link_info,
2605 };
2606 
2607 static int bpf_tracing_prog_attach(struct bpf_prog *prog, int tgt_prog_fd, u32 btf_id)
2608 {
2609     struct bpf_link_primer link_primer;
2610     struct bpf_prog *tgt_prog = NULL;
2611     struct bpf_trampoline *tr = NULL;
2612     struct bpf_tracing_link *link;
2613     u64 key = 0;
2614     int err;
2615 
2616     switch (prog->type) {
2617         case BPF_PROG_TYPE_TRACING:
2618             if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT &&
2619                 prog->expected_attach_type != BPF_MODIFY_RETURN) {
2620                 err = -EINVAL;
2621                 goto out_put_prog;
2622             }
2623             break;
2624         case BPF_PROG_TYPE_EXT:
2625             if (prog->expected_attach_type != 0) {
2626                 err = -EINVAL;
2627                 goto out_put_prog;
2628             }
2629             break;
2630         case BPF_PROG_TYPE_LSM:
2631             if (prog->expected_attach_type != BPF_LSM_MAC) {
2632                 err = -EINVAL;
2633                 goto out_put_prog;
2634             }
2635             break;
2636         default:
2637             err = -EINVAL;
2638             goto out_put_prog;
2639     }
2640 
2641     if (!!tgt_prog_fd != !!btf_id) {
2642         err = -EINVAL;
2643         goto out_put_prog;
2644     }
2645 
2646     if (tgt_prog_fd) {
2647         /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
2648         if (prog->type != BPF_PROG_TYPE_EXT) {
2649             err = -EINVAL;
2650             goto out_put_prog;
2651         }
2652 
2653         tgt_prog = bpf_prog_get(tgt_prog_fd);
2654         if (IS_ERR(tgt_prog)) {
2655             err = PTR_ERR(tgt_prog);
2656             tgt_prog = NULL;
2657             goto out_put_prog;
2658         }
2659 
2660         key = bpf_trampoline_compute_key(tgt_prog, btf_id);
2661     }
2662 
2663     link = kzalloc(sizeof(*link), GFP_USER);
2664     if (!link) {
2665         err = -ENOMEM;
2666         goto out_put_prog;
2667     }
2668     bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, &bpf_tracing_link_lops, prog);
2669     link->attach_type = prog->expected_attach_type;
2670 
2671     mutex_lock(&prog->aux->dst_mutex);
2672 
2673     /* There are a few possible cases here:
2674      *
2675      * - if prog->aux->dst_trampoline is set, the program was just loaded
2676      *   and not yet attached to anything, so we can use the values stored
2677      *   in prog->aux
2678      *
2679      * - if prog->aux->dst_trampoline is NULL, the program has already been
2680      *   attached to a target and its initial target was cleared (below)
2681      *
2682      * - if tgt_prog != NULL, the caller specified tgt_prog_fd +
2683      *   target_btf_id using the link_create API.
2684      *
2685      * - if tgt_prog == NULL, this function was called using the old
2686      *   raw_tracepoint_open API, and we need a target from prog->aux
2687      *
2688      * The combination of no saved target in prog->aux, and no target
2689      * specified on load is illegal, and we reject that here.
2690      */
2691     if (!prog->aux->dst_trampoline && !tgt_prog) {
2692         err = -ENOENT;
2693         goto out_unlock;
2694     }
2695 
2696     if (!prog->aux->dst_trampoline || (key && key != prog->aux->dst_trampoline->key)) {
2697         /* If there is no saved target, or the specified target is
2698          * different from the destination specified at load time, we
2699          * need a new trampoline and a check for compatibility
2700          */
2701         struct bpf_attach_target_info tgt_info = {};
2702 
2703         err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, &tgt_info);
2704         if (err) {
2705             goto out_unlock;
2706         }
2707 
2708         tr = bpf_trampoline_get(key, &tgt_info);
2709         if (!tr) {
2710             err = -ENOMEM;
2711             goto out_unlock;
2712         }
2713     } else {
2714         /* The caller didn't specify a target, or the target was the
2715          * same as the destination supplied during program load. This
2716          * means we can reuse the trampoline and reference from program
2717          * load time, and there is no need to allocate a new one. This
2718          * can only happen once for any program, as the saved values in
2719          * prog->aux are cleared below.
2720          */
2721         tr = prog->aux->dst_trampoline;
2722         tgt_prog = prog->aux->dst_prog;
2723     }
2724 
2725     err = bpf_link_prime(&link->link, &link_primer);
2726     if (err) {
2727         goto out_unlock;
2728     }
2729 
2730     err = bpf_trampoline_link_prog(prog, tr);
2731     if (err) {
2732         bpf_link_cleanup(&link_primer);
2733         link = NULL;
2734         goto out_unlock;
2735     }
2736 
2737     link->tgt_prog = tgt_prog;
2738     link->trampoline = tr;
2739 
2740     /* Always clear the trampoline and target prog from prog->aux to make
2741      * sure the original attach destination is not kept alive after a
2742      * program is (re-)attached to another target.
2743      */
2744     if (prog->aux->dst_prog && (tgt_prog_fd || tr != prog->aux->dst_trampoline)) {
2745         /* got extra prog ref from syscall, or attaching to different prog */
2746         bpf_prog_put(prog->aux->dst_prog);
2747     }
2748     if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) {
2749         /* we allocated a new trampoline, so free the old one */
2750         bpf_trampoline_put(prog->aux->dst_trampoline);
2751     }
2752 
2753     prog->aux->dst_prog = NULL;
2754     prog->aux->dst_trampoline = NULL;
2755     mutex_unlock(&prog->aux->dst_mutex);
2756 
2757     return bpf_link_settle(&link_primer);
2758 out_unlock:
2759     if (tr && tr != prog->aux->dst_trampoline) {
2760         bpf_trampoline_put(tr);
2761     }
2762     mutex_unlock(&prog->aux->dst_mutex);
2763     kfree(link);
2764 out_put_prog:
2765     if (tgt_prog_fd && tgt_prog) {
2766         bpf_prog_put(tgt_prog);
2767     }
2768     return err;
2769 }
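/*
 * Condensed view of the target-selection logic in bpf_tracing_prog_attach()
 * above, restating the four cases from the in-function comment (the result
 * column says where the trampoline comes from):
 *
 *     dst_trampoline   tgt_prog_fd + btf_id   result
 *     --------------   --------------------   --------------------------------
 *     set              not given              reuse prog->aux->dst_trampoline
 *     set              given, same key        reuse prog->aux->dst_trampoline
 *     set              given, different key   bpf_trampoline_get() a new one
 *     NULL             given                  bpf_trampoline_get() a new one
 *     NULL             not given              -ENOENT (no target at all)
 *
 * On success prog->aux->dst_prog and prog->aux->dst_trampoline are always
 * cleared, so the load-time destination is not kept alive across re-attachment.
 */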
2770 
2771 struct bpf_raw_tp_link {
2772     struct bpf_link link;
2773     struct bpf_raw_event_map *btp;
2774 };
2775 
2776 static void bpf_raw_tp_link_release(struct bpf_link *link)
2777 {
2778     struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link);
2779 
2780     bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
2781     bpf_put_raw_tracepoint(raw_tp->btp);
2782 }
2783 
2784 static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
2785 {
2786     struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link);
2787 
2788     kfree(raw_tp);
2789 }
2790 
2791 static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq)
2792 {
2793     struct bpf_raw_tp_link *raw_tp_link = container_of(link, struct bpf_raw_tp_link, link);
2794 
2795     seq_printf(seq, "tp_name:\t%s\n", raw_tp_link->btp->tp->name);
2796 }
2797 
2798 static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info)
2799 {
2800     struct bpf_raw_tp_link *raw_tp_link = container_of(link, struct bpf_raw_tp_link, link);
2801     char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
2802     const char *tp_name = raw_tp_link->btp->tp->name;
2803     u32 ulen = info->raw_tracepoint.tp_name_len;
2804     size_t tp_len = strlen(tp_name);
2805 
2806     if (!ulen ^ !ubuf) {
2807         return -EINVAL;
2808     }
2809 
2810     info->raw_tracepoint.tp_name_len = tp_len + 1;
2811 
2812     if (!ubuf) {
2813         return 0;
2814     }
2815 
2816     if (ulen >= tp_len + 1) {
2817         if (copy_to_user(ubuf, tp_name, tp_len + 1)) {
2818             return -EFAULT;
2819         }
2820     } else {
2821         char zero = '\0';
2822 
2823         if (copy_to_user(ubuf, tp_name, ulen - 1)) {
2824             return -EFAULT;
2825         }
2826         if (put_user(zero, ubuf + ulen - 1)) {
2827             return -EFAULT;
2828         }
2829         return -ENOSPC;
2830     }
2831 
2832     return 0;
2833 }
2834 
2835 static const struct bpf_link_ops bpf_raw_tp_link_lops = {
2836     .release = bpf_raw_tp_link_release,
2837     .dealloc = bpf_raw_tp_link_dealloc,
2838     .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
2839     .fill_link_info = bpf_raw_tp_link_fill_link_info,
2840 };
2841 
2842 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
2843 
2844 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
2845 {
2846     struct bpf_link_primer link_primer;
2847     struct bpf_raw_tp_link *link;
2848     struct bpf_raw_event_map *btp;
2849     struct bpf_prog *prog;
2850     const char *tp_name;
2851     char buf[128];
2852     int err;
2853 
2854     if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) {
2855         return -EINVAL;
2856     }
2857 
2858     prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
2859     if (IS_ERR(prog)) {
2860         return PTR_ERR(prog);
2861     }
2862 
2863     switch (prog->type) {
2864         case BPF_PROG_TYPE_TRACING:
2865         case BPF_PROG_TYPE_EXT:
2866         case BPF_PROG_TYPE_LSM:
2867             if (attr->raw_tracepoint.name) {
2868                 /* The attach point for this category of programs
2869                  * should be specified via btf_id during program load.
2870                  */
2871                 err = -EINVAL;
2872                 goto out_put_prog;
2873             }
2874             if (prog->type == BPF_PROG_TYPE_TRACING && prog->expected_attach_type == BPF_TRACE_RAW_TP) {
2875                 tp_name = prog->aux->attach_func_name;
2876                 break;
2877             }
2878             err = bpf_tracing_prog_attach(prog, 0, 0);
2879             if (err >= 0) {
2880                 return err;
2881             }
2882             goto out_put_prog;
2883         case BPF_PROG_TYPE_RAW_TRACEPOINT:
2884         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2885             if (strncpy_from_user(buf, u64_to_user_ptr(attr->raw_tracepoint.name), sizeof(buf) - 1) < 0) {
2886                 err = -EFAULT;
2887                 goto out_put_prog;
2888             }
2889             buf[sizeof(buf) - 1] = 0;
2890             tp_name = buf;
2891             break;
2892         default:
2893             err = -EINVAL;
2894             goto out_put_prog;
2895     }
2896 
2897     btp = bpf_get_raw_tracepoint(tp_name);
2898     if (!btp) {
2899         err = -ENOENT;
2900         goto out_put_prog;
2901     }
2902 
2903     link = kzalloc(sizeof(*link), GFP_USER);
2904     if (!link) {
2905         err = -ENOMEM;
2906         goto out_put_btp;
2907     }
2908     bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, &bpf_raw_tp_link_lops, prog);
2909     link->btp = btp;
2910 
2911     err = bpf_link_prime(&link->link, &link_primer);
2912     if (err) {
2913         kfree(link);
2914         goto out_put_btp;
2915     }
2916 
2917     err = bpf_probe_register(link->btp, prog);
2918     if (err) {
2919         bpf_link_cleanup(&link_primer);
2920         goto out_put_btp;
2921     }
2922 
2923     return bpf_link_settle(&link_primer);
2924 
2925 out_put_btp:
2926     bpf_put_raw_tracepoint(btp);
2927 out_put_prog:
2928     bpf_prog_put(prog);
2929     return err;
2930 }
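/*
 * User-space sketch of the two ways into bpf_raw_tracepoint_open() above. For
 * BPF_PROG_TYPE_RAW_TRACEPOINT programs the tracepoint is named here; for
 * TRACING/LSM/EXT programs the target was already fixed via attach_btf_id at
 * load time, so raw_tracepoint.name must stay zero. (Attribute names are from
 * uapi/linux/bpf.h; "sched_switch" is just an example tracepoint.)
 *
 *     union bpf_attr attr = {};
 *
 *     attr.raw_tracepoint.prog_fd = prog_fd;
 *     attr.raw_tracepoint.name = (__u64)(unsigned long)"sched_switch"; // raw_tp progs only
 *     link_fd = syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
 */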
2931 
2932 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, enum bpf_attach_type attach_type)
2933 {
2934     switch (prog->type) {
2935         case BPF_PROG_TYPE_CGROUP_SOCK:
2936         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2937         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2938         case BPF_PROG_TYPE_SK_LOOKUP:
2939             return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
2940         case BPF_PROG_TYPE_CGROUP_SKB:
2941             if (!capable(CAP_NET_ADMIN)) {
2942                 /* cg-skb progs can be loaded by unpriv user.
2943                  * check permissions at attach time.
2944                  */
2945                 return -EPERM;
2946             }
2947             return prog->enforce_expected_attach_type && prog->expected_attach_type != attach_type ? -EINVAL : 0;
2948         default:
2949             return 0;
2950     }
2951 }
2952 
2953 static enum bpf_prog_type attach_type_to_prog_type(enum bpf_attach_type attach_type)
2954 {
2955     switch (attach_type) {
2956         case BPF_CGROUP_INET_INGRESS:
2957         case BPF_CGROUP_INET_EGRESS:
2958             return BPF_PROG_TYPE_CGROUP_SKB;
2959         case BPF_CGROUP_INET_SOCK_CREATE:
2960         case BPF_CGROUP_INET_SOCK_RELEASE:
2961         case BPF_CGROUP_INET4_POST_BIND:
2962         case BPF_CGROUP_INET6_POST_BIND:
2963             return BPF_PROG_TYPE_CGROUP_SOCK;
2964         case BPF_CGROUP_INET4_BIND:
2965         case BPF_CGROUP_INET6_BIND:
2966         case BPF_CGROUP_INET4_CONNECT:
2967         case BPF_CGROUP_INET6_CONNECT:
2968         case BPF_CGROUP_INET4_GETPEERNAME:
2969         case BPF_CGROUP_INET6_GETPEERNAME:
2970         case BPF_CGROUP_INET4_GETSOCKNAME:
2971         case BPF_CGROUP_INET6_GETSOCKNAME:
2972         case BPF_CGROUP_UDP4_SENDMSG:
2973         case BPF_CGROUP_UDP6_SENDMSG:
2974         case BPF_CGROUP_UDP4_RECVMSG:
2975         case BPF_CGROUP_UDP6_RECVMSG:
2976             return BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
2977         case BPF_CGROUP_SOCK_OPS:
2978             return BPF_PROG_TYPE_SOCK_OPS;
2979         case BPF_CGROUP_DEVICE:
2980             return BPF_PROG_TYPE_CGROUP_DEVICE;
2981         case BPF_SK_MSG_VERDICT:
2982             return BPF_PROG_TYPE_SK_MSG;
2983         case BPF_SK_SKB_STREAM_PARSER:
2984         case BPF_SK_SKB_STREAM_VERDICT:
2985             return BPF_PROG_TYPE_SK_SKB;
2986         case BPF_LIRC_MODE2:
2987             return BPF_PROG_TYPE_LIRC_MODE2;
2988         case BPF_FLOW_DISSECTOR:
2989             return BPF_PROG_TYPE_FLOW_DISSECTOR;
2990         case BPF_CGROUP_SYSCTL:
2991             return BPF_PROG_TYPE_CGROUP_SYSCTL;
2992         case BPF_CGROUP_GETSOCKOPT:
2993         case BPF_CGROUP_SETSOCKOPT:
2994             return BPF_PROG_TYPE_CGROUP_SOCKOPT;
2995         case BPF_TRACE_ITER:
2996             return BPF_PROG_TYPE_TRACING;
2997         case BPF_SK_LOOKUP:
2998             return BPF_PROG_TYPE_SK_LOOKUP;
2999         case BPF_XDP:
3000             return BPF_PROG_TYPE_XDP;
3001         default:
3002             return BPF_PROG_TYPE_UNSPEC;
3003     }
3004 }
3005 
3006 #define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
3007 
3008 #define BPF_F_ATTACH_MASK (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
3009 
3010 static int bpf_prog_attach(const union bpf_attr *attr)
3011 {
3012     enum bpf_prog_type ptype;
3013     struct bpf_prog *prog;
3014     int ret;
3015 
3016     if (CHECK_ATTR(BPF_PROG_ATTACH)) {
3017         return -EINVAL;
3018     }
3019 
3020     if (attr->attach_flags & ~BPF_F_ATTACH_MASK) {
3021         return -EINVAL;
3022     }
3023 
3024     ptype = attach_type_to_prog_type(attr->attach_type);
3025     if (ptype == BPF_PROG_TYPE_UNSPEC) {
3026         return -EINVAL;
3027     }
3028 
3029     prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
3030     if (IS_ERR(prog)) {
3031         return PTR_ERR(prog);
3032     }
3033 
3034     if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) {
3035         bpf_prog_put(prog);
3036         return -EINVAL;
3037     }
3038 
3039     switch (ptype) {
3040         case BPF_PROG_TYPE_SK_SKB:
3041         case BPF_PROG_TYPE_SK_MSG:
3042             ret = sock_map_get_from_fd(attr, prog);
3043             break;
3044         case BPF_PROG_TYPE_LIRC_MODE2:
3045             ret = lirc_prog_attach(attr, prog);
3046             break;
3047         case BPF_PROG_TYPE_FLOW_DISSECTOR:
3048             ret = netns_bpf_prog_attach(attr, prog);
3049             break;
3050         case BPF_PROG_TYPE_CGROUP_DEVICE:
3051         case BPF_PROG_TYPE_CGROUP_SKB:
3052         case BPF_PROG_TYPE_CGROUP_SOCK:
3053         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3054         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3055         case BPF_PROG_TYPE_CGROUP_SYSCTL:
3056         case BPF_PROG_TYPE_SOCK_OPS:
3057             ret = cgroup_bpf_prog_attach(attr, ptype, prog);
3058             break;
3059         default:
3060             ret = -EINVAL;
3061     }
3062 
3063     if (ret) {
3064         bpf_prog_put(prog);
3065     }
3066     return ret;
3067 }
3068 
3069 #define BPF_PROG_DETACH_LAST_FIELD attach_type
3070 
3071 static int bpf_prog_detach(const union bpf_attr *attr)
3072 {
3073     enum bpf_prog_type ptype;
3074 
3075     if (CHECK_ATTR(BPF_PROG_DETACH)) {
3076         return -EINVAL;
3077     }
3078 
3079     ptype = attach_type_to_prog_type(attr->attach_type);
3080 
3081     switch (ptype) {
3082         case BPF_PROG_TYPE_SK_MSG:
3083         case BPF_PROG_TYPE_SK_SKB:
3084             return sock_map_prog_detach(attr, ptype);
3085         case BPF_PROG_TYPE_LIRC_MODE2:
3086             return lirc_prog_detach(attr);
3087         case BPF_PROG_TYPE_FLOW_DISSECTOR:
3088             return netns_bpf_prog_detach(attr, ptype);
3089         case BPF_PROG_TYPE_CGROUP_DEVICE:
3090         case BPF_PROG_TYPE_CGROUP_SKB:
3091         case BPF_PROG_TYPE_CGROUP_SOCK:
3092         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
3093         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
3094         case BPF_PROG_TYPE_CGROUP_SYSCTL:
3095         case BPF_PROG_TYPE_SOCK_OPS:
3096             return cgroup_bpf_prog_detach(attr, ptype);
3097         default:
3098             return -EINVAL;
3099     }
3100 }
3101 
3102 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
3103 
3104 static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
3105 {
3106     if (!capable(CAP_NET_ADMIN)) {
3107         return -EPERM;
3108     }
3109     if (CHECK_ATTR(BPF_PROG_QUERY)) {
3110         return -EINVAL;
3111     }
3112     if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) {
3113         return -EINVAL;
3114     }
3115 
3116     switch (attr->query.attach_type) {
3117         case BPF_CGROUP_INET_INGRESS:
3118         case BPF_CGROUP_INET_EGRESS:
3119         case BPF_CGROUP_INET_SOCK_CREATE:
3120         case BPF_CGROUP_INET_SOCK_RELEASE:
3121         case BPF_CGROUP_INET4_BIND:
3122         case BPF_CGROUP_INET6_BIND:
3123         case BPF_CGROUP_INET4_POST_BIND:
3124         case BPF_CGROUP_INET6_POST_BIND:
3125         case BPF_CGROUP_INET4_CONNECT:
3126         case BPF_CGROUP_INET6_CONNECT:
3127         case BPF_CGROUP_INET4_GETPEERNAME:
3128         case BPF_CGROUP_INET6_GETPEERNAME:
3129         case BPF_CGROUP_INET4_GETSOCKNAME:
3130         case BPF_CGROUP_INET6_GETSOCKNAME:
3131         case BPF_CGROUP_UDP4_SENDMSG:
3132         case BPF_CGROUP_UDP6_SENDMSG:
3133         case BPF_CGROUP_UDP4_RECVMSG:
3134         case BPF_CGROUP_UDP6_RECVMSG:
3135         case BPF_CGROUP_SOCK_OPS:
3136         case BPF_CGROUP_DEVICE:
3137         case BPF_CGROUP_SYSCTL:
3138         case BPF_CGROUP_GETSOCKOPT:
3139         case BPF_CGROUP_SETSOCKOPT:
3140             return cgroup_bpf_prog_query(attr, uattr);
3141         case BPF_LIRC_MODE2:
3142             return lirc_prog_query(attr, uattr);
3143         case BPF_FLOW_DISSECTOR:
3144         case BPF_SK_LOOKUP:
3145             return netns_bpf_prog_query(attr, uattr);
3146         default:
3147             return -EINVAL;
3148     }
3149 }
3150 
3151 #define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
3152 
3153 static int bpf_prog_test_run(const union bpf_attr *attr, union bpf_attr __user *uattr)
3154 {
3155     struct bpf_prog *prog;
3156     int ret = -ENOTSUPP;
3157 
3158     if (CHECK_ATTR(BPF_PROG_TEST_RUN)) {
3159         return -EINVAL;
3160     }
3161 
3162     if ((attr->test.ctx_size_in && !attr->test.ctx_in) || (!attr->test.ctx_size_in && attr->test.ctx_in)) {
3163         return -EINVAL;
3164     }
3165 
3166     if ((attr->test.ctx_size_out && !attr->test.ctx_out) || (!attr->test.ctx_size_out && attr->test.ctx_out)) {
3167         return -EINVAL;
3168     }
3169 
3170     prog = bpf_prog_get(attr->test.prog_fd);
3171     if (IS_ERR(prog)) {
3172         return PTR_ERR(prog);
3173     }
3174 
3175     if (prog->aux->ops->test_run) {
3176         ret = prog->aux->ops->test_run(prog, attr, uattr);
3177     }
3178 
3179     bpf_prog_put(prog);
3180     return ret;
3181 }
3182 
3183 #define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
3184 
3185 static int bpf_obj_get_next_id(const union bpf_attr *attr, union bpf_attr __user *uattr, struct idr *idr,
3186                                spinlock_t *lock)
3187 {
3188     u32 next_id = attr->start_id;
3189     int err = 0;
3190 
3191     if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) {
3192         return -EINVAL;
3193     }
3194 
3195     if (!capable(CAP_SYS_ADMIN)) {
3196         return -EPERM;
3197     }
3198 
3199     next_id++;
3200     spin_lock_bh(lock);
3201     if (!idr_get_next(idr, &next_id)) {
3202         err = -ENOENT;
3203     }
3204     spin_unlock_bh(lock);
3205 
3206     if (!err) {
3207         err = put_user(next_id, &uattr->next_id);
3208     }
3209 
3210     return err;
3211 }
3212 
3213 struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
3214 {
3215     struct bpf_map *map;
3216 
3217     spin_lock_bh(&map_idr_lock);
3218 
3219     while (1) {
3220         map = idr_get_next(&map_idr, id);
3221         if (map) {
3222             map = _bpf_map_inc_not_zero(map, false);
3223             if (IS_ERR(map)) {
3224                 (*id)++;
3225                 continue;
3226             }
3227         }
3228         break;
3229     }
3230     spin_unlock_bh(&map_idr_lock);
3231 
3232     return map;
3233 }
3234 
3235 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id)
3236 {
3237     struct bpf_prog *prog;
3238 
3239     spin_lock_bh(&prog_idr_lock);
3240     while (1) {
3241         prog = idr_get_next(&prog_idr, id);
3242         if (prog) {
3243             prog = bpf_prog_inc_not_zero(prog);
3244             if (IS_ERR(prog)) {
3245                 (*id)++;
3246                 continue;
3247             }
3248         }
3249         break;
3250     }
3251     spin_unlock_bh(&prog_idr_lock);
3252 
3253     return prog;
3254 }
3255 
3256 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
3257 
3258 struct bpf_prog *bpf_prog_by_id(u32 id)
3259 {
3260     struct bpf_prog *prog;
3261 
3262     if (!id) {
3263         return ERR_PTR(-ENOENT);
3264     }
3265 
3266     spin_lock_bh(&prog_idr_lock);
3267     prog = idr_find(&prog_idr, id);
3268     if (prog) {
3269         prog = bpf_prog_inc_not_zero(prog);
3270     } else {
3271         prog = ERR_PTR(-ENOENT);
3272     }
3273     spin_unlock_bh(&prog_idr_lock);
3274     return prog;
3275 }
3276 
3277 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
3278 {
3279     struct bpf_prog *prog;
3280     u32 id = attr->prog_id;
3281     int fd;
3282 
3283     if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) {
3284         return -EINVAL;
3285     }
3286 
3287     if (!capable(CAP_SYS_ADMIN)) {
3288         return -EPERM;
3289     }
3290 
3291     prog = bpf_prog_by_id(id);
3292     if (IS_ERR(prog)) {
3293         return PTR_ERR(prog);
3294     }
3295 
3296     fd = bpf_prog_new_fd(prog);
3297     if (fd < 0) {
3298         bpf_prog_put(prog);
3299     }
3300 
3301     return fd;
3302 }
3303 
3304 #define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
3305 
3306 static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
3307 {
3308     struct bpf_map *map;
3309     u32 id = attr->map_id;
3310     int f_flags;
3311     int fd;
3312 
3313     if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || attr->open_flags & ~BPF_OBJ_FLAG_MASK) {
3314         return -EINVAL;
3315     }
3316 
3317     if (!capable(CAP_SYS_ADMIN)) {
3318         return -EPERM;
3319     }
3320 
3321     f_flags = bpf_get_file_flag(attr->open_flags);
3322     if (f_flags < 0) {
3323         return f_flags;
3324     }
3325 
3326     spin_lock_bh(&map_idr_lock);
3327     map = idr_find(&map_idr, id);
3328     if (map) {
3329         map = _bpf_map_inc_not_zero(map, true);
3330     } else {
3331         map = ERR_PTR(-ENOENT);
3332     }
3333     spin_unlock_bh(&map_idr_lock);
3334 
3335     if (IS_ERR(map)) {
3336         return PTR_ERR(map);
3337     }
3338 
3339     fd = bpf_map_new_fd(map, f_flags);
3340     if (fd < 0) {
3341         bpf_map_put_with_uref(map);
3342     }
3343 
3344     return fd;
3345 }
3346 
3347 static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, unsigned long addr, u32 *off, u32 *type)
3348 {
3349     const struct bpf_map *map;
3350     int i;
3351 
3352     mutex_lock(&prog->aux->used_maps_mutex);
3353     for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) {
3354         map = prog->aux->used_maps[i];
3355         if (map == (void *)addr) {
3356             *type = BPF_PSEUDO_MAP_FD;
3357             goto out;
3358         }
3359         if (!map->ops->map_direct_value_meta) {
3360             continue;
3361         }
3362         if (!map->ops->map_direct_value_meta(map, addr, off)) {
3363             *type = BPF_PSEUDO_MAP_VALUE;
3364             goto out;
3365         }
3366     }
3367     map = NULL;
3368 
3369 out:
3370     mutex_unlock(&prog->aux->used_maps_mutex);
3371     return map;
3372 }
3373 
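/* Duplicate the program's translated instructions in a form that is safe to
 * expose to user space: tail calls and internal call encodings are rewritten
 * back to their UAPI form, kernel addresses are zeroed unless the caller may
 * see raw pointers, and embedded map pointers are replaced by map IDs (and
 * value offsets) via bpf_map_from_imm().
 */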
3374 static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, const struct cred *f_cred)
3375 {
3376     const struct bpf_map *map;
3377     struct bpf_insn *insns;
3378     u32 off, type;
3379     u64 imm;
3380     u8 code;
3381     int i;
3382 
3383     insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), GFP_USER);
3384     if (!insns) {
3385         return insns;
3386     }
3387 
3388     for (i = 0; i < prog->len; i++) {
3389         code = insns[i].code;
3390 
3391         if (code == (BPF_JMP | BPF_TAIL_CALL)) {
3392             insns[i].code = BPF_JMP | BPF_CALL;
3393             insns[i].imm = BPF_FUNC_tail_call;
3394             /* fall-through */
3395         }
3396         if (code == (BPF_JMP | BPF_CALL) || code == (BPF_JMP | BPF_CALL_ARGS)) {
3397             if (code == (BPF_JMP | BPF_CALL_ARGS)) {
3398                 insns[i].code = BPF_JMP | BPF_CALL;
3399             }
3400             if (!bpf_dump_raw_ok(f_cred)) {
3401                 insns[i].imm = 0;
3402             }
3403             continue;
3404         }
3405         if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) {
3406             insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM;
3407             continue;
3408         }
3409 
3410         if (code != (BPF_LD | BPF_IMM | BPF_DW)) {
3411             continue;
3412         }
3413 
3414         imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
3415         map = bpf_map_from_imm(prog, imm, &off, &type);
3416         if (map) {
3417             insns[i].src_reg = type;
3418             insns[i].imm = map->id;
3419             insns[i + 1].imm = off;
3420             continue;
3421         }
3422     }
3423 
3424     return insns;
3425 }
3426 
3427 static int set_info_rec_size(struct bpf_prog_info *info)
3428 {
3429     /*
3430      * Ensure info.*_rec_size is the same as the kernel's expected size,
3431      *
3432      * or
3433      *
3434      * only allow a zero *_rec_size if both _rec_size and _cnt are
3435      * zero.  In either accepted case, the kernel sets the expected
3436      * _rec_size back in the info (see the usage sketch after this function).
3437      */
3438 
3439     if ((info->nr_func_info || info->func_info_rec_size) && info->func_info_rec_size != sizeof(struct bpf_func_info)) {
3440         return -EINVAL;
3441     }
3442 
3443     if ((info->nr_line_info || info->line_info_rec_size) && info->line_info_rec_size != sizeof(struct bpf_line_info)) {
3444         return -EINVAL;
3445     }
3446 
3447     if ((info->nr_jited_line_info || info->jited_line_info_rec_size) &&
3448         info->jited_line_info_rec_size != sizeof(__u64)) {
3449         return -EINVAL;
3450     }
3451 
3452     info->func_info_rec_size = sizeof(struct bpf_func_info);
3453     info->line_info_rec_size = sizeof(struct bpf_line_info);
3454     info->jited_line_info_rec_size = sizeof(__u64);
3455 
3456     return 0;
3457 }
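/*
 * A minimal, hedged sketch of how user space satisfies the checks above when
 * requesting func_info through BPF_OBJ_GET_INFO_BY_FD (field names are from
 * the UAPI struct bpf_prog_info; the buffer size of 16 records is purely an
 * illustrative assumption):
 *
 *   struct bpf_func_info finfo[16];
 *   struct bpf_prog_info info = {};
 *
 *   info.nr_func_info = 16;
 *   info.func_info_rec_size = sizeof(struct bpf_func_info);
 *   info.func_info = (__u64)(unsigned long)finfo;
 *
 * The struct is then passed through attr.info.info as usual; on return the
 * kernel has written back the real nr_func_info and the expected record size.
 */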
3458 
3459 static int bpf_prog_get_info_by_fd(struct file *file, struct bpf_prog *prog, const union bpf_attr *attr,
3460                                    union bpf_attr __user *uattr)
3461 {
3462     struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3463     struct bpf_prog_info info;
3464     u32 info_len = attr->info.info_len;
3465     struct bpf_prog_stats stats;
3466     char __user *uinsns;
3467     u32 ulen;
3468     int err;
3469 
3470     err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3471     if (err) {
3472         return err;
3473     }
3474     info_len = min_t(u32, sizeof(info), info_len);
3475 
3476     memset(&info, 0, sizeof(info));
3477     if (copy_from_user(&info, uinfo, info_len)) {
3478         return -EFAULT;
3479     }
3480 
3481     info.type = prog->type;
3482     info.id = prog->aux->id;
3483     info.load_time = prog->aux->load_time;
3484     info.created_by_uid = from_kuid_munged(current_user_ns(), prog->aux->user->uid);
3485     info.gpl_compatible = prog->gpl_compatible;
3486 
3487     memcpy(info.tag, prog->tag, sizeof(prog->tag));
3488     memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
3489 
3490     mutex_lock(&prog->aux->used_maps_mutex);
3491     ulen = info.nr_map_ids;
3492     info.nr_map_ids = prog->aux->used_map_cnt;
3493     ulen = min_t(u32, info.nr_map_ids, ulen);
3494     if (ulen) {
3495         u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
3496         u32 i;
3497 
3498         for (i = 0; i < ulen; i++) {
3499             if (put_user(prog->aux->used_maps[i]->id, &user_map_ids[i])) {
3500                 mutex_unlock(&prog->aux->used_maps_mutex);
3501                 return -EFAULT;
3502             }
3503         }
3504     }
3505     mutex_unlock(&prog->aux->used_maps_mutex);
3506 
3507     err = set_info_rec_size(&info);
3508     if (err) {
3509         return err;
3510     }
3511 
3512     bpf_prog_get_stats(prog, &stats);
3513     info.run_time_ns = stats.nsecs;
3514     info.run_cnt = stats.cnt;
3515 
3516     if (!bpf_capable()) {
3517         info.jited_prog_len = 0;
3518         info.xlated_prog_len = 0;
3519         info.nr_jited_ksyms = 0;
3520         info.nr_jited_func_lens = 0;
3521         info.nr_func_info = 0;
3522         info.nr_line_info = 0;
3523         info.nr_jited_line_info = 0;
3524         goto done;
3525     }
3526 
3527     ulen = info.xlated_prog_len;
3528     info.xlated_prog_len = bpf_prog_insn_size(prog);
3529     if (info.xlated_prog_len && ulen) {
3530         struct bpf_insn *insns_sanitized;
3531         bool fault;
3532 
3533         if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
3534             info.xlated_prog_insns = 0;
3535             goto done;
3536         }
3537         insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
3538         if (!insns_sanitized) {
3539             return -ENOMEM;
3540         }
3541         uinsns = u64_to_user_ptr(info.xlated_prog_insns);
3542         ulen = min_t(u32, info.xlated_prog_len, ulen);
3543         fault = copy_to_user(uinsns, insns_sanitized, ulen);
3544         kfree(insns_sanitized);
3545         if (fault) {
3546             return -EFAULT;
3547         }
3548     }
3549 
3550     if (bpf_prog_is_dev_bound(prog->aux)) {
3551         err = bpf_prog_offload_info_fill(&info, prog);
3552         if (err) {
3553             return err;
3554         }
3555         goto done;
3556     }
3557 
3558     /* NOTE: the following code is supposed to be skipped for offload.
3559      * bpf_prog_offload_info_fill() is the place to fill similar fields
3560      * for offload.
3561      */
3562     ulen = info.jited_prog_len;
3563     if (prog->aux->func_cnt) {
3564         u32 i;
3565 
3566         info.jited_prog_len = 0;
3567         for (i = 0; i < prog->aux->func_cnt; i++) {
3568             info.jited_prog_len += prog->aux->func[i]->jited_len;
3569         }
3570     } else {
3571         info.jited_prog_len = prog->jited_len;
3572     }
3573 
3574     if (info.jited_prog_len && ulen) {
3575         if (bpf_dump_raw_ok(file->f_cred)) {
3576             uinsns = u64_to_user_ptr(info.jited_prog_insns);
3577             ulen = min_t(u32, info.jited_prog_len, ulen);
3578 
3579             /* for multi-function programs, copy the JITed
3580              * instructions for all the functions
3581              */
3582             if (prog->aux->func_cnt) {
3583                 u32 len, free, i;
3584                 u8 *img;
3585 
3586                 free = ulen;
3587                 for (i = 0; i < prog->aux->func_cnt; i++) {
3588                     len = prog->aux->func[i]->jited_len;
3589                     len = min_t(u32, len, free);
3590                     img = (u8 *)prog->aux->func[i]->bpf_func;
3591                     if (copy_to_user(uinsns, img, len)) {
3592                         return -EFAULT;
3593                     }
3594                     uinsns += len;
3595                     free -= len;
3596                     if (!free) {
3597                         break;
3598                     }
3599                 }
3600             } else {
3601                 if (copy_to_user(uinsns, prog->bpf_func, ulen)) {
3602                     return -EFAULT;
3603                 }
3604             }
3605         } else {
3606             info.jited_prog_insns = 0;
3607         }
3608     }
3609 
3610     ulen = info.nr_jited_ksyms;
3611     info.nr_jited_ksyms = prog->aux->func_cnt ?: 1;
3612     if (ulen) {
3613         if (bpf_dump_raw_ok(file->f_cred)) {
3614             unsigned long ksym_addr;
3615             u64 __user *user_ksyms;
3616             u32 i;
3617 
3618             /* copy the address of the kernel symbol
3619              * corresponding to each function
3620              */
3621             ulen = min_t(u32, info.nr_jited_ksyms, ulen);
3622             user_ksyms = u64_to_user_ptr(info.jited_ksyms);
3623             if (prog->aux->func_cnt) {
3624                 for (i = 0; i < ulen; i++) {
3625                     ksym_addr = (unsigned long)prog->aux->func[i]->bpf_func;
3626                     if (put_user((u64)ksym_addr, &user_ksyms[i])) {
3627                         return -EFAULT;
3628                     }
3629                 }
3630             } else {
3631                 ksym_addr = (unsigned long)prog->bpf_func;
3632                 if (put_user((u64)ksym_addr, &user_ksyms[0])) {
3633                     return -EFAULT;
3634                 }
3635             }
3636         } else {
3637             info.jited_ksyms = 0;
3638         }
3639     }
3640 
3641     ulen = info.nr_jited_func_lens;
3642     info.nr_jited_func_lens = prog->aux->func_cnt ?: 1;
3643     if (ulen) {
3644         if (bpf_dump_raw_ok(file->f_cred)) {
3645             u32 __user *user_lens;
3646             u32 func_len, i;
3647 
3648             /* copy the JITed image lengths for each function */
3649             ulen = min_t(u32, info.nr_jited_func_lens, ulen);
3650             user_lens = u64_to_user_ptr(info.jited_func_lens);
3651             if (prog->aux->func_cnt) {
3652                 for (i = 0; i < ulen; i++) {
3653                     func_len = prog->aux->func[i]->jited_len;
3654                     if (put_user(func_len, &user_lens[i])) {
3655                         return -EFAULT;
3656                     }
3657                 }
3658             } else {
3659                 func_len = prog->jited_len;
3660                 if (put_user(func_len, &user_lens[0])) {
3661                     return -EFAULT;
3662                 }
3663             }
3664         } else {
3665             info.jited_func_lens = 0;
3666         }
3667     }
3668 
3669     if (prog->aux->btf) {
3670         info.btf_id = btf_id(prog->aux->btf);
3671     }
3672 
3673     ulen = info.nr_func_info;
3674     info.nr_func_info = prog->aux->func_info_cnt;
3675     if (info.nr_func_info && ulen) {
3676         char __user *user_finfo;
3677 
3678         user_finfo = u64_to_user_ptr(info.func_info);
3679         ulen = min_t(u32, info.nr_func_info, ulen);
3680         if (copy_to_user(user_finfo, prog->aux->func_info, info.func_info_rec_size * ulen)) {
3681             return -EFAULT;
3682         }
3683     }
3684 
3685     ulen = info.nr_line_info;
3686     info.nr_line_info = prog->aux->nr_linfo;
3687     if (info.nr_line_info && ulen) {
3688         __u8 __user *user_linfo;
3689 
3690         user_linfo = u64_to_user_ptr(info.line_info);
3691         ulen = min_t(u32, info.nr_line_info, ulen);
3692         if (copy_to_user(user_linfo, prog->aux->linfo, info.line_info_rec_size * ulen)) {
3693             return -EFAULT;
3694         }
3695     }
3696 
3697     ulen = info.nr_jited_line_info;
3698     if (prog->aux->jited_linfo) {
3699         info.nr_jited_line_info = prog->aux->nr_linfo;
3700     } else {
3701         info.nr_jited_line_info = 0;
3702     }
3703     if (info.nr_jited_line_info && ulen) {
3704         if (bpf_dump_raw_ok(file->f_cred)) {
3705             __u64 __user *user_linfo;
3706             u32 i;
3707 
3708             user_linfo = u64_to_user_ptr(info.jited_line_info);
3709             ulen = min_t(u32, info.nr_jited_line_info, ulen);
3710             for (i = 0; i < ulen; i++) {
3711                 if (put_user((__u64)(long)prog->aux->jited_linfo[i], &user_linfo[i])) {
3712                     return -EFAULT;
3713                 }
3714             }
3715         } else {
3716             info.jited_line_info = 0;
3717         }
3718     }
3719 
3720     ulen = info.nr_prog_tags;
3721     info.nr_prog_tags = prog->aux->func_cnt ?: 1;
3722     if (ulen) {
3723         __u8 __user(*user_prog_tags)[BPF_TAG_SIZE];
3724         u32 i;
3725 
3726         user_prog_tags = u64_to_user_ptr(info.prog_tags);
3727         ulen = min_t(u32, info.nr_prog_tags, ulen);
3728         if (prog->aux->func_cnt) {
3729             for (i = 0; i < ulen; i++) {
3730                 if (copy_to_user(user_prog_tags[i], prog->aux->func[i]->tag, BPF_TAG_SIZE)) {
3731                     return -EFAULT;
3732                 }
3733             }
3734         } else {
3735             if (copy_to_user(user_prog_tags[0], prog->tag, BPF_TAG_SIZE)) {
3736                 return -EFAULT;
3737             }
3738         }
3739     }
3740 
3741 done:
3742     if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) {
3743         return -EFAULT;
3744     }
3745 
3746     return 0;
3747 }
3748 
3749 static int bpf_map_get_info_by_fd(struct file *file, struct bpf_map *map, const union bpf_attr *attr,
3750                                   union bpf_attr __user *uattr)
3751 {
3752     struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3753     struct bpf_map_info info;
3754     u32 info_len = attr->info.info_len;
3755     int err;
3756 
3757     err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3758     if (err) {
3759         return err;
3760     }
3761     info_len = min_t(u32, sizeof(info), info_len);
3762 
3763     memset(&info, 0, sizeof(info));
3764     info.type = map->map_type;
3765     info.id = map->id;
3766     info.key_size = map->key_size;
3767     info.value_size = map->value_size;
3768     info.max_entries = map->max_entries;
3769     info.map_flags = map->map_flags;
3770     memcpy(info.name, map->name, sizeof(map->name));
3771 
3772     if (map->btf) {
3773         info.btf_id = btf_id(map->btf);
3774         info.btf_key_type_id = map->btf_key_type_id;
3775         info.btf_value_type_id = map->btf_value_type_id;
3776     }
3777     info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
3778 
3779     if (bpf_map_is_dev_bound(map)) {
3780         err = bpf_map_offload_info_fill(&info, map);
3781         if (err) {
3782             return err;
3783         }
3784     }
3785 
3786     if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) {
3787         return -EFAULT;
3788     }
3789 
3790     return 0;
3791 }
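/*
 * Hedged user-space sketch of the map-info query handled above, assuming a
 * valid map_fd and the UAPI definitions from <linux/bpf.h>:
 *
 *   union bpf_attr attr = {};
 *   struct bpf_map_info info = {};
 *
 *   attr.info.bpf_fd = map_fd;
 *   attr.info.info_len = sizeof(info);
 *   attr.info.info = (__u64)(unsigned long)&info;
 *   if (!syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
 *       printf("map %u: key %u value %u max_entries %u\n",
 *              info.id, info.key_size, info.value_size, info.max_entries);
 */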
3792 
3793 static int bpf_btf_get_info_by_fd(struct file *file, struct btf *btf, const union bpf_attr *attr,
3794                                   union bpf_attr __user *uattr)
3795 {
3796     struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3797     u32 info_len = attr->info.info_len;
3798     int err;
3799 
3800     err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
3801     if (err) {
3802         return err;
3803     }
3804 
3805     return btf_get_info_by_fd(btf, attr, uattr);
3806 }
3807 
3808 static int bpf_link_get_info_by_fd(struct file *file, struct bpf_link *link, const union bpf_attr *attr,
3809                                    union bpf_attr __user *uattr)
3810 {
3811     struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
3812     struct bpf_link_info info;
3813     u32 info_len = attr->info.info_len;
3814     int err;
3815 
3816     err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
3817     if (err) {
3818         return err;
3819     }
3820     info_len = min_t(u32, sizeof(info), info_len);
3821 
3822     memset(&info, 0, sizeof(info));
3823     if (copy_from_user(&info, uinfo, info_len)) {
3824         return -EFAULT;
3825     }
3826 
3827     info.type = link->type;
3828     info.id = link->id;
3829     info.prog_id = link->prog->aux->id;
3830 
3831     if (link->ops->fill_link_info) {
3832         err = link->ops->fill_link_info(link, &info);
3833         if (err) {
3834             return err;
3835         }
3836     }
3837 
3838     if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) {
3839         return -EFAULT;
3840     }
3841 
3842     return 0;
3843 }
3844 
3845 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
3846 
3847 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, union bpf_attr __user *uattr)
3848 {
3849     int ufd = attr->info.bpf_fd;
3850     struct fd f;
3851     int err;
3852 
3853     if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) {
3854         return -EINVAL;
3855     }
3856 
3857     f = fdget(ufd);
3858     if (!f.file) {
3859         return -EBADFD;
3860     }
3861 
3862     if (f.file->f_op == &bpf_prog_fops) {
3863         err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3864     } else if (f.file->f_op == &bpf_map_fops) {
3865         err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3866     } else if (f.file->f_op == &btf_fops) {
3867         err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3868     } else if (f.file->f_op == &bpf_link_fops) {
3869         err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
3870     } else {
3871         err = -EINVAL;
3872     }
3873 
3874     fdput(f);
3875     return err;
3876 }
3877 
3878 #define BPF_BTF_LOAD_LAST_FIELD btf_log_level
3879 
3880 static int bpf_btf_load(const union bpf_attr *attr)
3881 {
3882     if (CHECK_ATTR(BPF_BTF_LOAD)) {
3883         return -EINVAL;
3884     }
3885 
3886     if (!bpf_capable()) {
3887         return -EPERM;
3888     }
3889 
3890     return btf_new_fd(attr);
3891 }
3892 
3893 #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
3894 
3895 static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
3896 {
3897     if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) {
3898         return -EINVAL;
3899     }
3900 
3901     if (!capable(CAP_SYS_ADMIN)) {
3902         return -EPERM;
3903     }
3904 
3905     return btf_get_fd_by_id(attr->btf_id);
3906 }
3907 
3908 static int bpf_task_fd_query_copy(const union bpf_attr *attr, union bpf_attr __user *uattr, u32 prog_id, u32 fd_type,
3909                                   const char *buf, u64 probe_offset, u64 probe_addr)
3910 {
3911     char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
3912     u32 len = buf ? strlen(buf) : 0, input_len;
3913     int err = 0;
3914 
3915     if (put_user(len, &uattr->task_fd_query.buf_len)) {
3916         return -EFAULT;
3917     }
3918     input_len = attr->task_fd_query.buf_len;
3919     if (input_len && ubuf) {
3920         if (!len) {
3921             /* nothing to copy, just make ubuf NULL terminated */
3922             char zero = '\0';
3923 
3924             if (put_user(zero, ubuf)) {
3925                 return -EFAULT;
3926             }
3927         } else if (input_len >= len + 1) {
3928             /* ubuf can hold the string with NULL terminator */
3929             if (copy_to_user(ubuf, buf, len + 1)) {
3930                 return -EFAULT;
3931             }
3932         } else {
3933             /* ubuf cannot hold the string with NULL terminator,
3934              * do a partial copy with NULL terminator.
3935              */
3936             char zero = '\0';
3937 
3938             err = -ENOSPC;
3939             if (copy_to_user(ubuf, buf, input_len - 1)) {
3940                 return -EFAULT;
3941             }
3942             if (put_user(zero, ubuf + input_len - 1)) {
3943                 return -EFAULT;
3944             }
3945         }
3946     }
3947 
3948     if (put_user(prog_id, &uattr->task_fd_query.prog_id) || put_user(fd_type, &uattr->task_fd_query.fd_type) ||
3949         put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
3950         put_user(probe_addr, &uattr->task_fd_query.probe_addr)) {
3951         return -EFAULT;
3952     }
3953 
3954     return err;
3955 }
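/*
 * The helper above always reports the full string length via buf_len; when the
 * user buffer is too small it copies a truncated, NUL-terminated string and
 * returns -ENOSPC.  A hedged sketch of the matching BPF_TASK_FD_QUERY call
 * from user space (pid and perf_event_fd are assumptions):
 *
 *   char name[64];
 *   union bpf_attr attr = {};
 *
 *   attr.task_fd_query.pid = pid;
 *   attr.task_fd_query.fd = perf_event_fd;
 *   attr.task_fd_query.buf = (__u64)(unsigned long)name;
 *   attr.task_fd_query.buf_len = sizeof(name);
 *   err = syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
 */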
3956 
3957 #define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
3958 
3959 static int bpf_task_fd_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
3960 {
3961     pid_t pid = attr->task_fd_query.pid;
3962     u32 fd = attr->task_fd_query.fd;
3963     const struct perf_event *event;
3964     struct files_struct *files;
3965     struct task_struct *task;
3966     struct file *file;
3967     int err;
3968 
3969     if (CHECK_ATTR(BPF_TASK_FD_QUERY)) {
3970         return -EINVAL;
3971     }
3972 
3973     if (!capable(CAP_SYS_ADMIN)) {
3974         return -EPERM;
3975     }
3976 
3977     if (attr->task_fd_query.flags != 0) {
3978         return -EINVAL;
3979     }
3980 
3981     task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
3982     if (!task) {
3983         return -ENOENT;
3984     }
3985 
3986     files = get_files_struct(task);
3987     put_task_struct(task);
3988     if (!files) {
3989         return -ENOENT;
3990     }
3991 
3992     err = 0;
3993     spin_lock(&files->file_lock);
3994     file = fcheck_files(files, fd);
3995     if (!file) {
3996         err = -EBADF;
3997     } else {
3998         get_file(file);
3999     }
4000     spin_unlock(&files->file_lock);
4001     put_files_struct(files);
4002 
4003     if (err) {
4004         goto out;
4005     }
4006 
4007     if (file->f_op == &bpf_link_fops) {
4008         struct bpf_link *link = file->private_data;
4009 
4010         if (link->ops == &bpf_raw_tp_link_lops) {
4011             struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link);
4012             struct bpf_raw_event_map *btp = raw_tp->btp;
4013 
4014             err = bpf_task_fd_query_copy(attr, uattr, raw_tp->link.prog->aux->id, BPF_FD_TYPE_RAW_TRACEPOINT,
4015                                          btp->tp->name, 0, 0);
4016             goto put_file;
4017         }
4018         goto out_not_supp;
4019     }
4020 
4021     event = perf_get_event(file);
4022     if (!IS_ERR(event)) {
4023         u64 probe_offset, probe_addr;
4024         u32 prog_id, fd_type;
4025         const char *buf;
4026 
4027         err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, &probe_addr);
4028         if (!err) {
4029             err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, probe_offset, probe_addr);
4030         }
4031         goto put_file;
4032     }
4033 
4034 out_not_supp:
4035     err = -ENOTSUPP;
4036 put_file:
4037     fput(file);
4038 out:
4039     return err;
4040 }
4041 
4042 #define BPF_MAP_BATCH_LAST_FIELD batch.flags
4043 
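/* Dispatch one batch operation: fail with -ENOTSUPP if the map does not
 * implement the requested batch callback, otherwise invoke it with the
 * current map, attr and uattr.
 */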
4044 #define BPF_DO_BATCH(fn)                                                                                               \
4045     do {                                                                                                               \
4046         if (!(fn)) {                                                                                                   \
4047             err = -ENOTSUPP;                                                                                           \
4048             goto err_put;                                                                                              \
4049         }                                                                                                              \
4050         err = fn(map, attr, uattr);                                                                                    \
4051     } while (0)
4052 
4053 static int bpf_map_do_batch(const union bpf_attr *attr, union bpf_attr __user *uattr, int cmd)
4054 {
4055     struct bpf_map *map;
4056     int err, ufd;
4057     struct fd f;
4058 
4059     if (CHECK_ATTR(BPF_MAP_BATCH)) {
4060         return -EINVAL;
4061     }
4062 
4063     ufd = attr->batch.map_fd;
4064     f = fdget(ufd);
4065     map = __bpf_map_get(f);
4066     if (IS_ERR(map)) {
4067         return PTR_ERR(map);
4068     }
4069 
4070     if ((cmd == BPF_MAP_LOOKUP_BATCH || cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
4071         !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
4072         err = -EPERM;
4073         goto err_put;
4074     }
4075 
4076     if (cmd != BPF_MAP_LOOKUP_BATCH && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
4077         err = -EPERM;
4078         goto err_put;
4079     }
4080 
4081     if (cmd == BPF_MAP_LOOKUP_BATCH) {
4082         BPF_DO_BATCH(map->ops->map_lookup_batch);
4083     } else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) {
4084         BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
4085     } else if (cmd == BPF_MAP_UPDATE_BATCH) {
4086         BPF_DO_BATCH(map->ops->map_update_batch);
4087     } else {
4088         BPF_DO_BATCH(map->ops->map_delete_batch);
4089     }
4090 
4091 err_put:
4092     fdput(f);
4093     return err;
4094 }
4095 
4096 static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
4097 {
4098     if (attr->link_create.attach_type != prog->expected_attach_type) {
4099         return -EINVAL;
4100     }
4101 
4102     if (prog->expected_attach_type == BPF_TRACE_ITER) {
4103         return bpf_iter_link_attach(attr, prog);
4104     } else if (prog->type == BPF_PROG_TYPE_EXT) {
4105         return bpf_tracing_prog_attach(prog, attr->link_create.target_fd, attr->link_create.target_btf_id);
4106     }
4107     return -EINVAL;
4108 }
4109 
4110 #define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
4111 static int link_create(union bpf_attr *attr)
4112 {
4113     enum bpf_prog_type ptype;
4114     struct bpf_prog *prog;
4115     int ret;
4116 
4117     if (CHECK_ATTR(BPF_LINK_CREATE)) {
4118         return -EINVAL;
4119     }
4120 
4121     prog = bpf_prog_get(attr->link_create.prog_fd);
4122     if (IS_ERR(prog)) {
4123         return PTR_ERR(prog);
4124     }
4125 
4126     ret = bpf_prog_attach_check_attach_type(prog, attr->link_create.attach_type);
4127     if (ret) {
4128         goto out;
4129     }
4130 
4131     if (prog->type == BPF_PROG_TYPE_EXT) {
4132         ret = tracing_bpf_link_attach(attr, prog);
4133         goto out;
4134     }
4135 
4136     ptype = attach_type_to_prog_type(attr->link_create.attach_type);
4137     if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
4138         ret = -EINVAL;
4139         goto out;
4140     }
4141 
4142     switch (ptype) {
4143         case BPF_PROG_TYPE_CGROUP_SKB:
4144         case BPF_PROG_TYPE_CGROUP_SOCK:
4145         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
4146         case BPF_PROG_TYPE_SOCK_OPS:
4147         case BPF_PROG_TYPE_CGROUP_DEVICE:
4148         case BPF_PROG_TYPE_CGROUP_SYSCTL:
4149         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
4150             ret = cgroup_bpf_link_attach(attr, prog);
4151             break;
4152         case BPF_PROG_TYPE_TRACING:
4153             ret = tracing_bpf_link_attach(attr, prog);
4154             break;
4155         case BPF_PROG_TYPE_FLOW_DISSECTOR:
4156         case BPF_PROG_TYPE_SK_LOOKUP:
4157             ret = netns_bpf_link_create(attr, prog);
4158             break;
4159 #ifdef CONFIG_NET
4160         case BPF_PROG_TYPE_XDP:
4161             ret = bpf_xdp_link_attach(attr, prog);
4162             break;
4163 #endif
4164         default:
4165             ret = -EINVAL;
4166     }
4167 
4168 out:
4169     if (ret < 0) {
4170         bpf_prog_put(prog);
4171     }
4172     return ret;
4173 }
4174 
4175 #define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
4176 
4177 static int link_update(union bpf_attr *attr)
4178 {
4179     struct bpf_prog *old_prog = NULL, *new_prog;
4180     struct bpf_link *link;
4181     u32 flags;
4182     int ret;
4183 
4184     if (CHECK_ATTR(BPF_LINK_UPDATE)) {
4185         return -EINVAL;
4186     }
4187 
4188     flags = attr->link_update.flags;
4189     if (flags & ~BPF_F_REPLACE) {
4190         return -EINVAL;
4191     }
4192 
4193     link = bpf_link_get_from_fd(attr->link_update.link_fd);
4194     if (IS_ERR(link)) {
4195         return PTR_ERR(link);
4196     }
4197 
4198     new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
4199     if (IS_ERR(new_prog)) {
4200         ret = PTR_ERR(new_prog);
4201         goto out_put_link;
4202     }
4203 
4204     if (flags & BPF_F_REPLACE) {
4205         old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
4206         if (IS_ERR(old_prog)) {
4207             ret = PTR_ERR(old_prog);
4208             old_prog = NULL;
4209             goto out_put_progs;
4210         }
4211     } else if (attr->link_update.old_prog_fd) {
4212         ret = -EINVAL;
4213         goto out_put_progs;
4214     }
4215 
4216     if (link->ops->update_prog) {
4217         ret = link->ops->update_prog(link, new_prog, old_prog);
4218     } else {
4219         ret = -EINVAL;
4220     }
4221 
4222 out_put_progs:
4223     if (old_prog) {
4224         bpf_prog_put(old_prog);
4225     }
4226     if (ret) {
4227         bpf_prog_put(new_prog);
4228     }
4229 out_put_link:
4230     bpf_link_put(link);
4231     return ret;
4232 }
4233 
4234 #define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd
4235 
4236 static int link_detach(union bpf_attr *attr)
4237 {
4238     struct bpf_link *link;
4239     int ret;
4240 
4241     if (CHECK_ATTR(BPF_LINK_DETACH)) {
4242         return -EINVAL;
4243     }
4244 
4245     link = bpf_link_get_from_fd(attr->link_detach.link_fd);
4246     if (IS_ERR(link)) {
4247         return PTR_ERR(link);
4248     }
4249 
4250     if (link->ops->detach) {
4251         ret = link->ops->detach(link);
4252     } else {
4253         ret = -EOPNOTSUPP;
4254     }
4255 
4256     bpf_link_put(link);
4257     return ret;
4258 }
4259 
4260 static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link)
4261 {
4262     return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT);
4263 }
4264 
4265 struct bpf_link *bpf_link_by_id(u32 id)
4266 {
4267     struct bpf_link *link;
4268 
4269     if (!id) {
4270         return ERR_PTR(-ENOENT);
4271     }
4272 
4273     spin_lock_bh(&link_idr_lock);
4274     /* before link is "settled", ID is 0, pretend it doesn't exist yet */
4275     link = idr_find(&link_idr, id);
4276     if (link) {
4277         if (link->id) {
4278             link = bpf_link_inc_not_zero(link);
4279         } else {
4280             link = ERR_PTR(-EAGAIN);
4281         }
4282     } else {
4283         link = ERR_PTR(-ENOENT);
4284     }
4285     spin_unlock_bh(&link_idr_lock);
4286     return link;
4287 }
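/*
 * A link whose ID has not settled yet is reported as -EAGAIN above, so a
 * hedged user-space sketch of BPF_LINK_GET_FD_BY_ID would simply retry:
 *
 *   union bpf_attr attr = {};
 *
 *   attr.link_id = id;
 *   do {
 *       fd = syscall(__NR_bpf, BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
 *   } while (fd < 0 && errno == EAGAIN);
 */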
4288 
4289 #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
4290 
4291 static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
4292 {
4293     struct bpf_link *link;
4294     u32 id = attr->link_id;
4295     int fd;
4296 
4297     if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) {
4298         return -EINVAL;
4299     }
4300 
4301     if (!capable(CAP_SYS_ADMIN)) {
4302         return -EPERM;
4303     }
4304 
4305     link = bpf_link_by_id(id);
4306     if (IS_ERR(link)) {
4307         return PTR_ERR(link);
4308     }
4309 
4310     fd = bpf_link_new_fd(link);
4311     if (fd < 0) {
4312         bpf_link_put(link);
4313     }
4314 
4315     return fd;
4316 }
4317 
4318 DEFINE_MUTEX(bpf_stats_enabled_mutex);
4319 
4320 static int bpf_stats_release(struct inode *inode, struct file *file)
4321 {
4322     mutex_lock(&bpf_stats_enabled_mutex);
4323     static_key_slow_dec(&bpf_stats_enabled_key.key);
4324     mutex_unlock(&bpf_stats_enabled_mutex);
4325     return 0;
4326 }
4327 
4328 static const struct file_operations bpf_stats_fops = {
4329     .release = bpf_stats_release,
4330 };
4331 
4332 static int bpf_enable_runtime_stats(void)
4333 {
4334     int fd;
4335 
4336     mutex_lock(&bpf_stats_enabled_mutex);
4337 
4338     /* Set a very high limit to avoid overflow */
4339     if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
4340         mutex_unlock(&bpf_stats_enabled_mutex);
4341         return -EBUSY;
4342     }
4343 
4344     fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
4345     if (fd >= 0) {
4346         static_key_slow_inc(&bpf_stats_enabled_key.key);
4347     }
4348 
4349     mutex_unlock(&bpf_stats_enabled_mutex);
4350     return fd;
4351 }
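/*
 * Hedged sketch of enabling run-time stats from user space: the returned fd
 * keeps stats collection enabled until it is closed, at which point
 * bpf_stats_release() above decrements the static key again.
 *
 *   union bpf_attr attr = {};
 *   int stats_fd;
 *
 *   attr.enable_stats.type = BPF_STATS_RUN_TIME;
 *   stats_fd = syscall(__NR_bpf, BPF_ENABLE_STATS, &attr, sizeof(attr));
 *   ... run and inspect programs ...
 *   close(stats_fd);
 */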
4352 
4353 #define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
4354 
4355 static int bpf_enable_stats(union bpf_attr *attr)
4356 {
4357     if (CHECK_ATTR(BPF_ENABLE_STATS)) {
4358         return -EINVAL;
4359     }
4360 
4361     if (!capable(CAP_SYS_ADMIN)) {
4362         return -EPERM;
4363     }
4364 
4365     switch (attr->enable_stats.type) {
4366         case BPF_STATS_RUN_TIME:
4367             return bpf_enable_runtime_stats();
4368         default:
4369             break;
4370     }
4371     return -EINVAL;
4372 }
4373 
4374 #define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
4375 
4376 static int bpf_iter_create(union bpf_attr *attr)
4377 {
4378     struct bpf_link *link;
4379     int err;
4380 
4381     if (CHECK_ATTR(BPF_ITER_CREATE)) {
4382         return -EINVAL;
4383     }
4384 
4385     if (attr->iter_create.flags) {
4386         return -EINVAL;
4387     }
4388 
4389     link = bpf_link_get_from_fd(attr->iter_create.link_fd);
4390     if (IS_ERR(link)) {
4391         return PTR_ERR(link);
4392     }
4393 
4394     err = bpf_iter_new_fd(link);
4395     bpf_link_put(link);
4396 
4397     return err;
4398 }
4399 
4400 #define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags
4401 
4402 static int bpf_prog_bind_map(union bpf_attr *attr)
4403 {
4404     struct bpf_prog *prog;
4405     struct bpf_map *map;
4406     struct bpf_map **used_maps_old, **used_maps_new;
4407     int i, ret = 0;
4408 
4409     if (CHECK_ATTR(BPF_PROG_BIND_MAP)) {
4410         return -EINVAL;
4411     }
4412 
4413     if (attr->prog_bind_map.flags) {
4414         return -EINVAL;
4415     }
4416 
4417     prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
4418     if (IS_ERR(prog)) {
4419         return PTR_ERR(prog);
4420     }
4421 
4422     map = bpf_map_get(attr->prog_bind_map.map_fd);
4423     if (IS_ERR(map)) {
4424         ret = PTR_ERR(map);
4425         goto out_prog_put;
4426     }
4427 
4428     mutex_lock(&prog->aux->used_maps_mutex);
4429 
4430     used_maps_old = prog->aux->used_maps;
4431 
4432     for (i = 0; i < prog->aux->used_map_cnt; i++) {
4433         if (used_maps_old[i] == map) {
4434             bpf_map_put(map);
4435             goto out_unlock;
4436         }
4437     }
4438 
4439     used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, sizeof(used_maps_new[0]), GFP_KERNEL);
4440     if (!used_maps_new) {
4441         ret = -ENOMEM;
4442         goto out_unlock;
4443     }
4444 
4445     memcpy(used_maps_new, used_maps_old, sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
4446     used_maps_new[prog->aux->used_map_cnt] = map;
4447 
4448     prog->aux->used_map_cnt++;
4449     prog->aux->used_maps = used_maps_new;
4450 
4451     kfree(used_maps_old);
4452 
4453 out_unlock:
4454     mutex_unlock(&prog->aux->used_maps_mutex);
4455 
4456     if (ret) {
4457         bpf_map_put(map);
4458     }
4459 out_prog_put:
4460     bpf_prog_put(prog);
4461     return ret;
4462 }
4463 
4464 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
4465 {
4466     union bpf_attr attr;
4467     int err;
4468 
4469     if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) {
4470         return -EPERM;
4471     }
4472 
4473     err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
4474     if (err) {
4475         return err;
4476     }
4477     size = min_t(u32, size, sizeof(attr));
4478 
4479     /* copy attributes from user space, may be less than sizeof(bpf_attr) */
4480     memset(&attr, 0, sizeof(attr));
4481     if (copy_from_user(&attr, uattr, size) != 0) {
4482         return -EFAULT;
4483     }
4484 
4485     err = security_bpf(cmd, &attr, size);
4486     if (err < 0) {
4487         return err;
4488     }
4489 
4490     switch (cmd) {
4491         case BPF_MAP_CREATE:
4492             err = map_create(&attr);
4493             break;
4494         case BPF_MAP_LOOKUP_ELEM:
4495             err = map_lookup_elem(&attr);
4496             break;
4497         case BPF_MAP_UPDATE_ELEM:
4498             err = map_update_elem(&attr);
4499             break;
4500         case BPF_MAP_DELETE_ELEM:
4501             err = map_delete_elem(&attr);
4502             break;
4503         case BPF_MAP_GET_NEXT_KEY:
4504             err = map_get_next_key(&attr);
4505             break;
4506         case BPF_MAP_FREEZE:
4507             err = map_freeze(&attr);
4508             break;
4509         case BPF_PROG_LOAD:
4510             err = bpf_prog_load(&attr, uattr);
4511             break;
4512         case BPF_OBJ_PIN:
4513             err = bpf_obj_pin(&attr);
4514             break;
4515         case BPF_OBJ_GET:
4516             err = bpf_obj_get(&attr);
4517             break;
4518         case BPF_PROG_ATTACH:
4519             err = bpf_prog_attach(&attr);
4520             break;
4521         case BPF_PROG_DETACH:
4522             err = bpf_prog_detach(&attr);
4523             break;
4524         case BPF_PROG_QUERY:
4525             err = bpf_prog_query(&attr, uattr);
4526             break;
4527         case BPF_PROG_TEST_RUN:
4528             err = bpf_prog_test_run(&attr, uattr);
4529             break;
4530         case BPF_PROG_GET_NEXT_ID:
4531             err = bpf_obj_get_next_id(&attr, uattr, &prog_idr, &prog_idr_lock);
4532             break;
4533         case BPF_MAP_GET_NEXT_ID:
4534             err = bpf_obj_get_next_id(&attr, uattr, &map_idr, &map_idr_lock);
4535             break;
4536         case BPF_BTF_GET_NEXT_ID:
4537             err = bpf_obj_get_next_id(&attr, uattr, &btf_idr, &btf_idr_lock);
4538             break;
4539         case BPF_PROG_GET_FD_BY_ID:
4540             err = bpf_prog_get_fd_by_id(&attr);
4541             break;
4542         case BPF_MAP_GET_FD_BY_ID:
4543             err = bpf_map_get_fd_by_id(&attr);
4544             break;
4545         case BPF_OBJ_GET_INFO_BY_FD:
4546             err = bpf_obj_get_info_by_fd(&attr, uattr);
4547             break;
4548         case BPF_RAW_TRACEPOINT_OPEN:
4549             err = bpf_raw_tracepoint_open(&attr);
4550             break;
4551         case BPF_BTF_LOAD:
4552             err = bpf_btf_load(&attr);
4553             break;
4554         case BPF_BTF_GET_FD_BY_ID:
4555             err = bpf_btf_get_fd_by_id(&attr);
4556             break;
4557         case BPF_TASK_FD_QUERY:
4558             err = bpf_task_fd_query(&attr, uattr);
4559             break;
4560         case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
4561             err = map_lookup_and_delete_elem(&attr);
4562             break;
4563         case BPF_MAP_LOOKUP_BATCH:
4564             err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
4565             break;
4566         case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
4567             err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_AND_DELETE_BATCH);
4568             break;
4569         case BPF_MAP_UPDATE_BATCH:
4570             err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
4571             break;
4572         case BPF_MAP_DELETE_BATCH:
4573             err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
4574             break;
4575         case BPF_LINK_CREATE:
4576             err = link_create(&attr);
4577             break;
4578         case BPF_LINK_UPDATE:
4579             err = link_update(&attr);
4580             break;
4581         case BPF_LINK_GET_FD_BY_ID:
4582             err = bpf_link_get_fd_by_id(&attr);
4583             break;
4584         case BPF_LINK_GET_NEXT_ID:
4585             err = bpf_obj_get_next_id(&attr, uattr, &link_idr, &link_idr_lock);
4586             break;
4587         case BPF_ENABLE_STATS:
4588             err = bpf_enable_stats(&attr);
4589             break;
4590         case BPF_ITER_CREATE:
4591             err = bpf_iter_create(&attr);
4592             break;
4593         case BPF_LINK_DETACH:
4594             err = link_detach(&attr);
4595             break;
4596         case BPF_PROG_BIND_MAP:
4597             err = bpf_prog_bind_map(&attr);
4598             break;
4599         default:
4600             err = -EINVAL;
4601             break;
4602     }
4603 
4604     return err;
4605 }
4606
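/*
 * For reference, a hedged sketch of the thin wrapper user space typically uses
 * to reach this entry point; the attribute passed in may legitimately be
 * shorter or longer than sizeof(union bpf_attr): short attributes are
 * zero-filled above, and unknown trailing bytes must be zero.
 *
 *   static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
 *   {
 *       return syscall(__NR_bpf, cmd, attr, size);
 *   }
 */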