1// SPDX-License-Identifier: GPL-2.0-only 2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 */ 4#include <linux/bpf.h> 5#include <linux/bpf_trace.h> 6#include <linux/bpf_lirc.h> 7#include <linux/bpf_verifier.h> 8#include <linux/btf.h> 9#include <linux/syscalls.h> 10#include <linux/slab.h> 11#include <linux/sched/signal.h> 12#include <linux/vmalloc.h> 13#include <linux/mmzone.h> 14#include <linux/anon_inodes.h> 15#include <linux/fdtable.h> 16#include <linux/file.h> 17#include <linux/fs.h> 18#include <linux/license.h> 19#include <linux/filter.h> 20#include <linux/version.h> 21#include <linux/kernel.h> 22#include <linux/idr.h> 23#include <linux/cred.h> 24#include <linux/timekeeping.h> 25#include <linux/ctype.h> 26#include <linux/nospec.h> 27#include <linux/audit.h> 28#include <uapi/linux/btf.h> 29#include <linux/pgtable.h> 30#include <linux/bpf_lsm.h> 31#include <linux/poll.h> 32#include <linux/bpf-netns.h> 33#include <linux/rcupdate_trace.h> 34 35#define IS_FD_ARRAY(map) \ 36 ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ 37 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) 38#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY) 39#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) 40#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || IS_FD_HASH(map)) 41 42#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) 43 44DEFINE_PER_CPU(int, bpf_prog_active); 45static DEFINE_IDR(prog_idr); 46static DEFINE_SPINLOCK(prog_idr_lock); 47static DEFINE_IDR(map_idr); 48static DEFINE_SPINLOCK(map_idr_lock); 49static DEFINE_IDR(link_idr); 50static DEFINE_SPINLOCK(link_idr_lock); 51 52int sysctl_unprivileged_bpf_disabled __read_mostly = IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; 53 54static const struct bpf_map_ops *const bpf_map_types[] = { 55#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 56#define BPF_MAP_TYPE(_id, _ops) [_id] = &(_ops), 57#define BPF_LINK_TYPE(_id, _name) 58#include <linux/bpf_types.h> 59#undef BPF_PROG_TYPE 60#undef BPF_MAP_TYPE 61#undef BPF_LINK_TYPE 62}; 63 64/* 65 * If we're handed a bigger struct than we know of, ensure all the unknown bits 66 * are 0 - i.e. new user-space does not rely on any kernel feature extensions 67 * we don't know about yet. 68 * 69 * There is a ToCToU between this function call and the following 70 * copy_from_user() call. However, this is not a concern since this function is 71 * meant to be a future-proofing of bits. 72 */ 73int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size, size_t actual_size) 74{ 75 unsigned char __user *addr = uaddr + expected_size; 76 int res; 77 78 if (unlikely(actual_size > PAGE_SIZE)) { /* silly large */ 79 return -E2BIG; 80 } 81 82 if (actual_size <= expected_size) { 83 return 0; 84 } 85 86 res = check_zeroed_user(addr, actual_size - expected_size); 87 if (res < 0) { 88 return res; 89 } 90 return res ? 
0 : -E2BIG; 91} 92 93const struct bpf_map_ops bpf_map_offload_ops = { 94 .map_meta_equal = bpf_map_meta_equal, 95 .map_alloc = bpf_map_offload_map_alloc, 96 .map_free = bpf_map_offload_map_free, 97 .map_check_btf = map_check_no_btf, 98}; 99 100static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) 101{ 102 const struct bpf_map_ops *ops; 103 u32 type = attr->map_type; 104 struct bpf_map *map; 105 int err; 106 107 if (type >= ARRAY_SIZE(bpf_map_types)) { 108 return ERR_PTR(-EINVAL); 109 } 110 type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); 111 ops = bpf_map_types[type]; 112 if (!ops) { 113 return ERR_PTR(-EINVAL); 114 } 115 116 if (ops->map_alloc_check) { 117 err = ops->map_alloc_check(attr); 118 if (err) { 119 return ERR_PTR(err); 120 } 121 } 122 if (attr->map_ifindex) { 123 ops = &bpf_map_offload_ops; 124 } 125 map = ops->map_alloc(attr); 126 if (IS_ERR(map)) { 127 return map; 128 } 129 map->ops = ops; 130 map->map_type = type; 131 return map; 132} 133 134static u32 bpf_map_value_size(struct bpf_map *map) 135{ 136 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 137 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 138 return round_up(map->value_size, 0x8) * num_possible_cpus(); 139 } else if (IS_FD_MAP(map)) { 140 return sizeof(u32); 141 } else { 142 return map->value_size; 143 } 144} 145 146static void maybe_wait_bpf_programs(struct bpf_map *map) 147{ 148 /* Wait for any running BPF programs to complete so that 149 * userspace, when we return to it, knows that all programs 150 * that could be running use the new map value. 151 */ 152 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS || map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { 153 synchronize_rcu(); 154 } 155} 156 157static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key, void *value, __u64 flags) 158{ 159 int err; 160 161 /* Need to create a kthread, thus must support schedule */ 162 if (bpf_map_is_dev_bound(map)) { 163 return bpf_map_offload_update_elem(map, key, value, flags); 164 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 165 return map->ops->map_update_elem(map, key, value, flags); 166 } else if (map->map_type == BPF_MAP_TYPE_SOCKHASH || map->map_type == BPF_MAP_TYPE_SOCKMAP) { 167 return sock_map_update_elem_sys(map, key, value, flags); 168 } else if (IS_FD_PROG_ARRAY(map)) { 169 return bpf_fd_array_map_update_elem(map, f.file, key, value, flags); 170 } 171 172 bpf_disable_instrumentation(); 173 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 174 err = bpf_percpu_hash_update(map, key, value, flags); 175 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 176 err = bpf_percpu_array_update(map, key, value, flags); 177 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 178 err = bpf_percpu_cgroup_storage_update(map, key, value, flags); 179 } else if (IS_FD_ARRAY(map)) { 180 rcu_read_lock(); 181 err = bpf_fd_array_map_update_elem(map, f.file, key, value, flags); 182 rcu_read_unlock(); 183 } else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 184 rcu_read_lock(); 185 err = bpf_fd_htab_map_update_elem(map, f.file, key, value, flags); 186 rcu_read_unlock(); 187 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 188 /* rcu_read_lock() is not needed */ 189 err = bpf_fd_reuseport_array_update_elem(map, key, value, flags); 190 } else if (map->map_type == 
BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) { 191 err = map->ops->map_push_elem(map, value, flags); 192 } else { 193 rcu_read_lock(); 194 err = map->ops->map_update_elem(map, key, value, flags); 195 rcu_read_unlock(); 196 } 197 bpf_enable_instrumentation(); 198 maybe_wait_bpf_programs(map); 199 200 return err; 201} 202 203static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, __u64 flags) 204{ 205 void *ptr; 206 int err; 207 208 if (bpf_map_is_dev_bound(map)) { 209 return bpf_map_offload_lookup_elem(map, key, value); 210 } 211 212 bpf_disable_instrumentation(); 213 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 214 err = bpf_percpu_hash_copy(map, key, value); 215 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 216 err = bpf_percpu_array_copy(map, key, value); 217 } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 218 err = bpf_percpu_cgroup_storage_copy(map, key, value); 219 } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) { 220 err = bpf_stackmap_copy(map, key, value); 221 } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) { 222 err = bpf_fd_array_map_lookup_elem(map, key, value); 223 } else if (IS_FD_HASH(map)) { 224 err = bpf_fd_htab_map_lookup_elem(map, key, value); 225 } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) { 226 err = bpf_fd_reuseport_array_lookup_elem(map, key, value); 227 } else if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) { 228 err = map->ops->map_peek_elem(map, value); 229 } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 230 /* struct_ops map requires directly updating "value" */ 231 err = bpf_struct_ops_map_sys_lookup_elem(map, key, value); 232 } else { 233 rcu_read_lock(); 234 if (map->ops->map_lookup_elem_sys_only) { 235 ptr = map->ops->map_lookup_elem_sys_only(map, key); 236 } else { 237 ptr = map->ops->map_lookup_elem(map, key); 238 } 239 if (IS_ERR(ptr)) { 240 err = PTR_ERR(ptr); 241 } else if (!ptr) { 242 err = -ENOENT; 243 } else { 244 err = 0; 245 if (flags & BPF_F_LOCK) { 246 /* lock 'ptr' and copy everything but lock */ 247 copy_map_value_locked(map, value, ptr, true); 248 } else { 249 copy_map_value(map, value, ptr); 250 } 251 /* mask lock, since value wasn't zero inited */ 252 check_and_init_map_lock(map, value); 253 } 254 rcu_read_unlock(); 255 } 256 257 bpf_enable_instrumentation(); 258 maybe_wait_bpf_programs(map); 259 260 return err; 261} 262 263static void *_bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 264{ 265 /* We really just want to fail instead of triggering OOM killer 266 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, 267 * which is used for lower order allocation requests. 268 * 269 * It has been observed that higher order allocation requests done by 270 * vmalloc with __GFP_NORETRY being set might fail due to not trying 271 * to reclaim memory from the page cache, thus we set 272 * __GFP_RETRY_MAYFAIL to avoid such situations. 
273 */ 274 275 const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO; 276 unsigned int flags = 0; 277 unsigned long align = 1; 278 void *area; 279 280 if (size >= SIZE_MAX) { 281 return NULL; 282 } 283 284 /* kmalloc()'ed memory can't be mmap()'ed */ 285 if (mmapable) { 286 BUG_ON(!PAGE_ALIGNED(size)); 287 align = SHMLBA; 288 flags = VM_USERMAP; 289 } else if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 290 area = kmalloc_node(size, gfp | GFP_USER | __GFP_NORETRY, numa_node); 291 if (area != NULL) { 292 return area; 293 } 294 } 295 296 return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, 297 PAGE_KERNEL, flags, numa_node, __builtin_return_address(0)); 298} 299 300void *bpf_map_area_alloc(u64 size, int numa_node) 301{ 302 return _bpf_map_area_alloc(size, numa_node, false); 303} 304 305void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 306{ 307 return _bpf_map_area_alloc(size, numa_node, true); 308} 309 310void bpf_map_area_free(void *area) 311{ 312 kvfree(area); 313} 314 315static u32 bpf_map_flags_retain_permanent(u32 flags) 316{ 317 /* Some map creation flags are not tied to the map object but 318 * rather to the map fd instead, so they have no meaning upon 319 * map object inspection since multiple file descriptors with 320 * different (access) properties can exist here. Thus, given 321 * this has zero meaning for the map itself, lets clear these 322 * from here. 323 */ 324 return flags & ~(BPF_F_RDONLY | BPF_F_WRONLY); 325} 326 327void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) 328{ 329 map->map_type = attr->map_type; 330 map->key_size = attr->key_size; 331 map->value_size = attr->value_size; 332 map->max_entries = attr->max_entries; 333 map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags); 334 map->numa_node = bpf_map_attr_numa_node(attr); 335} 336 337static int bpf_charge_memlock(struct user_struct *user, u32 pages) 338{ 339 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 340 if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) { 341 atomic_long_sub(pages, &user->locked_vm); 342 return -EPERM; 343 } 344 return 0; 345} 346 347static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) 348{ 349 if (user) { 350 atomic_long_sub(pages, &user->locked_vm); 351 } 352} 353 354int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) 355{ 356 u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; 357 struct user_struct *user; 358 int ret; 359 360 if (size >= U32_MAX - PAGE_SIZE) { 361 return -E2BIG; 362 } 363 364 user = get_current_user(); 365 ret = bpf_charge_memlock(user, pages); 366 if (ret) { 367 free_uid(user); 368 return ret; 369 } 370 371 mem->pages = pages; 372 mem->user = user; 373 374 return 0; 375} 376 377void bpf_map_charge_finish(struct bpf_map_memory *mem) 378{ 379 bpf_uncharge_memlock(mem->user, mem->pages); 380 free_uid(mem->user); 381} 382 383void bpf_map_charge_move(struct bpf_map_memory *dst, struct bpf_map_memory *src) 384{ 385 *dst = *src; 386 387 /* Make sure src will not be used for the redundant uncharging. 
 */
    memset(src, 0, sizeof(struct bpf_map_memory));
}

int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
{
    int ret;

    ret = bpf_charge_memlock(map->memory.user, pages);
    if (ret) {
        return ret;
    }
    map->memory.pages += pages;
    return ret;
}

void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
{
    bpf_uncharge_memlock(map->memory.user, pages);
    map->memory.pages -= pages;
}

static int bpf_map_alloc_id(struct bpf_map *map)
{
    int id;

    idr_preload(GFP_KERNEL);
    spin_lock_bh(&map_idr_lock);
    id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
    if (id > 0) {
        map->id = id;
    }
    spin_unlock_bh(&map_idr_lock);
    idr_preload_end();

    if (WARN_ON_ONCE(!id)) {
        return -ENOSPC;
    }

    return id > 0 ? 0 : id;
}

void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
    unsigned long flags;

    /* Offloaded maps are removed from the IDR store when their device
     * disappears - even if someone holds an fd to them they are unusable,
     * the memory is gone, all ops will fail; they are simply waiting for
     * refcnt to drop to be freed.
     */
    if (!map->id) {
        return;
    }

    if (do_idr_lock) {
        spin_lock_irqsave(&map_idr_lock, flags);
    } else {
        __acquire(&map_idr_lock);
    }

    idr_remove(&map_idr, map->id);
    map->id = 0;

    if (do_idr_lock) {
        spin_unlock_irqrestore(&map_idr_lock, flags);
    } else {
        __release(&map_idr_lock);
    }
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
    struct bpf_map *map = container_of(work, struct bpf_map, work);
    struct bpf_map_memory mem;

    bpf_map_charge_move(&mem, &map->memory);
    security_bpf_map_free(map);
    /* implementation dependent freeing */
    map->ops->map_free(map);
    bpf_map_charge_finish(&mem);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
    if (atomic64_dec_and_test(&map->usercnt)) {
        if (map->ops->map_release_uref) {
            map->ops->map_release_uref(map);
        }
    }
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
static void _bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
    if (atomic64_dec_and_test(&map->refcnt)) {
        /* bpf_map_free_id() must be called first */
        bpf_map_free_id(map, do_idr_lock);
        btf_put(map->btf);
        INIT_WORK(&map->work, bpf_map_free_deferred);
        schedule_work(&map->work);
    }
}

void bpf_map_put(struct bpf_map *map)
{
    _bpf_map_put(map, true);
}
EXPORT_SYMBOL_GPL(bpf_map_put);

void bpf_map_put_with_uref(struct bpf_map *map)
{
    bpf_map_put_uref(map);
    bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
    struct bpf_map *map = filp->private_data;

    if (map->ops->map_release) {
        map->ops->map_release(map, filp);
    }

    bpf_map_put_with_uref(map);
    return 0;
}

static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f)
{
    fmode_t mode = f.file->f_mode;

    /* Our file permissions may have been overridden by global
     * map permissions facing syscall side.
524 */ 525 if (READ_ONCE(map->frozen)) { 526 mode &= ~FMODE_CAN_WRITE; 527 } 528 return mode; 529} 530 531#ifdef CONFIG_PROC_FS 532static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 533{ 534 const struct bpf_map *map = filp->private_data; 535 const struct bpf_array *array; 536 u32 type = 0, jited = 0; 537 538 if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { 539 array = container_of(map, struct bpf_array, map); 540 spin_lock(&array->aux->owner.lock); 541 type = array->aux->owner.type; 542 jited = array->aux->owner.jited; 543 spin_unlock(&array->aux->owner.lock); 544 } 545 546 seq_printf(m, 547 "map_type:\t%u\n" 548 "key_size:\t%u\n" 549 "value_size:\t%u\n" 550 "max_entries:\t%u\n" 551 "map_flags:\t%#x\n" 552 "memlock:\t%llu\n" 553 "map_id:\t%u\n" 554 "frozen:\t%u\n", 555 map->map_type, map->key_size, map->value_size, map->max_entries, map->map_flags, 556 map->memory.pages * 1ULL << PAGE_SHIFT, map->id, READ_ONCE(map->frozen)); 557 if (type) { 558 seq_printf(m, "owner_prog_type:\t%u\n", type); 559 seq_printf(m, "owner_jited:\t%u\n", jited); 560 } 561} 562#endif 563 564static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) 565{ 566 /* We need this handler such that alloc_file() enables 567 * f_mode with FMODE_CAN_READ. 568 */ 569 return -EINVAL; 570} 571 572static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf, size_t siz, loff_t *ppos) 573{ 574 /* We need this handler such that alloc_file() enables 575 * f_mode with FMODE_CAN_WRITE. 576 */ 577 return -EINVAL; 578} 579 580/* called for any extra memory-mapped regions (except initial) */ 581static void bpf_map_mmap_open(struct vm_area_struct *vma) 582{ 583 struct bpf_map *map = vma->vm_file->private_data; 584 585 if (vma->vm_flags & VM_MAYWRITE) { 586 mutex_lock(&map->freeze_mutex); 587 map->writecnt++; 588 mutex_unlock(&map->freeze_mutex); 589 } 590} 591 592/* called for all unmapped memory region (including initial) */ 593static void bpf_map_mmap_close(struct vm_area_struct *vma) 594{ 595 struct bpf_map *map = vma->vm_file->private_data; 596 597 if (vma->vm_flags & VM_MAYWRITE) { 598 mutex_lock(&map->freeze_mutex); 599 map->writecnt--; 600 mutex_unlock(&map->freeze_mutex); 601 } 602} 603 604static const struct vm_operations_struct bpf_map_default_vmops = { 605 .open = bpf_map_mmap_open, 606 .close = bpf_map_mmap_close, 607}; 608 609static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 610{ 611 struct bpf_map *map = filp->private_data; 612 int err; 613 614 if (!map->ops->map_mmap || map_value_has_spin_lock(map)) { 615 return -ENOTSUPP; 616 } 617 618 if (!(vma->vm_flags & VM_SHARED)) { 619 return -EINVAL; 620 } 621 622 mutex_lock(&map->freeze_mutex); 623 624 if (vma->vm_flags & VM_WRITE) { 625 if (map->frozen) { 626 err = -EPERM; 627 goto out; 628 } 629 /* map is meant to be read-only, so do not allow mapping as 630 * writable, because it's possible to leak a writable page 631 * reference and allows user-space to still modify it after 632 * freezing, while verifier will assume contents do not change 633 */ 634 if (map->map_flags & BPF_F_RDONLY_PROG) { 635 err = -EACCES; 636 goto out; 637 } 638 } 639 640 /* set default open/close callbacks */ 641 vma->vm_ops = &bpf_map_default_vmops; 642 vma->vm_private_data = map; 643 vma->vm_flags &= ~VM_MAYEXEC; 644 if (!(vma->vm_flags & VM_WRITE)) { 645 /* disallow re-mapping with PROT_WRITE */ 646 vma->vm_flags &= ~VM_MAYWRITE; 647 } 648 649 err = map->ops->map_mmap(map, vma); 650 if (err) { 651 goto out; 652 } 653 
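    /* Mapping succeeded: account the writable mapping while freeze_mutex is
     * still held, so a concurrent map_freeze() sees a non-zero writecnt and
     * refuses to freeze the map.
     */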
654 if (vma->vm_flags & VM_MAYWRITE) { 655 map->writecnt++; 656 } 657out: 658 mutex_unlock(&map->freeze_mutex); 659 return err; 660} 661 662static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts) 663{ 664 struct bpf_map *map = filp->private_data; 665 666 if (map->ops->map_poll) { 667 return map->ops->map_poll(map, filp, pts); 668 } 669 670 return EPOLLERR; 671} 672 673const struct file_operations bpf_map_fops = { 674#ifdef CONFIG_PROC_FS 675 .show_fdinfo = bpf_map_show_fdinfo, 676#endif 677 .release = bpf_map_release, 678 .read = bpf_dummy_read, 679 .write = bpf_dummy_write, 680 .mmap = bpf_map_mmap, 681 .poll = bpf_map_poll, 682}; 683 684int bpf_map_new_fd(struct bpf_map *map, int flags) 685{ 686 int ret; 687 688 ret = security_bpf_map(map, OPEN_FMODE(flags)); 689 if (ret < 0) { 690 return ret; 691 } 692 693 return anon_inode_getfd("bpf-map", &bpf_map_fops, map, flags | O_CLOEXEC); 694} 695 696int bpf_get_file_flag(int flags) 697{ 698 if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) { 699 return -EINVAL; 700 } 701 if (flags & BPF_F_RDONLY) { 702 return O_RDONLY; 703 } 704 if (flags & BPF_F_WRONLY) { 705 return O_WRONLY; 706 } 707 return O_RDWR; 708} 709 710/* helper macro to check that unused fields 'union bpf_attr' are zero */ 711#define CHECK_ATTR(CMD) \ 712 memchr_inv((void *)&attr->CMD##_LAST_FIELD + sizeof(attr->CMD##_LAST_FIELD), 0, \ 713 sizeof(*attr) - offsetof(union bpf_attr, CMD##_LAST_FIELD) - sizeof(attr->CMD##_LAST_FIELD)) != NULL 714 715/* dst and src must have at least "size" number of bytes. 716 * Return strlen on success and < 0 on error. 717 */ 718int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) 719{ 720 const char *end = src + size; 721 const char *orig_src = src; 722 723 memset(dst, 0, size); 724 /* Copy all isalnum(), '_' and '.' chars. */ 725 while (src < end && *src) { 726 if (!isalnum(*src) && *src != '_' && *src != '.') { 727 return -EINVAL; 728 } 729 *dst++ = *src++; 730 } 731 732 /* No '\0' found in "size" number of bytes */ 733 if (src == end) { 734 return -EINVAL; 735 } 736 737 return src - orig_src; 738} 739 740int map_check_no_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, 741 const struct btf_type *value_type) 742{ 743 return -ENOTSUPP; 744} 745 746static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id) 747{ 748 const struct btf_type *key_type, *value_type; 749 u32 key_size, value_size; 750 int ret = 0; 751 752 /* Some maps allow key to be unspecified. 
*/ 753 if (btf_key_id) { 754 key_type = btf_type_id_size(btf, &btf_key_id, &key_size); 755 if (!key_type || key_size != map->key_size) { 756 return -EINVAL; 757 } 758 } else { 759 key_type = btf_type_by_id(btf, 0); 760 if (!map->ops->map_check_btf) { 761 return -EINVAL; 762 } 763 } 764 765 value_type = btf_type_id_size(btf, &btf_value_id, &value_size); 766 if (!value_type || value_size != map->value_size) { 767 return -EINVAL; 768 } 769 770 map->spin_lock_off = btf_find_spin_lock(btf, value_type); 771 772 if (map_value_has_spin_lock(map)) { 773 if (map->map_flags & BPF_F_RDONLY_PROG) { 774 return -EACCES; 775 } 776 if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_ARRAY && 777 map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_SK_STORAGE && 778 map->map_type != BPF_MAP_TYPE_INODE_STORAGE) { 779 return -ENOTSUPP; 780 } 781 if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > map->value_size) { 782 WARN_ONCE(1, "verifier bug spin_lock_off %d value_size %d\n", map->spin_lock_off, map->value_size); 783 return -EFAULT; 784 } 785 } 786 787 if (map->ops->map_check_btf) { 788 ret = map->ops->map_check_btf(map, btf, key_type, value_type); 789 } 790 791 return ret; 792} 793 794#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id 795/* called via syscall */ 796static int map_create(union bpf_attr *attr) 797{ 798 int numa_node = bpf_map_attr_numa_node(attr); 799 struct bpf_map_memory mem; 800 struct bpf_map *map; 801 int f_flags; 802 int err; 803 804 err = CHECK_ATTR(BPF_MAP_CREATE); 805 if (err) { 806 return -EINVAL; 807 } 808 809 if (attr->btf_vmlinux_value_type_id) { 810 if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || attr->btf_key_type_id || attr->btf_value_type_id) { 811 return -EINVAL; 812 } 813 } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { 814 return -EINVAL; 815 } 816 817 f_flags = bpf_get_file_flag(attr->map_flags); 818 if (f_flags < 0) { 819 return f_flags; 820 } 821 822 if (numa_node != NUMA_NO_NODE && ((unsigned int)numa_node >= nr_node_ids || !node_online(numa_node))) { 823 return -EINVAL; 824 } 825 826 /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ 827 map = find_and_alloc_map(attr); 828 if (IS_ERR(map)) { 829 return PTR_ERR(map); 830 } 831 832 err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); 833 if (err < 0) { 834 goto free_map; 835 } 836 837 atomic64_set(&map->refcnt, 1); 838 atomic64_set(&map->usercnt, 1); 839 mutex_init(&map->freeze_mutex); 840 841 map->spin_lock_off = -EINVAL; 842 if (attr->btf_key_type_id || attr->btf_value_type_id || 843 /* Even the map's value is a kernel's struct, 844 * the bpf_prog.o must have BTF to begin with 845 * to figure out the corresponding kernel's 846 * counter part. Thus, attr->btf_fd has 847 * to be valid also. 
848 */ 849 attr->btf_vmlinux_value_type_id) { 850 struct btf *btf; 851 852 btf = btf_get_by_fd(attr->btf_fd); 853 if (IS_ERR(btf)) { 854 err = PTR_ERR(btf); 855 goto free_map; 856 } 857 map->btf = btf; 858 859 if (attr->btf_value_type_id) { 860 err = map_check_btf(map, btf, attr->btf_key_type_id, attr->btf_value_type_id); 861 if (err) { 862 goto free_map; 863 } 864 } 865 866 map->btf_key_type_id = attr->btf_key_type_id; 867 map->btf_value_type_id = attr->btf_value_type_id; 868 map->btf_vmlinux_value_type_id = attr->btf_vmlinux_value_type_id; 869 } 870 871 err = security_bpf_map_alloc(map); 872 if (err) { 873 goto free_map; 874 } 875 876 err = bpf_map_alloc_id(map); 877 if (err) { 878 goto free_map_sec; 879 } 880 881 err = bpf_map_new_fd(map, f_flags); 882 if (err < 0) { 883 /* failed to allocate fd. 884 * bpf_map_put_with_uref() is needed because the above 885 * bpf_map_alloc_id() has published the map 886 * to the userspace and the userspace may 887 * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. 888 */ 889 bpf_map_put_with_uref(map); 890 return err; 891 } 892 893 return err; 894 895free_map_sec: 896 security_bpf_map_free(map); 897free_map: 898 btf_put(map->btf); 899 bpf_map_charge_move(&mem, &map->memory); 900 map->ops->map_free(map); 901 bpf_map_charge_finish(&mem); 902 return err; 903} 904 905/* if error is returned, fd is released. 906 * On success caller should complete fd access with matching fdput() 907 */ 908struct bpf_map *__bpf_map_get(struct fd f) 909{ 910 if (!f.file) { 911 return ERR_PTR(-EBADF); 912 } 913 if (f.file->f_op != &bpf_map_fops) { 914 fdput(f); 915 return ERR_PTR(-EINVAL); 916 } 917 918 return f.file->private_data; 919} 920 921void bpf_map_inc(struct bpf_map *map) 922{ 923 atomic64_inc(&map->refcnt); 924} 925EXPORT_SYMBOL_GPL(bpf_map_inc); 926 927void bpf_map_inc_with_uref(struct bpf_map *map) 928{ 929 atomic64_inc(&map->refcnt); 930 atomic64_inc(&map->usercnt); 931} 932EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 933 934struct bpf_map *bpf_map_get(u32 ufd) 935{ 936 struct fd f = fdget(ufd); 937 struct bpf_map *map; 938 939 map = __bpf_map_get(f); 940 if (IS_ERR(map)) { 941 return map; 942 } 943 944 bpf_map_inc(map); 945 fdput(f); 946 947 return map; 948} 949 950struct bpf_map *bpf_map_get_with_uref(u32 ufd) 951{ 952 struct fd f = fdget(ufd); 953 struct bpf_map *map; 954 955 map = __bpf_map_get(f); 956 if (IS_ERR(map)) { 957 return map; 958 } 959 960 bpf_map_inc_with_uref(map); 961 fdput(f); 962 963 return map; 964} 965 966/* map_idr_lock should have been held */ 967static struct bpf_map *_bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 968{ 969 int refold; 970 971 refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 972 if (!refold) { 973 return ERR_PTR(-ENOENT); 974 } 975 if (uref) { 976 atomic64_inc(&map->usercnt); 977 } 978 979 return map; 980} 981 982struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 983{ 984 spin_lock_bh(&map_idr_lock); 985 map = _bpf_map_inc_not_zero(map, false); 986 spin_unlock_bh(&map_idr_lock); 987 988 return map; 989} 990EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero); 991 992int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) 993{ 994 return -ENOTSUPP; 995} 996 997static void *__bpf_copy_key(void __user *ukey, u64 key_size) 998{ 999 if (key_size) { 1000 return memdup_user(ukey, key_size); 1001 } 1002 1003 if (ukey) { 1004 return ERR_PTR(-EINVAL); 1005 } 1006 1007 return NULL; 1008} 1009 1010/* last field in 'union bpf_attr' used by this command */ 1011#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags 1012 
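/*
 * For orientation, a minimal userspace sketch (not part of this file) of the
 * BPF_MAP_LOOKUP_ELEM command that lands in map_lookup_elem() below; map_fd,
 * key and value_buf are placeholders:
 *
 *     union bpf_attr attr = {};
 *
 *     attr.map_fd = map_fd;
 *     attr.key = (__u64)(unsigned long)&key;
 *     attr.value = (__u64)(unsigned long)value_buf;
 *     err = syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 */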
1013static int map_lookup_elem(union bpf_attr *attr) 1014{ 1015 void __user *ukey = u64_to_user_ptr(attr->key); 1016 void __user *uvalue = u64_to_user_ptr(attr->value); 1017 int ufd = attr->map_fd; 1018 struct bpf_map *map; 1019 void *key, *value; 1020 u32 value_size; 1021 struct fd f; 1022 int err; 1023 1024 if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) { 1025 return -EINVAL; 1026 } 1027 1028 if (attr->flags & ~BPF_F_LOCK) { 1029 return -EINVAL; 1030 } 1031 1032 f = fdget(ufd); 1033 map = __bpf_map_get(f); 1034 if (IS_ERR(map)) { 1035 return PTR_ERR(map); 1036 } 1037 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1038 err = -EPERM; 1039 goto err_put; 1040 } 1041 1042 if ((attr->flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) { 1043 err = -EINVAL; 1044 goto err_put; 1045 } 1046 1047 key = __bpf_copy_key(ukey, map->key_size); 1048 if (IS_ERR(key)) { 1049 err = PTR_ERR(key); 1050 goto err_put; 1051 } 1052 1053 value_size = bpf_map_value_size(map); 1054 1055 err = -ENOMEM; 1056 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 1057 if (!value) { 1058 goto free_key; 1059 } 1060 1061 err = bpf_map_copy_value(map, key, value, attr->flags); 1062 if (err) { 1063 goto free_value; 1064 } 1065 1066 err = -EFAULT; 1067 if (copy_to_user(uvalue, value, value_size) != 0) { 1068 goto free_value; 1069 } 1070 1071 err = 0; 1072 1073free_value: 1074 kfree(value); 1075free_key: 1076 kfree(key); 1077err_put: 1078 fdput(f); 1079 return err; 1080} 1081 1082#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags 1083 1084static int map_update_elem(union bpf_attr *attr) 1085{ 1086 void __user *ukey = u64_to_user_ptr(attr->key); 1087 void __user *uvalue = u64_to_user_ptr(attr->value); 1088 int ufd = attr->map_fd; 1089 struct bpf_map *map; 1090 void *key, *value; 1091 u32 value_size; 1092 struct fd f; 1093 int err; 1094 1095 if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM)) { 1096 return -EINVAL; 1097 } 1098 1099 f = fdget(ufd); 1100 map = __bpf_map_get(f); 1101 if (IS_ERR(map)) { 1102 return PTR_ERR(map); 1103 } 1104 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1105 err = -EPERM; 1106 goto err_put; 1107 } 1108 1109 if ((attr->flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) { 1110 err = -EINVAL; 1111 goto err_put; 1112 } 1113 1114 key = __bpf_copy_key(ukey, map->key_size); 1115 if (IS_ERR(key)) { 1116 err = PTR_ERR(key); 1117 goto err_put; 1118 } 1119 1120 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 1121 map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) { 1122 value_size = round_up(map->value_size, 0x8) * num_possible_cpus(); 1123 } else { 1124 value_size = map->value_size; 1125 } 1126 1127 err = -ENOMEM; 1128 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 1129 if (!value) { 1130 goto free_key; 1131 } 1132 1133 err = -EFAULT; 1134 if (copy_from_user(value, uvalue, value_size) != 0) { 1135 goto free_value; 1136 } 1137 1138 err = bpf_map_update_value(map, f, key, value, attr->flags); 1139 1140free_value: 1141 kfree(value); 1142free_key: 1143 kfree(key); 1144err_put: 1145 fdput(f); 1146 return err; 1147} 1148 1149#define BPF_MAP_DELETE_ELEM_LAST_FIELD key 1150 1151static int map_delete_elem(union bpf_attr *attr) 1152{ 1153 void __user *ukey = u64_to_user_ptr(attr->key); 1154 int ufd = attr->map_fd; 1155 struct bpf_map *map; 1156 struct fd f; 1157 void *key; 1158 int err; 1159 1160 if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) { 1161 return -EINVAL; 1162 } 1163 1164 f = fdget(ufd); 1165 map = __bpf_map_get(f); 1166 if 
(IS_ERR(map)) { 1167 return PTR_ERR(map); 1168 } 1169 if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1170 err = -EPERM; 1171 goto err_put; 1172 } 1173 1174 key = __bpf_copy_key(ukey, map->key_size); 1175 if (IS_ERR(key)) { 1176 err = PTR_ERR(key); 1177 goto err_put; 1178 } 1179 1180 if (bpf_map_is_dev_bound(map)) { 1181 err = bpf_map_offload_delete_elem(map, key); 1182 goto out; 1183 } else if (IS_FD_PROG_ARRAY(map) || map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1184 /* These maps require sleepable context */ 1185 err = map->ops->map_delete_elem(map, key); 1186 goto out; 1187 } 1188 1189 bpf_disable_instrumentation(); 1190 rcu_read_lock(); 1191 err = map->ops->map_delete_elem(map, key); 1192 rcu_read_unlock(); 1193 bpf_enable_instrumentation(); 1194 maybe_wait_bpf_programs(map); 1195out: 1196 kfree(key); 1197err_put: 1198 fdput(f); 1199 return err; 1200} 1201 1202/* last field in 'union bpf_attr' used by this command */ 1203#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key 1204 1205static int map_get_next_key(union bpf_attr *attr) 1206{ 1207 void __user *ukey = u64_to_user_ptr(attr->key); 1208 void __user *unext_key = u64_to_user_ptr(attr->next_key); 1209 int ufd = attr->map_fd; 1210 struct bpf_map *map; 1211 void *key, *next_key; 1212 struct fd f; 1213 int err; 1214 1215 if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY)) { 1216 return -EINVAL; 1217 } 1218 1219 f = fdget(ufd); 1220 map = __bpf_map_get(f); 1221 if (IS_ERR(map)) { 1222 return PTR_ERR(map); 1223 } 1224 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 1225 err = -EPERM; 1226 goto err_put; 1227 } 1228 1229 if (ukey) { 1230 key = __bpf_copy_key(ukey, map->key_size); 1231 if (IS_ERR(key)) { 1232 err = PTR_ERR(key); 1233 goto err_put; 1234 } 1235 } else { 1236 key = NULL; 1237 } 1238 1239 err = -ENOMEM; 1240 next_key = kmalloc(map->key_size, GFP_USER); 1241 if (!next_key) { 1242 goto free_key; 1243 } 1244 1245 if (bpf_map_is_dev_bound(map)) { 1246 err = bpf_map_offload_get_next_key(map, key, next_key); 1247 goto out; 1248 } 1249 1250 rcu_read_lock(); 1251 err = map->ops->map_get_next_key(map, key, next_key); 1252 rcu_read_unlock(); 1253out: 1254 if (err) { 1255 goto free_next_key; 1256 } 1257 1258 err = -EFAULT; 1259 if (copy_to_user(unext_key, next_key, map->key_size) != 0) { 1260 goto free_next_key; 1261 } 1262 1263 err = 0; 1264 1265free_next_key: 1266 kfree(next_key); 1267free_key: 1268 kfree(key); 1269err_put: 1270 fdput(f); 1271 return err; 1272} 1273 1274int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr) 1275{ 1276 void __user *keys = u64_to_user_ptr(attr->batch.keys); 1277 u32 cp, max_count; 1278 int err = 0; 1279 void *key; 1280 1281 if (attr->batch.elem_flags & ~BPF_F_LOCK) { 1282 return -EINVAL; 1283 } 1284 1285 if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) { 1286 return -EINVAL; 1287 } 1288 1289 max_count = attr->batch.count; 1290 if (!max_count) { 1291 return 0; 1292 } 1293 1294 key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN); 1295 if (!key) { 1296 return -ENOMEM; 1297 } 1298 1299 for (cp = 0; cp < max_count; cp++) { 1300 err = -EFAULT; 1301 if (copy_from_user(key, keys + cp * map->key_size, map->key_size)) { 1302 break; 1303 } 1304 1305 if (bpf_map_is_dev_bound(map)) { 1306 err = bpf_map_offload_delete_elem(map, key); 1307 break; 1308 } 1309 1310 bpf_disable_instrumentation(); 1311 rcu_read_lock(); 1312 err = map->ops->map_delete_elem(map, key); 1313 rcu_read_unlock(); 1314 bpf_enable_instrumentation(); 1315 
        maybe_wait_bpf_programs(map);
        if (err) {
            break;
        }
    }
    if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) {
        err = -EFAULT;
    }

    kfree(key);
    return err;
}

int generic_map_update_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
{
    void __user *values = u64_to_user_ptr(attr->batch.values);
    void __user *keys = u64_to_user_ptr(attr->batch.keys);
    u32 value_size, cp, max_count;
    int ufd = attr->batch.map_fd;
    void *key, *value;
    struct fd f;
    int err = 0;

    if (attr->batch.elem_flags & ~BPF_F_LOCK) {
        return -EINVAL;
    }

    if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
        return -EINVAL;
    }

    value_size = bpf_map_value_size(map);

    max_count = attr->batch.count;
    if (!max_count) {
        return 0;
    }

    key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
    if (!key) {
        return -ENOMEM;
    }

    value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
    if (!value) {
        kfree(key);
        return -ENOMEM;
    }

    f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */
    for (cp = 0; cp < max_count; cp++) {
        err = -EFAULT;
        if (copy_from_user(key, keys + cp * map->key_size, map->key_size) ||
            copy_from_user(value, values + cp * value_size, value_size)) {
            break;
        }

        err = bpf_map_update_value(map, f, key, value, attr->batch.elem_flags);

        if (err) {
            break;
        }
        /* yield the CPU between updates so large batches don't hog it */
        cond_resched();
    }

    if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) {
        err = -EFAULT;
    }

    kfree(value);
    kfree(key);
    fdput(f);
    return err;
}

#define MAP_LOOKUP_RETRIES 3

int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr)
{
    void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
    void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
    void __user *values = u64_to_user_ptr(attr->batch.values);
    void __user *keys = u64_to_user_ptr(attr->batch.keys);
    void *buf, *buf_prevkey, *prev_key, *key, *value;
    int err, retry = MAP_LOOKUP_RETRIES;
    u32 value_size, cp, max_count;

    if (attr->batch.elem_flags & ~BPF_F_LOCK) {
        return -EINVAL;
    }

    if ((attr->batch.elem_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)) {
        return -EINVAL;
    }

    value_size = bpf_map_value_size(map);

    max_count = attr->batch.count;
    if (!max_count) {
        return 0;
    }

    if (put_user(0, &uattr->batch.count)) {
        return -EFAULT;
    }

    buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
    if (!buf_prevkey) {
        return -ENOMEM;
    }

    buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
    if (!buf) {
        kfree(buf_prevkey);
        return -ENOMEM;
    }

    err = -EFAULT;
    prev_key = NULL;
    if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size)) {
        goto free_buf;
    }
    key = buf;
    value = key + map->key_size;
    if (ubatch) {
        prev_key = buf_prevkey;
    }

    for (cp = 0; cp < max_count;) {
        rcu_read_lock();
        err = map->ops->map_get_next_key(map, prev_key, key);
        rcu_read_unlock();
        if (err) {
            break;
        }
        err = bpf_map_copy_value(map, key, value, attr->batch.elem_flags);

        if (err == -ENOENT) {
1453 if (retry) { 1454 retry--; 1455 continue; 1456 } 1457 err = -EINTR; 1458 break; 1459 } 1460 1461 if (err) { 1462 goto free_buf; 1463 } 1464 1465 if (copy_to_user(keys + cp * map->key_size, key, map->key_size)) { 1466 err = -EFAULT; 1467 goto free_buf; 1468 } 1469 if (copy_to_user(values + cp * value_size, value, value_size)) { 1470 err = -EFAULT; 1471 goto free_buf; 1472 } 1473 1474 if (!prev_key) { 1475 prev_key = buf_prevkey; 1476 } 1477 1478 swap(prev_key, key); 1479 retry = MAP_LOOKUP_RETRIES; 1480 cp++; 1481 cond_resched(); 1482 } 1483 1484 if (err == -EFAULT) { 1485 goto free_buf; 1486 } 1487 1488 if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || 1489 (cp && copy_to_user(uobatch, prev_key, map->key_size)))) { 1490 err = -EFAULT; 1491 } 1492 1493free_buf: 1494 kfree(buf_prevkey); 1495 kfree(buf); 1496 return err; 1497} 1498 1499#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value 1500 1501static int map_lookup_and_delete_elem(union bpf_attr *attr) 1502{ 1503 void __user *ukey = u64_to_user_ptr(attr->key); 1504 void __user *uvalue = u64_to_user_ptr(attr->value); 1505 int ufd = attr->map_fd; 1506 struct bpf_map *map; 1507 void *key, *value; 1508 u32 value_size; 1509 struct fd f; 1510 int err; 1511 1512 if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) { 1513 return -EINVAL; 1514 } 1515 1516 f = fdget(ufd); 1517 map = __bpf_map_get(f); 1518 if (IS_ERR(map)) { 1519 return PTR_ERR(map); 1520 } 1521 if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1522 err = -EPERM; 1523 goto err_put; 1524 } 1525 1526 key = __bpf_copy_key(ukey, map->key_size); 1527 if (IS_ERR(key)) { 1528 err = PTR_ERR(key); 1529 goto err_put; 1530 } 1531 1532 value_size = map->value_size; 1533 1534 err = -ENOMEM; 1535 value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); 1536 if (!value) { 1537 goto free_key; 1538 } 1539 1540 if (map->map_type == BPF_MAP_TYPE_QUEUE || map->map_type == BPF_MAP_TYPE_STACK) { 1541 err = map->ops->map_pop_elem(map, value); 1542 } else { 1543 err = -ENOTSUPP; 1544 } 1545 1546 if (err) { 1547 goto free_value; 1548 } 1549 1550 if (copy_to_user(uvalue, value, value_size) != 0) { 1551 err = -EFAULT; 1552 goto free_value; 1553 } 1554 1555 err = 0; 1556 1557free_value: 1558 kfree(value); 1559free_key: 1560 kfree(key); 1561err_put: 1562 fdput(f); 1563 return err; 1564} 1565 1566#define BPF_MAP_FREEZE_LAST_FIELD map_fd 1567 1568static int map_freeze(const union bpf_attr *attr) 1569{ 1570 int err = 0, ufd = attr->map_fd; 1571 struct bpf_map *map; 1572 struct fd f; 1573 1574 if (CHECK_ATTR(BPF_MAP_FREEZE)) { 1575 return -EINVAL; 1576 } 1577 1578 f = fdget(ufd); 1579 map = __bpf_map_get(f); 1580 if (IS_ERR(map)) { 1581 return PTR_ERR(map); 1582 } 1583 1584 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { 1585 fdput(f); 1586 return -ENOTSUPP; 1587 } 1588 1589 mutex_lock(&map->freeze_mutex); 1590 1591 if (map->writecnt) { 1592 err = -EBUSY; 1593 goto err_put; 1594 } 1595 if (READ_ONCE(map->frozen)) { 1596 err = -EBUSY; 1597 goto err_put; 1598 } 1599 if (!bpf_capable()) { 1600 err = -EPERM; 1601 goto err_put; 1602 } 1603 1604 WRITE_ONCE(map->frozen, true); 1605err_put: 1606 mutex_unlock(&map->freeze_mutex); 1607 fdput(f); 1608 return err; 1609} 1610 1611static const struct bpf_prog_ops *const bpf_prog_types[] = { 1612#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) [_id] = &_name##_prog_ops, 1613#define BPF_MAP_TYPE(_id, _ops) 1614#define BPF_LINK_TYPE(_id, _name) 1615#include <linux/bpf_types.h> 1616#undef BPF_PROG_TYPE 
1617#undef BPF_MAP_TYPE 1618#undef BPF_LINK_TYPE 1619}; 1620 1621static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) 1622{ 1623 const struct bpf_prog_ops *ops; 1624 1625 if (type >= ARRAY_SIZE(bpf_prog_types)) { 1626 return -EINVAL; 1627 } 1628 type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); 1629 ops = bpf_prog_types[type]; 1630 if (!ops) { 1631 return -EINVAL; 1632 } 1633 1634 if (!bpf_prog_is_dev_bound(prog->aux)) { 1635 prog->aux->ops = ops; 1636 } else { 1637 prog->aux->ops = &bpf_offload_prog_ops; 1638 } 1639 prog->type = type; 1640 return 0; 1641} 1642 1643enum bpf_audit { 1644 BPF_AUDIT_LOAD, 1645 BPF_AUDIT_UNLOAD, 1646 BPF_AUDIT_MAX, 1647}; 1648 1649static const char *const bpf_audit_str[BPF_AUDIT_MAX] = { 1650 [BPF_AUDIT_LOAD] = "LOAD", 1651 [BPF_AUDIT_UNLOAD] = "UNLOAD", 1652}; 1653 1654static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 1655{ 1656 struct audit_context *ctx = NULL; 1657 struct audit_buffer *ab; 1658 1659 if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) { 1660 return; 1661 } 1662 if (audit_enabled == AUDIT_OFF) { 1663 return; 1664 } 1665 if (op == BPF_AUDIT_LOAD) { 1666 ctx = audit_context(); 1667 } 1668 ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 1669 if (unlikely(!ab)) { 1670 return; 1671 } 1672 audit_log_format(ab, "prog-id=%u op=%s", prog->aux->id, bpf_audit_str[op]); 1673 audit_log_end(ab); 1674} 1675 1676int __bpf_prog_charge(struct user_struct *user, u32 pages) 1677{ 1678 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 1679 unsigned long user_bufs; 1680 1681 if (user) { 1682 user_bufs = atomic_long_add_return(pages, &user->locked_vm); 1683 if (user_bufs > memlock_limit) { 1684 atomic_long_sub(pages, &user->locked_vm); 1685 return -EPERM; 1686 } 1687 } 1688 1689 return 0; 1690} 1691 1692void __bpf_prog_uncharge(struct user_struct *user, u32 pages) 1693{ 1694 if (user) { 1695 atomic_long_sub(pages, &user->locked_vm); 1696 } 1697} 1698 1699static int bpf_prog_charge_memlock(struct bpf_prog *prog) 1700{ 1701 struct user_struct *user = get_current_user(); 1702 int ret; 1703 1704 ret = __bpf_prog_charge(user, prog->pages); 1705 if (ret) { 1706 free_uid(user); 1707 return ret; 1708 } 1709 1710 prog->aux->user = user; 1711 return 0; 1712} 1713 1714static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) 1715{ 1716 struct user_struct *user = prog->aux->user; 1717 1718 __bpf_prog_uncharge(user, prog->pages); 1719 free_uid(user); 1720} 1721 1722static int bpf_prog_alloc_id(struct bpf_prog *prog) 1723{ 1724 int id; 1725 1726 idr_preload(GFP_KERNEL); 1727 spin_lock_bh(&prog_idr_lock); 1728 id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC); 1729 if (id > 0) { 1730 prog->aux->id = id; 1731 } 1732 spin_unlock_bh(&prog_idr_lock); 1733 idr_preload_end(); 1734 1735 /* id is in [1, INT_MAX) */ 1736 if (WARN_ON_ONCE(!id)) { 1737 return -ENOSPC; 1738 } 1739 1740 return id > 0 ? 0 : id; 1741} 1742 1743void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) 1744{ 1745 /* cBPF to eBPF migrations are currently not in the idr store. 1746 * Offloaded programs are removed from the store when their device 1747 * disappears - even if someone grabs an fd to them they are unusable, 1748 * simply waiting for refcnt to drop to be freed. 
1749 */ 1750 if (!prog->aux->id) { 1751 return; 1752 } 1753 1754 if (do_idr_lock) { 1755 spin_lock_bh(&prog_idr_lock); 1756 } else { 1757 __acquire(&prog_idr_lock); 1758 } 1759 1760 idr_remove(&prog_idr, prog->aux->id); 1761 prog->aux->id = 0; 1762 1763 if (do_idr_lock) { 1764 spin_unlock_bh(&prog_idr_lock); 1765 } else { 1766 __release(&prog_idr_lock); 1767 } 1768} 1769 1770static void _bpf_prog_put_rcu(struct rcu_head *rcu) 1771{ 1772 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 1773 1774 kvfree(aux->func_info); 1775 kfree(aux->func_info_aux); 1776 bpf_prog_uncharge_memlock(aux->prog); 1777 security_bpf_prog_free(aux); 1778 bpf_prog_free(aux->prog); 1779} 1780 1781static void _bpf_prog_put_noref(struct bpf_prog *prog, bool deferred) 1782{ 1783 bpf_prog_kallsyms_del_all(prog); 1784 btf_put(prog->aux->btf); 1785 bpf_prog_free_linfo(prog); 1786 1787 if (deferred) { 1788 if (prog->aux->sleepable) { 1789 call_rcu_tasks_trace(&prog->aux->rcu, _bpf_prog_put_rcu); 1790 } else { 1791 call_rcu(&prog->aux->rcu, _bpf_prog_put_rcu); 1792 } 1793 } else { 1794 _bpf_prog_put_rcu(&prog->aux->rcu); 1795 } 1796} 1797 1798static void _bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) 1799{ 1800 if (atomic64_dec_and_test(&prog->aux->refcnt)) { 1801 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 1802 bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 1803 /* bpf_prog_free_id() must be called first */ 1804 bpf_prog_free_id(prog, do_idr_lock); 1805 _bpf_prog_put_noref(prog, true); 1806 } 1807} 1808 1809void bpf_prog_put(struct bpf_prog *prog) 1810{ 1811 _bpf_prog_put(prog, true); 1812} 1813EXPORT_SYMBOL_GPL(bpf_prog_put); 1814 1815static int bpf_prog_release(struct inode *inode, struct file *filp) 1816{ 1817 struct bpf_prog *prog = filp->private_data; 1818 1819 bpf_prog_put(prog); 1820 return 0; 1821} 1822 1823static void bpf_prog_get_stats(const struct bpf_prog *prog, struct bpf_prog_stats *stats) 1824{ 1825 u64 nsecs = 0, cnt = 0; 1826 int cpu; 1827 1828 for_each_possible_cpu(cpu) 1829 { 1830 const struct bpf_prog_stats *st; 1831 unsigned int start; 1832 u64 tnsecs, tcnt; 1833 1834 st = per_cpu_ptr(prog->aux->stats, cpu); 1835 do { 1836 start = u64_stats_fetch_begin_irq(&st->syncp); 1837 tnsecs = st->nsecs; 1838 tcnt = st->cnt; 1839 } while (u64_stats_fetch_retry_irq(&st->syncp, start)); 1840 nsecs += tnsecs; 1841 cnt += tcnt; 1842 } 1843 stats->nsecs = nsecs; 1844 stats->cnt = cnt; 1845} 1846 1847#ifdef CONFIG_PROC_FS 1848static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) 1849{ 1850 const struct bpf_prog *prog = filp->private_data; 1851 char prog_tag[sizeof(prog->tag) * 0x2 + 1] = {}; 1852 struct bpf_prog_stats stats; 1853 1854 bpf_prog_get_stats(prog, &stats); 1855 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 1856 seq_printf(m, 1857 "prog_type:\t%u\n" 1858 "prog_jited:\t%u\n" 1859 "prog_tag:\t%s\n" 1860 "memlock:\t%llu\n" 1861 "prog_id:\t%u\n" 1862 "run_time_ns:\t%llu\n" 1863 "run_cnt:\t%llu\n", 1864 prog->type, prog->jited, prog_tag, prog->pages * 1ULL << PAGE_SHIFT, prog->aux->id, stats.nsecs, 1865 stats.cnt); 1866} 1867#endif 1868 1869const struct file_operations bpf_prog_fops = { 1870#ifdef CONFIG_PROC_FS 1871 .show_fdinfo = bpf_prog_show_fdinfo, 1872#endif 1873 .release = bpf_prog_release, 1874 .read = bpf_dummy_read, 1875 .write = bpf_dummy_write, 1876}; 1877 1878int bpf_prog_new_fd(struct bpf_prog *prog) 1879{ 1880 int ret; 1881 1882 ret = security_bpf_prog(prog); 1883 if (ret < 0) { 1884 return ret; 1885 } 1886 1887 return 
anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); 1888} 1889 1890static struct bpf_prog *i_bpf_prog_get(struct fd f) 1891{ 1892 if (!f.file) { 1893 return ERR_PTR(-EBADF); 1894 } 1895 if (f.file->f_op != &bpf_prog_fops) { 1896 fdput(f); 1897 return ERR_PTR(-EINVAL); 1898 } 1899 1900 return f.file->private_data; 1901} 1902 1903void bpf_prog_add(struct bpf_prog *prog, int i) 1904{ 1905 atomic64_add(i, &prog->aux->refcnt); 1906} 1907EXPORT_SYMBOL_GPL(bpf_prog_add); 1908 1909void bpf_prog_sub(struct bpf_prog *prog, int i) 1910{ 1911 /* Only to be used for undoing previous bpf_prog_add() in some 1912 * error path. We still know that another entity in our call 1913 * path holds a reference to the program, thus atomic_sub() can 1914 * be safely used in such cases! 1915 */ 1916 WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 1917} 1918EXPORT_SYMBOL_GPL(bpf_prog_sub); 1919 1920void bpf_prog_inc(struct bpf_prog *prog) 1921{ 1922 atomic64_inc(&prog->aux->refcnt); 1923} 1924EXPORT_SYMBOL_GPL(bpf_prog_inc); 1925 1926/* prog_idr_lock should have been held */ 1927struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog) 1928{ 1929 int refold; 1930 1931 refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 1932 if (!refold) { 1933 return ERR_PTR(-ENOENT); 1934 } 1935 1936 return prog; 1937} 1938EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero); 1939 1940bool bpf_prog_get_ok(struct bpf_prog *prog, enum bpf_prog_type *attach_type, bool attach_drv) 1941{ 1942 /* not an attachment, just a refcount inc, always allow */ 1943 if (!attach_type) { 1944 return true; 1945 } 1946 1947 if (prog->type != *attach_type) { 1948 return false; 1949 } 1950 if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv) { 1951 return false; 1952 } 1953 1954 return true; 1955} 1956 1957static struct bpf_prog *_bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, bool attach_drv) 1958{ 1959 struct fd f = fdget(ufd); 1960 struct bpf_prog *prog; 1961 1962 prog = i_bpf_prog_get(f); 1963 if (IS_ERR(prog)) { 1964 return prog; 1965 } 1966 if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { 1967 prog = ERR_PTR(-EINVAL); 1968 goto out; 1969 } 1970 1971 bpf_prog_inc(prog); 1972out: 1973 fdput(f); 1974 return prog; 1975} 1976 1977struct bpf_prog *bpf_prog_get(u32 ufd) 1978{ 1979 return _bpf_prog_get(ufd, NULL, false); 1980} 1981 1982struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv) 1983{ 1984 return _bpf_prog_get(ufd, &type, attach_drv); 1985} 1986EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); 1987 1988/* Initially all BPF programs could be loaded w/o specifying 1989 * expected_attach_type. Later for some of them specifying expected_attach_type 1990 * at load time became required so that program could be validated properly. 1991 * Programs of types that are allowed to be loaded both w/ and w/o (for 1992 * backward compatibility) expected_attach_type, should have the default attach 1993 * type assigned to expected_attach_type for the latter case, so that it can be 1994 * validated later at attach time. 1995 * 1996 * bpf_prog_load_fixup_attach_type() sets expected_attach_type in @attr if 1997 * prog type requires it but has some attach types that have to be backward 1998 * compatible. 1999 */ 2000static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) 2001{ 2002 if (attr->prog_type == BPF_PROG_TYPE_CGROUP_SOCK) { 2003 /* Unfortunately BPF_ATTACH_TYPE_UNSPEC enumeration doesn't 2004 * exist so checking for non-zero is the way to go here. 
2005 */ 2006 if (!attr->expected_attach_type) { 2007 attr->expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE; 2008 } 2009 } 2010} 2011 2012static int bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type, 2013 u32 btf_id, u32 prog_fd) 2014{ 2015 if (btf_id) { 2016 if (btf_id > BTF_MAX_TYPE) { 2017 return -EINVAL; 2018 } 2019 2020 switch (prog_type) { 2021 case BPF_PROG_TYPE_TRACING: 2022 case BPF_PROG_TYPE_LSM: 2023 case BPF_PROG_TYPE_STRUCT_OPS: 2024 case BPF_PROG_TYPE_EXT: 2025 break; 2026 default: 2027 return -EINVAL; 2028 } 2029 } 2030 2031 if (prog_fd && prog_type != BPF_PROG_TYPE_TRACING && prog_type != BPF_PROG_TYPE_EXT) { 2032 return -EINVAL; 2033 } 2034 2035 switch (prog_type) { 2036 case BPF_PROG_TYPE_CGROUP_SOCK: 2037 switch (expected_attach_type) { 2038 case BPF_CGROUP_INET_SOCK_CREATE: 2039 case BPF_CGROUP_INET_SOCK_RELEASE: 2040 case BPF_CGROUP_INET4_POST_BIND: 2041 case BPF_CGROUP_INET6_POST_BIND: 2042 return 0; 2043 default: 2044 return -EINVAL; 2045 } 2046 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2047 switch (expected_attach_type) { 2048 case BPF_CGROUP_INET4_BIND: 2049 case BPF_CGROUP_INET6_BIND: 2050 case BPF_CGROUP_INET4_CONNECT: 2051 case BPF_CGROUP_INET6_CONNECT: 2052 case BPF_CGROUP_INET4_GETPEERNAME: 2053 case BPF_CGROUP_INET6_GETPEERNAME: 2054 case BPF_CGROUP_INET4_GETSOCKNAME: 2055 case BPF_CGROUP_INET6_GETSOCKNAME: 2056 case BPF_CGROUP_UDP4_SENDMSG: 2057 case BPF_CGROUP_UDP6_SENDMSG: 2058 case BPF_CGROUP_UDP4_RECVMSG: 2059 case BPF_CGROUP_UDP6_RECVMSG: 2060 return 0; 2061 default: 2062 return -EINVAL; 2063 } 2064 case BPF_PROG_TYPE_CGROUP_SKB: 2065 switch (expected_attach_type) { 2066 case BPF_CGROUP_INET_INGRESS: 2067 case BPF_CGROUP_INET_EGRESS: 2068 return 0; 2069 default: 2070 return -EINVAL; 2071 } 2072 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2073 switch (expected_attach_type) { 2074 case BPF_CGROUP_SETSOCKOPT: 2075 case BPF_CGROUP_GETSOCKOPT: 2076 return 0; 2077 default: 2078 return -EINVAL; 2079 } 2080 case BPF_PROG_TYPE_SK_LOOKUP: 2081 if (expected_attach_type == BPF_SK_LOOKUP) { 2082 return 0; 2083 } 2084 return -EINVAL; 2085 case BPF_PROG_TYPE_EXT: 2086 if (expected_attach_type) { 2087 return -EINVAL; 2088 } 2089 fallthrough; 2090 default: 2091 return 0; 2092 } 2093} 2094 2095static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) 2096{ 2097 switch (prog_type) { 2098 case BPF_PROG_TYPE_SCHED_CLS: 2099 case BPF_PROG_TYPE_SCHED_ACT: 2100 case BPF_PROG_TYPE_XDP: 2101 case BPF_PROG_TYPE_LWT_IN: 2102 case BPF_PROG_TYPE_LWT_OUT: 2103 case BPF_PROG_TYPE_LWT_XMIT: 2104 case BPF_PROG_TYPE_LWT_SEG6LOCAL: 2105 case BPF_PROG_TYPE_SK_SKB: 2106 case BPF_PROG_TYPE_SK_MSG: 2107 case BPF_PROG_TYPE_LIRC_MODE2: 2108 case BPF_PROG_TYPE_FLOW_DISSECTOR: 2109 case BPF_PROG_TYPE_CGROUP_DEVICE: 2110 case BPF_PROG_TYPE_CGROUP_SOCK: 2111 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2112 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2113 case BPF_PROG_TYPE_CGROUP_SYSCTL: 2114 case BPF_PROG_TYPE_SOCK_OPS: 2115 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2116 return true; 2117 case BPF_PROG_TYPE_CGROUP_SKB: 2118 /* always unpriv */ 2119 case BPF_PROG_TYPE_SK_REUSEPORT: 2120 /* equivalent to SOCKET_FILTER. 
need CAP_BPF only */ 2121 default: 2122 return false; 2123 } 2124} 2125 2126static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) 2127{ 2128 switch (prog_type) { 2129 case BPF_PROG_TYPE_KPROBE: 2130 case BPF_PROG_TYPE_TRACEPOINT: 2131 case BPF_PROG_TYPE_PERF_EVENT: 2132 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2133 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2134 case BPF_PROG_TYPE_TRACING: 2135 case BPF_PROG_TYPE_LSM: 2136 case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ 2137 case BPF_PROG_TYPE_EXT: /* extends any prog */ 2138 return true; 2139 default: 2140 return false; 2141 } 2142} 2143 2144/* last field in 'union bpf_attr' used by this command */ 2145#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd 2146 2147static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) 2148{ 2149 enum bpf_prog_type type = attr->prog_type; 2150 struct bpf_prog *prog; 2151 int err; 2152 char license[128]; 2153 bool is_gpl; 2154 2155 if (CHECK_ATTR(BPF_PROG_LOAD)) { 2156 return -EINVAL; 2157 } 2158 2159 if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT | BPF_F_ANY_ALIGNMENT | BPF_F_TEST_STATE_FREQ | BPF_F_SLEEPABLE | 2160 BPF_F_TEST_RND_HI32)) { 2161 return -EINVAL; 2162 } 2163 2164 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && (attr->prog_flags & BPF_F_ANY_ALIGNMENT) && 2165 !bpf_capable()) { 2166 return -EPERM; 2167 } 2168 2169 /* copy eBPF program license from user space */ 2170 if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) { 2171 return -EFAULT; 2172 } 2173 license[sizeof(license) - 1] = 0; 2174 2175 /* eBPF programs must be GPL compatible to use GPL-ed functions */ 2176 is_gpl = license_is_gpl_compatible(license); 2177 2178 if (attr->insn_cnt == 0 || attr->insn_cnt > (bpf_capable() ? 
BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) { 2179 return -E2BIG; 2180 } 2181 if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && !bpf_capable()) { 2182 return -EPERM; 2183 } 2184 2185 if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN)) { 2186 return -EPERM; 2187 } 2188 if (is_perfmon_prog_type(type) && !perfmon_capable()) { 2189 return -EPERM; 2190 } 2191 2192 bpf_prog_load_fixup_attach_type(attr); 2193 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, attr->attach_btf_id, attr->attach_prog_fd)) { 2194 return -EINVAL; 2195 } 2196 2197 /* plain bpf_prog allocation */ 2198 prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER); 2199 if (!prog) { 2200 return -ENOMEM; 2201 } 2202 2203 prog->expected_attach_type = attr->expected_attach_type; 2204 prog->aux->attach_btf_id = attr->attach_btf_id; 2205 if (attr->attach_prog_fd) { 2206 struct bpf_prog *dst_prog; 2207 2208 dst_prog = bpf_prog_get(attr->attach_prog_fd); 2209 if (IS_ERR(dst_prog)) { 2210 err = PTR_ERR(dst_prog); 2211 goto free_prog_nouncharge; 2212 } 2213 prog->aux->dst_prog = dst_prog; 2214 } 2215 2216 prog->aux->offload_requested = !!attr->prog_ifindex; 2217 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; 2218 2219 err = security_bpf_prog_alloc(prog->aux); 2220 if (err) { 2221 goto free_prog_nouncharge; 2222 } 2223 2224 err = bpf_prog_charge_memlock(prog); 2225 if (err) { 2226 goto free_prog_sec; 2227 } 2228 2229 prog->len = attr->insn_cnt; 2230 2231 err = -EFAULT; 2232 if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns), bpf_prog_insn_size(prog)) != 0) { 2233 goto free_prog; 2234 } 2235 2236 prog->orig_prog = NULL; 2237 prog->jited = 0; 2238 2239 atomic64_set(&prog->aux->refcnt, 1); 2240 prog->gpl_compatible = is_gpl ? 1 : 0; 2241 2242 if (bpf_prog_is_dev_bound(prog->aux)) { 2243 err = bpf_prog_offload_init(prog, attr); 2244 if (err) { 2245 goto free_prog; 2246 } 2247 } 2248 2249 /* find program type: socket_filter vs tracing_filter */ 2250 err = find_prog_type(type, prog); 2251 if (err < 0) { 2252 goto free_prog; 2253 } 2254 2255 prog->aux->load_time = ktime_get_boottime_ns(); 2256 err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, sizeof(attr->prog_name)); 2257 if (err < 0) { 2258 goto free_prog; 2259 } 2260 2261 /* run eBPF verifier */ 2262 err = bpf_check(&prog, attr, uattr); 2263 if (err < 0) { 2264 goto free_used_maps; 2265 } 2266 2267 prog = bpf_prog_select_runtime(prog, &err); 2268 if (err < 0) { 2269 goto free_used_maps; 2270 } 2271 2272 err = bpf_prog_alloc_id(prog); 2273 if (err) { 2274 goto free_used_maps; 2275 } 2276 2277 /* Upon success of bpf_prog_alloc_id(), the BPF prog is 2278 * effectively publicly exposed. However, retrieving via 2279 * bpf_prog_get_fd_by_id() will take another reference, 2280 * therefore it cannot be gone underneath us. 2281 * 2282 * Only for the time /after/ successful bpf_prog_new_fd() 2283 * and before returning to userspace, we might just hold 2284 * one reference and any parallel close on that fd could 2285 * rip everything out. Hence, below notifications must 2286 * happen before bpf_prog_new_fd(). 2287 * 2288 * Also, any failure handling from this point onwards must 2289 * be using bpf_prog_put() given the program is exposed. 
2290 */ 2291 bpf_prog_kallsyms_add(prog); 2292 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 2293 bpf_audit_prog(prog, BPF_AUDIT_LOAD); 2294 2295 err = bpf_prog_new_fd(prog); 2296 if (err < 0) { 2297 bpf_prog_put(prog); 2298 } 2299 return err; 2300 2301free_used_maps: 2302 /* In case we have subprogs, we need to wait for a grace 2303 * period before we can tear down JIT memory since symbols 2304 * are already exposed under kallsyms. 2305 */ 2306 _bpf_prog_put_noref(prog, prog->aux->func_cnt); 2307 return err; 2308free_prog: 2309 bpf_prog_uncharge_memlock(prog); 2310free_prog_sec: 2311 security_bpf_prog_free(prog->aux); 2312free_prog_nouncharge: 2313 bpf_prog_free(prog); 2314 return err; 2315} 2316
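/*
 * Example (illustrative sketch, not part of the original file): a minimal
 * user-space BPF_PROG_LOAD request that ends up in bpf_prog_load() above.
 * The two-instruction program just does "r0 = 0; exit". The required
 * #includes (<linux/bpf.h>, <unistd.h>, <sys/syscall.h>, <string.h>) and all
 * error handling are omitted; unused fields of the attr union must be zero,
 * which the memset() takes care of.
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
 *		{ .code = BPF_JMP | BPF_EXIT },
 *	};
 *	union bpf_attr attr;
 *
 *	memset(&attr, 0, sizeof(attr));
 *	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 *	attr.insn_cnt  = 2;
 *	attr.insns     = (__u64)(unsigned long)insns;
 *	attr.license   = (__u64)(unsigned long)"GPL";
 *	prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 */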
2317#define BPF_OBJ_LAST_FIELD file_flags 2318 2319static int bpf_obj_pin(const union bpf_attr *attr) 2320{ 2321 if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) { 2322 return -EINVAL; 2323 } 2324 2325 return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 2326} 2327 2328static int bpf_obj_get(const union bpf_attr *attr) 2329{ 2330 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || attr->file_flags & ~BPF_OBJ_FLAG_MASK) { 2331 return -EINVAL; 2332 } 2333 2334 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), attr->file_flags); 2335} 2336 2337void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, 2338 struct bpf_prog *prog) 2339{ 2340 atomic64_set(&link->refcnt, 1); 2341 link->type = type; 2342 link->id = 0; 2343 link->ops = ops; 2344 link->prog = prog; 2345} 2346 2347static void bpf_link_free_id(int id) 2348{ 2349 if (!id) { 2350 return; 2351 } 2352 2353 spin_lock_bh(&link_idr_lock); 2354 idr_remove(&link_idr, id); 2355 spin_unlock_bh(&link_idr_lock); 2356} 2357 2358/* Clean up bpf_link and corresponding anon_inode file and FD. After 2359 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 2360 * anon_inode's release() call. This helper marks bpf_link as 2361 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 2362 * is not decremented, it's the responsibility of the calling code that failed 2363 * to complete bpf_link initialization. 2364 */ 2365void bpf_link_cleanup(struct bpf_link_primer *primer) 2366{ 2367 primer->link->prog = NULL; 2368 bpf_link_free_id(primer->id); 2369 fput(primer->file); 2370 put_unused_fd(primer->fd); 2371} 2372 2373void bpf_link_inc(struct bpf_link *link) 2374{ 2375 atomic64_inc(&link->refcnt); 2376} 2377 2378/* bpf_link_free is guaranteed to be called from process context */ 2379static void bpf_link_free(struct bpf_link *link) 2380{ 2381 bpf_link_free_id(link->id); 2382 if (link->prog) { 2383 /* detach BPF program, clean up used resources */ 2384 link->ops->release(link); 2385 bpf_prog_put(link->prog); 2386 } 2387 /* free bpf_link and its containing memory */ 2388 link->ops->dealloc(link); 2389} 2390 2391static void bpf_link_put_deferred(struct work_struct *work) 2392{ 2393 struct bpf_link *link = container_of(work, struct bpf_link, work); 2394 2395 bpf_link_free(link); 2396} 2397 2398/* bpf_link_put can be called from atomic context, but ensures that resources 2399 * are freed from process context 2400 */ 2401void bpf_link_put(struct bpf_link *link) 2402{ 2403 if (!atomic64_dec_and_test(&link->refcnt)) { 2404 return; 2405 } 2406 2407 if (in_atomic()) { 2408 INIT_WORK(&link->work, bpf_link_put_deferred); 2409 schedule_work(&link->work); 2410 } else { 2411 bpf_link_free(link); 2412 } 2413} 2414 2415static int bpf_link_release(struct inode *inode, struct file *filp) 2416{ 2417 struct bpf_link *link = filp->private_data; 2418 2419 bpf_link_put(link); 2420 return 0; 2421} 2422 2423#ifdef CONFIG_PROC_FS 2424#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 2425#define BPF_MAP_TYPE(_id, _ops) 2426#define BPF_LINK_TYPE(_id, _name) [_id] = #_name, 2427static const char *bpf_link_type_strs[] = { 2428 [BPF_LINK_TYPE_UNSPEC] = "<invalid>", 2429#include <linux/bpf_types.h> 2430}; 2431#undef BPF_PROG_TYPE 2432#undef BPF_MAP_TYPE 2433#undef BPF_LINK_TYPE 2434 2435static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) 2436{ 2437 const struct bpf_link *link = filp->private_data; 2438 const struct bpf_prog *prog = link->prog; 2439 char prog_tag[sizeof(prog->tag) * 0x2 + 1] = {}; 2440 2441 bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); 2442 seq_printf(m, 2443 "link_type:\t%s\n" 2444 "link_id:\t%u\n" 2445 "prog_tag:\t%s\n" 2446 "prog_id:\t%u\n", 2447 bpf_link_type_strs[link->type], link->id, prog_tag, prog->aux->id); 2448 if (link->ops->show_fdinfo) { 2449 link->ops->show_fdinfo(link, m); 2450 } 2451} 2452#endif 2453 2454static const struct file_operations bpf_link_fops = { 2455#ifdef CONFIG_PROC_FS 2456 .show_fdinfo = bpf_link_show_fdinfo, 2457#endif 2458 .release = bpf_link_release, 2459 .read = bpf_dummy_read, 2460 .write = bpf_dummy_write, 2461}; 2462 2463static int bpf_link_alloc_id(struct bpf_link *link) 2464{ 2465 int id; 2466 2467 idr_preload(GFP_KERNEL); 2468 spin_lock_bh(&link_idr_lock); 2469 id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); 2470 spin_unlock_bh(&link_idr_lock); 2471 idr_preload_end(); 2472 2473 return id; 2474} 2475 2476/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, 2477 * reserving unused FD and allocating ID from link_idr. This is to be paired 2478 * with bpf_link_settle() to install FD and ID and expose bpf_link to 2479 * user-space, if bpf_link is successfully attached. If not, bpf_link and 2480 * pre-allocated resources are to be freed with a bpf_link_cleanup() call. All the 2481 * transient state is passed around in struct bpf_link_primer. 2482 * This is the preferred way to create and initialize bpf_link, especially when 2483 * there are complicated and expensive operations in between creating bpf_link 2484 * itself and attaching it to a BPF hook. By using bpf_link_prime() and 2485 * bpf_link_settle(), kernel code using bpf_link doesn't have to perform 2486 * expensive (and potentially failing) roll back operations in the rare case 2487 * that file, FD, or ID can't be allocated. A usage sketch follows bpf_link_settle() below. 2488 */ 2489int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) 2490{ 2491 struct file *file; 2492 int fd, id; 2493 2494 fd = get_unused_fd_flags(O_CLOEXEC); 2495 if (fd < 0) { 2496 return fd; 2497 } 2498 2499 id = bpf_link_alloc_id(link); 2500 if (id < 0) { 2501 put_unused_fd(fd); 2502 return id; 2503 } 2504 2505 file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); 2506 if (IS_ERR(file)) { 2507 bpf_link_free_id(id); 2508 put_unused_fd(fd); 2509 return PTR_ERR(file); 2510 } 2511 2512 primer->link = link; 2513 primer->file = file; 2514 primer->fd = fd; 2515 primer->id = id; 2516 return 0; 2517} 2518 2519int bpf_link_settle(struct bpf_link_primer *primer) 2520{ 2521 /* make bpf_link fetchable by ID */ 2522 spin_lock_bh(&link_idr_lock); 2523 primer->link->id = primer->id; 2524 spin_unlock_bh(&link_idr_lock); 2525 /* make bpf_link fetchable by FD */ 2526 fd_install(primer->fd, primer->file); 2527 /* pass through installed FD */ 2528 return primer->fd; 2529}
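/*
 * Usage sketch referenced in the comment above (illustrative only; the
 * "example_*" names are hypothetical and not part of the kernel). The same
 * pattern is used by bpf_tracing_prog_attach() and bpf_raw_tracepoint_open()
 * later in this file:
 *
 *	struct bpf_link_primer primer;
 *	struct example_link *link;
 *	int err;
 *
 *	link = kzalloc(sizeof(*link), GFP_USER);
 *	if (!link)
 *		return -ENOMEM;
 *	bpf_link_init(&link->link, BPF_LINK_TYPE_UNSPEC, &example_link_lops, prog);
 *
 *	err = bpf_link_prime(&link->link, &primer);	// reserve FD, ID and file
 *	if (err) {
 *		kfree(link);				// nothing exposed yet
 *		return err;
 *	}
 *	err = example_attach_to_hook(link);		// expensive step that may fail
 *	if (err) {
 *		bpf_link_cleanup(&primer);		// undo FD/ID/file; prog ref stays with caller
 *		return err;
 *	}
 *	return bpf_link_settle(&primer);		// publish ID and install FD
 */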
2530 2531int bpf_link_new_fd(struct bpf_link *link) 2532{ 2533 return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); 2534} 2535 2536struct bpf_link *bpf_link_get_from_fd(u32 ufd) 2537{ 2538 struct fd f = fdget(ufd); 2539 struct bpf_link *link; 2540 2541 if (!f.file) { 2542 return ERR_PTR(-EBADF); 2543 } 2544 if (f.file->f_op != &bpf_link_fops) { 2545 fdput(f); 2546 return ERR_PTR(-EINVAL); 2547 } 2548 2549 link = f.file->private_data; 2550 bpf_link_inc(link); 2551 fdput(f); 2552 2553 return link; 2554} 2555 2556struct bpf_tracing_link { 2557 struct bpf_link link; 2558 enum bpf_attach_type attach_type; 2559 struct bpf_trampoline *trampoline; 2560 struct bpf_prog *tgt_prog; 2561}; 2562 2563static void bpf_tracing_link_release(struct bpf_link *link) 2564{ 2565 struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link); 2566 2567 WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, tr_link->trampoline)); 2568 2569 bpf_trampoline_put(tr_link->trampoline); 2570 2571 /* tgt_prog is NULL if target is a kernel function */ 2572 if (tr_link->tgt_prog) { 2573 bpf_prog_put(tr_link->tgt_prog); 2574 } 2575} 2576 2577static void bpf_tracing_link_dealloc(struct bpf_link *link) 2578{ 2579 struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link); 2580 2581 kfree(tr_link); 2582} 2583 2584static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) 2585{ 2586 struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link); 2587 2588 seq_printf(seq, "attach_type:\t%d\n", tr_link->attach_type); 2589} 2590 2591static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) 2592{ 2593 struct bpf_tracing_link *tr_link = container_of(link, struct bpf_tracing_link, link); 2594 2595 info->tracing.attach_type = tr_link->attach_type; 2596 2597 return 0; 2598} 2599 2600static const struct bpf_link_ops bpf_tracing_link_lops = { 2601 .release = bpf_tracing_link_release, 2602 .dealloc = bpf_tracing_link_dealloc, 2603 .show_fdinfo = 
bpf_tracing_link_show_fdinfo, 2604 .fill_link_info = bpf_tracing_link_fill_link_info, 2605}; 2606 2607static int bpf_tracing_prog_attach(struct bpf_prog *prog, int tgt_prog_fd, u32 btf_id) 2608{ 2609 struct bpf_link_primer link_primer; 2610 struct bpf_prog *tgt_prog = NULL; 2611 struct bpf_trampoline *tr = NULL; 2612 struct bpf_tracing_link *link; 2613 u64 key = 0; 2614 int err; 2615 2616 switch (prog->type) { 2617 case BPF_PROG_TYPE_TRACING: 2618 if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT && 2619 prog->expected_attach_type != BPF_MODIFY_RETURN) { 2620 err = -EINVAL; 2621 goto out_put_prog; 2622 } 2623 break; 2624 case BPF_PROG_TYPE_EXT: 2625 if (prog->expected_attach_type != 0) { 2626 err = -EINVAL; 2627 goto out_put_prog; 2628 } 2629 break; 2630 case BPF_PROG_TYPE_LSM: 2631 if (prog->expected_attach_type != BPF_LSM_MAC) { 2632 err = -EINVAL; 2633 goto out_put_prog; 2634 } 2635 break; 2636 default: 2637 err = -EINVAL; 2638 goto out_put_prog; 2639 } 2640 2641 if (!!tgt_prog_fd != !!btf_id) { 2642 err = -EINVAL; 2643 goto out_put_prog; 2644 } 2645 2646 if (tgt_prog_fd) { 2647 /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ 2648 if (prog->type != BPF_PROG_TYPE_EXT) { 2649 err = -EINVAL; 2650 goto out_put_prog; 2651 } 2652 2653 tgt_prog = bpf_prog_get(tgt_prog_fd); 2654 if (IS_ERR(tgt_prog)) { 2655 err = PTR_ERR(tgt_prog); 2656 tgt_prog = NULL; 2657 goto out_put_prog; 2658 } 2659 2660 key = bpf_trampoline_compute_key(tgt_prog, btf_id); 2661 } 2662 2663 link = kzalloc(sizeof(*link), GFP_USER); 2664 if (!link) { 2665 err = -ENOMEM; 2666 goto out_put_prog; 2667 } 2668 bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, &bpf_tracing_link_lops, prog); 2669 link->attach_type = prog->expected_attach_type; 2670 2671 mutex_lock(&prog->aux->dst_mutex); 2672 2673 /* There are a few possible cases here: 2674 * 2675 * - if prog->aux->dst_trampoline is set, the program was just loaded 2676 * and not yet attached to anything, so we can use the values stored 2677 * in prog->aux 2678 * 2679 * - if prog->aux->dst_trampoline is NULL, the program has already been 2680 * attached to a target and its initial target was cleared (below) 2681 * 2682 * - if tgt_prog != NULL, the caller specified tgt_prog_fd + 2683 * target_btf_id using the link_create API. 2684 * 2685 * - if tgt_prog == NULL when this function was called using the old 2686 * raw_tracepoint_open API, and we need a target from prog->aux 2687 * 2688 * The combination of no saved target in prog->aux, and no target 2689 * specified on load is illegal, and we reject that here. 2690 */ 2691 if (!prog->aux->dst_trampoline && !tgt_prog) { 2692 err = -ENOENT; 2693 goto out_unlock; 2694 } 2695 2696 if (!prog->aux->dst_trampoline || (key && key != prog->aux->dst_trampoline->key)) { 2697 /* If there is no saved target, or the specified target is 2698 * different from the destination specified at load time, we 2699 * need a new trampoline and a check for compatibility 2700 */ 2701 struct bpf_attach_target_info tgt_info = {}; 2702 2703 err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, &tgt_info); 2704 if (err) { 2705 goto out_unlock; 2706 } 2707 2708 tr = bpf_trampoline_get(key, &tgt_info); 2709 if (!tr) { 2710 err = -ENOMEM; 2711 goto out_unlock; 2712 } 2713 } else { 2714 /* The caller didn't specify a target, or the target was the 2715 * same as the destination supplied during program load. 
This 2716 * means we can reuse the trampoline and reference from program 2717 * load time, and there is no need to allocate a new one. This 2718 * can only happen once for any program, as the saved values in 2719 * prog->aux are cleared below. 2720 */ 2721 tr = prog->aux->dst_trampoline; 2722 tgt_prog = prog->aux->dst_prog; 2723 } 2724 2725 err = bpf_link_prime(&link->link, &link_primer); 2726 if (err) { 2727 goto out_unlock; 2728 } 2729 2730 err = bpf_trampoline_link_prog(prog, tr); 2731 if (err) { 2732 bpf_link_cleanup(&link_primer); 2733 link = NULL; 2734 goto out_unlock; 2735 } 2736 2737 link->tgt_prog = tgt_prog; 2738 link->trampoline = tr; 2739 2740 /* Always clear the trampoline and target prog from prog->aux to make 2741 * sure the original attach destination is not kept alive after a 2742 * program is (re-)attached to another target. 2743 */ 2744 if (prog->aux->dst_prog && (tgt_prog_fd || tr != prog->aux->dst_trampoline)) { 2745 /* got extra prog ref from syscall, or attaching to different prog */ 2746 bpf_prog_put(prog->aux->dst_prog); 2747 } 2748 if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline) { 2749 /* we allocated a new trampoline, so free the old one */ 2750 bpf_trampoline_put(prog->aux->dst_trampoline); 2751 } 2752 2753 prog->aux->dst_prog = NULL; 2754 prog->aux->dst_trampoline = NULL; 2755 mutex_unlock(&prog->aux->dst_mutex); 2756 2757 return bpf_link_settle(&link_primer); 2758out_unlock: 2759 if (tr && tr != prog->aux->dst_trampoline) { 2760 bpf_trampoline_put(tr); 2761 } 2762 mutex_unlock(&prog->aux->dst_mutex); 2763 kfree(link); 2764out_put_prog: 2765 if (tgt_prog_fd && tgt_prog) { 2766 bpf_prog_put(tgt_prog); 2767 } 2768 return err; 2769} 2770 2771struct bpf_raw_tp_link { 2772 struct bpf_link link; 2773 struct bpf_raw_event_map *btp; 2774}; 2775 2776static void bpf_raw_tp_link_release(struct bpf_link *link) 2777{ 2778 struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link); 2779 2780 bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); 2781 bpf_put_raw_tracepoint(raw_tp->btp); 2782} 2783 2784static void bpf_raw_tp_link_dealloc(struct bpf_link *link) 2785{ 2786 struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link); 2787 2788 kfree(raw_tp); 2789} 2790 2791static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) 2792{ 2793 struct bpf_raw_tp_link *raw_tp_link = container_of(link, struct bpf_raw_tp_link, link); 2794 2795 seq_printf(seq, "tp_name:\t%s\n", raw_tp_link->btp->tp->name); 2796} 2797 2798static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) 2799{ 2800 struct bpf_raw_tp_link *raw_tp_link = container_of(link, struct bpf_raw_tp_link, link); 2801 char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); 2802 const char *tp_name = raw_tp_link->btp->tp->name; 2803 u32 ulen = info->raw_tracepoint.tp_name_len; 2804 size_t tp_len = strlen(tp_name); 2805 2806 if (!ulen ^ !ubuf) { 2807 return -EINVAL; 2808 } 2809 2810 info->raw_tracepoint.tp_name_len = tp_len + 1; 2811 2812 if (!ubuf) { 2813 return 0; 2814 } 2815 2816 if (ulen >= tp_len + 1) { 2817 if (copy_to_user(ubuf, tp_name, tp_len + 1)) { 2818 return -EFAULT; 2819 } 2820 } else { 2821 char zero = '\0'; 2822 2823 if (copy_to_user(ubuf, tp_name, ulen - 1)) { 2824 return -EFAULT; 2825 } 2826 if (put_user(zero, ubuf + ulen - 1)) { 2827 return -EFAULT; 2828 } 2829 return -ENOSPC; 2830 } 2831 2832 return 0; 2833} 2834 2835static const struct bpf_link_ops 
bpf_raw_tp_link_lops = { 2836 .release = bpf_raw_tp_link_release, 2837 .dealloc = bpf_raw_tp_link_dealloc, 2838 .show_fdinfo = bpf_raw_tp_link_show_fdinfo, 2839 .fill_link_info = bpf_raw_tp_link_fill_link_info, 2840}; 2841 2842#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd 2843 2844static int bpf_raw_tracepoint_open(const union bpf_attr *attr) 2845{ 2846 struct bpf_link_primer link_primer; 2847 struct bpf_raw_tp_link *link; 2848 struct bpf_raw_event_map *btp; 2849 struct bpf_prog *prog; 2850 const char *tp_name; 2851 char buf[128]; 2852 int err; 2853 2854 if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) { 2855 return -EINVAL; 2856 } 2857 2858 prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); 2859 if (IS_ERR(prog)) { 2860 return PTR_ERR(prog); 2861 } 2862 2863 switch (prog->type) { 2864 case BPF_PROG_TYPE_TRACING: 2865 case BPF_PROG_TYPE_EXT: 2866 case BPF_PROG_TYPE_LSM: 2867 if (attr->raw_tracepoint.name) { 2868 /* The attach point for this category of programs 2869 * should be specified via btf_id during program load. 2870 */ 2871 err = -EINVAL; 2872 goto out_put_prog; 2873 } 2874 if (prog->type == BPF_PROG_TYPE_TRACING && prog->expected_attach_type == BPF_TRACE_RAW_TP) { 2875 tp_name = prog->aux->attach_func_name; 2876 break; 2877 } 2878 err = bpf_tracing_prog_attach(prog, 0, 0); 2879 if (err >= 0) { 2880 return err; 2881 } 2882 goto out_put_prog; 2883 case BPF_PROG_TYPE_RAW_TRACEPOINT: 2884 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 2885 if (strncpy_from_user(buf, u64_to_user_ptr(attr->raw_tracepoint.name), sizeof(buf) - 1) < 0) { 2886 err = -EFAULT; 2887 goto out_put_prog; 2888 } 2889 buf[sizeof(buf) - 1] = 0; 2890 tp_name = buf; 2891 break; 2892 default: 2893 err = -EINVAL; 2894 goto out_put_prog; 2895 } 2896 2897 btp = bpf_get_raw_tracepoint(tp_name); 2898 if (!btp) { 2899 err = -ENOENT; 2900 goto out_put_prog; 2901 } 2902 2903 link = kzalloc(sizeof(*link), GFP_USER); 2904 if (!link) { 2905 err = -ENOMEM; 2906 goto out_put_btp; 2907 } 2908 bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, &bpf_raw_tp_link_lops, prog); 2909 link->btp = btp; 2910 2911 err = bpf_link_prime(&link->link, &link_primer); 2912 if (err) { 2913 kfree(link); 2914 goto out_put_btp; 2915 } 2916 2917 err = bpf_probe_register(link->btp, prog); 2918 if (err) { 2919 bpf_link_cleanup(&link_primer); 2920 goto out_put_btp; 2921 } 2922 2923 return bpf_link_settle(&link_primer); 2924 2925out_put_btp: 2926 bpf_put_raw_tracepoint(btp); 2927out_put_prog: 2928 bpf_prog_put(prog); 2929 return err; 2930} 2931 2932static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, enum bpf_attach_type attach_type) 2933{ 2934 switch (prog->type) { 2935 case BPF_PROG_TYPE_CGROUP_SOCK: 2936 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 2937 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 2938 case BPF_PROG_TYPE_SK_LOOKUP: 2939 return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 2940 case BPF_PROG_TYPE_CGROUP_SKB: 2941 if (!capable(CAP_NET_ADMIN)) { 2942 /* cg-skb progs can be loaded by unpriv user. 2943 * check permissions at attach time. 2944 */ 2945 return -EPERM; 2946 } 2947 return prog->enforce_expected_attach_type && prog->expected_attach_type != attach_type ? 
-EINVAL : 0; 2948 default: 2949 return 0; 2950 } 2951} 2952 2953static enum bpf_prog_type attach_type_to_prog_type(enum bpf_attach_type attach_type) 2954{ 2955 switch (attach_type) { 2956 case BPF_CGROUP_INET_INGRESS: 2957 case BPF_CGROUP_INET_EGRESS: 2958 return BPF_PROG_TYPE_CGROUP_SKB; 2959 case BPF_CGROUP_INET_SOCK_CREATE: 2960 case BPF_CGROUP_INET_SOCK_RELEASE: 2961 case BPF_CGROUP_INET4_POST_BIND: 2962 case BPF_CGROUP_INET6_POST_BIND: 2963 return BPF_PROG_TYPE_CGROUP_SOCK; 2964 case BPF_CGROUP_INET4_BIND: 2965 case BPF_CGROUP_INET6_BIND: 2966 case BPF_CGROUP_INET4_CONNECT: 2967 case BPF_CGROUP_INET6_CONNECT: 2968 case BPF_CGROUP_INET4_GETPEERNAME: 2969 case BPF_CGROUP_INET6_GETPEERNAME: 2970 case BPF_CGROUP_INET4_GETSOCKNAME: 2971 case BPF_CGROUP_INET6_GETSOCKNAME: 2972 case BPF_CGROUP_UDP4_SENDMSG: 2973 case BPF_CGROUP_UDP6_SENDMSG: 2974 case BPF_CGROUP_UDP4_RECVMSG: 2975 case BPF_CGROUP_UDP6_RECVMSG: 2976 return BPF_PROG_TYPE_CGROUP_SOCK_ADDR; 2977 case BPF_CGROUP_SOCK_OPS: 2978 return BPF_PROG_TYPE_SOCK_OPS; 2979 case BPF_CGROUP_DEVICE: 2980 return BPF_PROG_TYPE_CGROUP_DEVICE; 2981 case BPF_SK_MSG_VERDICT: 2982 return BPF_PROG_TYPE_SK_MSG; 2983 case BPF_SK_SKB_STREAM_PARSER: 2984 case BPF_SK_SKB_STREAM_VERDICT: 2985 return BPF_PROG_TYPE_SK_SKB; 2986 case BPF_LIRC_MODE2: 2987 return BPF_PROG_TYPE_LIRC_MODE2; 2988 case BPF_FLOW_DISSECTOR: 2989 return BPF_PROG_TYPE_FLOW_DISSECTOR; 2990 case BPF_CGROUP_SYSCTL: 2991 return BPF_PROG_TYPE_CGROUP_SYSCTL; 2992 case BPF_CGROUP_GETSOCKOPT: 2993 case BPF_CGROUP_SETSOCKOPT: 2994 return BPF_PROG_TYPE_CGROUP_SOCKOPT; 2995 case BPF_TRACE_ITER: 2996 return BPF_PROG_TYPE_TRACING; 2997 case BPF_SK_LOOKUP: 2998 return BPF_PROG_TYPE_SK_LOOKUP; 2999 case BPF_XDP: 3000 return BPF_PROG_TYPE_XDP; 3001 default: 3002 return BPF_PROG_TYPE_UNSPEC; 3003 } 3004} 3005 3006#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd 3007 3008#define BPF_F_ATTACH_MASK (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) 3009 3010static int bpf_prog_attach(const union bpf_attr *attr) 3011{ 3012 enum bpf_prog_type ptype; 3013 struct bpf_prog *prog; 3014 int ret; 3015 3016 if (CHECK_ATTR(BPF_PROG_ATTACH)) { 3017 return -EINVAL; 3018 } 3019 3020 if (attr->attach_flags & ~BPF_F_ATTACH_MASK) { 3021 return -EINVAL; 3022 } 3023 3024 ptype = attach_type_to_prog_type(attr->attach_type); 3025 if (ptype == BPF_PROG_TYPE_UNSPEC) { 3026 return -EINVAL; 3027 } 3028 3029 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 3030 if (IS_ERR(prog)) { 3031 return PTR_ERR(prog); 3032 } 3033 3034 if (bpf_prog_attach_check_attach_type(prog, attr->attach_type)) { 3035 bpf_prog_put(prog); 3036 return -EINVAL; 3037 } 3038 3039 switch (ptype) { 3040 case BPF_PROG_TYPE_SK_SKB: 3041 case BPF_PROG_TYPE_SK_MSG: 3042 ret = sock_map_get_from_fd(attr, prog); 3043 break; 3044 case BPF_PROG_TYPE_LIRC_MODE2: 3045 ret = lirc_prog_attach(attr, prog); 3046 break; 3047 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3048 ret = netns_bpf_prog_attach(attr, prog); 3049 break; 3050 case BPF_PROG_TYPE_CGROUP_DEVICE: 3051 case BPF_PROG_TYPE_CGROUP_SKB: 3052 case BPF_PROG_TYPE_CGROUP_SOCK: 3053 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3054 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3055 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3056 case BPF_PROG_TYPE_SOCK_OPS: 3057 ret = cgroup_bpf_prog_attach(attr, ptype, prog); 3058 break; 3059 default: 3060 ret = -EINVAL; 3061 } 3062 3063 if (ret) { 3064 bpf_prog_put(prog); 3065 } 3066 return ret; 3067} 3068 3069#define BPF_PROG_DETACH_LAST_FIELD attach_type 3070 3071static int bpf_prog_detach(const union 
bpf_attr *attr) 3072{ 3073 enum bpf_prog_type ptype; 3074 3075 if (CHECK_ATTR(BPF_PROG_DETACH)) { 3076 return -EINVAL; 3077 } 3078 3079 ptype = attach_type_to_prog_type(attr->attach_type); 3080 3081 switch (ptype) { 3082 case BPF_PROG_TYPE_SK_MSG: 3083 case BPF_PROG_TYPE_SK_SKB: 3084 return sock_map_prog_detach(attr, ptype); 3085 case BPF_PROG_TYPE_LIRC_MODE2: 3086 return lirc_prog_detach(attr); 3087 case BPF_PROG_TYPE_FLOW_DISSECTOR: 3088 return netns_bpf_prog_detach(attr, ptype); 3089 case BPF_PROG_TYPE_CGROUP_DEVICE: 3090 case BPF_PROG_TYPE_CGROUP_SKB: 3091 case BPF_PROG_TYPE_CGROUP_SOCK: 3092 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3093 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3094 case BPF_PROG_TYPE_CGROUP_SYSCTL: 3095 case BPF_PROG_TYPE_SOCK_OPS: 3096 return cgroup_bpf_prog_detach(attr, ptype); 3097 default: 3098 return -EINVAL; 3099 } 3100} 3101 3102#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt 3103 3104static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) 3105{ 3106 if (!capable(CAP_NET_ADMIN)) { 3107 return -EPERM; 3108 } 3109 if (CHECK_ATTR(BPF_PROG_QUERY)) { 3110 return -EINVAL; 3111 } 3112 if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) { 3113 return -EINVAL; 3114 } 3115 3116 switch (attr->query.attach_type) { 3117 case BPF_CGROUP_INET_INGRESS: 3118 case BPF_CGROUP_INET_EGRESS: 3119 case BPF_CGROUP_INET_SOCK_CREATE: 3120 case BPF_CGROUP_INET_SOCK_RELEASE: 3121 case BPF_CGROUP_INET4_BIND: 3122 case BPF_CGROUP_INET6_BIND: 3123 case BPF_CGROUP_INET4_POST_BIND: 3124 case BPF_CGROUP_INET6_POST_BIND: 3125 case BPF_CGROUP_INET4_CONNECT: 3126 case BPF_CGROUP_INET6_CONNECT: 3127 case BPF_CGROUP_INET4_GETPEERNAME: 3128 case BPF_CGROUP_INET6_GETPEERNAME: 3129 case BPF_CGROUP_INET4_GETSOCKNAME: 3130 case BPF_CGROUP_INET6_GETSOCKNAME: 3131 case BPF_CGROUP_UDP4_SENDMSG: 3132 case BPF_CGROUP_UDP6_SENDMSG: 3133 case BPF_CGROUP_UDP4_RECVMSG: 3134 case BPF_CGROUP_UDP6_RECVMSG: 3135 case BPF_CGROUP_SOCK_OPS: 3136 case BPF_CGROUP_DEVICE: 3137 case BPF_CGROUP_SYSCTL: 3138 case BPF_CGROUP_GETSOCKOPT: 3139 case BPF_CGROUP_SETSOCKOPT: 3140 return cgroup_bpf_prog_query(attr, uattr); 3141 case BPF_LIRC_MODE2: 3142 return lirc_prog_query(attr, uattr); 3143 case BPF_FLOW_DISSECTOR: 3144 case BPF_SK_LOOKUP: 3145 return netns_bpf_prog_query(attr, uattr); 3146 default: 3147 return -EINVAL; 3148 } 3149} 3150 3151#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu 3152 3153static int bpf_prog_test_run(const union bpf_attr *attr, union bpf_attr __user *uattr) 3154{ 3155 struct bpf_prog *prog; 3156 int ret = -ENOTSUPP; 3157 3158 if (CHECK_ATTR(BPF_PROG_TEST_RUN)) { 3159 return -EINVAL; 3160 } 3161 3162 if ((attr->test.ctx_size_in && !attr->test.ctx_in) || (!attr->test.ctx_size_in && attr->test.ctx_in)) { 3163 return -EINVAL; 3164 } 3165 3166 if ((attr->test.ctx_size_out && !attr->test.ctx_out) || (!attr->test.ctx_size_out && attr->test.ctx_out)) { 3167 return -EINVAL; 3168 } 3169 3170 prog = bpf_prog_get(attr->test.prog_fd); 3171 if (IS_ERR(prog)) { 3172 return PTR_ERR(prog); 3173 } 3174 3175 if (prog->aux->ops->test_run) { 3176 ret = prog->aux->ops->test_run(prog, attr, uattr); 3177 } 3178 3179 bpf_prog_put(prog); 3180 return ret; 3181} 3182 3183#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id 3184 3185static int bpf_obj_get_next_id(const union bpf_attr *attr, union bpf_attr __user *uattr, struct idr *idr, 3186 spinlock_t *lock) 3187{ 3188 u32 next_id = attr->start_id; 3189 int err = 0; 3190 3191 if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX) { 3192 return -EINVAL; 3193 } 3194 
3195 if (!capable(CAP_SYS_ADMIN)) { 3196 return -EPERM; 3197 } 3198 3199 next_id++; 3200 spin_lock_bh(lock); 3201 if (!idr_get_next(idr, &next_id)) { 3202 err = -ENOENT; 3203 } 3204 spin_unlock_bh(lock); 3205 3206 if (!err) { 3207 err = put_user(next_id, &uattr->next_id); 3208 } 3209 3210 return err; 3211} 3212 3213struct bpf_map *bpf_map_get_curr_or_next(u32 *id) 3214{ 3215 struct bpf_map *map; 3216 3217 spin_lock_bh(&map_idr_lock); 3218 3219 while (1) { 3220 map = idr_get_next(&map_idr, id); 3221 if (map) { 3222 map = _bpf_map_inc_not_zero(map, false); 3223 if (IS_ERR(map)) { 3224 (*id)++; 3225 continue; 3226 } 3227 } 3228 break; 3229 } 3230 spin_unlock_bh(&map_idr_lock); 3231 3232 return map; 3233} 3234 3235struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id) 3236{ 3237 struct bpf_prog *prog; 3238 3239 spin_lock_bh(&prog_idr_lock); 3240 while (1) { 3241 prog = idr_get_next(&prog_idr, id); 3242 if (prog) { 3243 prog = bpf_prog_inc_not_zero(prog); 3244 if (IS_ERR(prog)) { 3245 (*id)++; 3246 continue; 3247 } 3248 } 3249 break; 3250 } 3251 spin_unlock_bh(&prog_idr_lock); 3252 3253 return prog; 3254} 3255 3256#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 3257 3258struct bpf_prog *bpf_prog_by_id(u32 id) 3259{ 3260 struct bpf_prog *prog; 3261 3262 if (!id) { 3263 return ERR_PTR(-ENOENT); 3264 } 3265 3266 spin_lock_bh(&prog_idr_lock); 3267 prog = idr_find(&prog_idr, id); 3268 if (prog) { 3269 prog = bpf_prog_inc_not_zero(prog); 3270 } else { 3271 prog = ERR_PTR(-ENOENT); 3272 } 3273 spin_unlock_bh(&prog_idr_lock); 3274 return prog; 3275} 3276 3277static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 3278{ 3279 struct bpf_prog *prog; 3280 u32 id = attr->prog_id; 3281 int fd; 3282 3283 if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) { 3284 return -EINVAL; 3285 } 3286 3287 if (!capable(CAP_SYS_ADMIN)) { 3288 return -EPERM; 3289 } 3290 3291 prog = bpf_prog_by_id(id); 3292 if (IS_ERR(prog)) { 3293 return PTR_ERR(prog); 3294 } 3295 3296 fd = bpf_prog_new_fd(prog); 3297 if (fd < 0) { 3298 bpf_prog_put(prog); 3299 } 3300 3301 return fd; 3302} 3303 3304#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags 3305 3306static int bpf_map_get_fd_by_id(const union bpf_attr *attr) 3307{ 3308 struct bpf_map *map; 3309 u32 id = attr->map_id; 3310 int f_flags; 3311 int fd; 3312 3313 if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) || attr->open_flags & ~BPF_OBJ_FLAG_MASK) { 3314 return -EINVAL; 3315 } 3316 3317 if (!capable(CAP_SYS_ADMIN)) { 3318 return -EPERM; 3319 } 3320 3321 f_flags = bpf_get_file_flag(attr->open_flags); 3322 if (f_flags < 0) { 3323 return f_flags; 3324 } 3325 3326 spin_lock_bh(&map_idr_lock); 3327 map = idr_find(&map_idr, id); 3328 if (map) { 3329 map = _bpf_map_inc_not_zero(map, true); 3330 } else { 3331 map = ERR_PTR(-ENOENT); 3332 } 3333 spin_unlock_bh(&map_idr_lock); 3334 3335 if (IS_ERR(map)) { 3336 return PTR_ERR(map); 3337 } 3338 3339 fd = bpf_map_new_fd(map, f_flags); 3340 if (fd < 0) { 3341 bpf_map_put_with_uref(map); 3342 } 3343 3344 return fd; 3345} 3346 3347static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, unsigned long addr, u32 *off, u32 *type) 3348{ 3349 const struct bpf_map *map; 3350 int i; 3351 3352 mutex_lock(&prog->aux->used_maps_mutex); 3353 for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { 3354 map = prog->aux->used_maps[i]; 3355 if (map == (void *)addr) { 3356 *type = BPF_PSEUDO_MAP_FD; 3357 goto out; 3358 } 3359 if (!map->ops->map_direct_value_meta) { 3360 continue; 3361 } 3362 if (!map->ops->map_direct_value_meta(map, addr, off)) { 3363 *type = 
BPF_PSEUDO_MAP_VALUE; 3364 goto out; 3365 } 3366 } 3367 map = NULL; 3368 3369out: 3370 mutex_unlock(&prog->aux->used_maps_mutex); 3371 return map; 3372} 3373 3374static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, const struct cred *f_cred) 3375{ 3376 const struct bpf_map *map; 3377 struct bpf_insn *insns; 3378 u32 off, type; 3379 u64 imm; 3380 u8 code; 3381 int i; 3382 3383 insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog), GFP_USER); 3384 if (!insns) { 3385 return insns; 3386 } 3387 3388 for (i = 0; i < prog->len; i++) { 3389 code = insns[i].code; 3390 3391 if (code == (BPF_JMP | BPF_TAIL_CALL)) { 3392 insns[i].code = BPF_JMP | BPF_CALL; 3393 insns[i].imm = BPF_FUNC_tail_call; 3394 /* fall-through */ 3395 } 3396 if (code == (BPF_JMP | BPF_CALL) || code == (BPF_JMP | BPF_CALL_ARGS)) { 3397 if (code == (BPF_JMP | BPF_CALL_ARGS)) { 3398 insns[i].code = BPF_JMP | BPF_CALL; 3399 } 3400 if (!bpf_dump_raw_ok(f_cred)) { 3401 insns[i].imm = 0; 3402 } 3403 continue; 3404 } 3405 if (BPF_CLASS(code) == BPF_LDX && BPF_MODE(code) == BPF_PROBE_MEM) { 3406 insns[i].code = BPF_LDX | BPF_SIZE(code) | BPF_MEM; 3407 continue; 3408 } 3409 3410 if (code != (BPF_LD | BPF_IMM | BPF_DW)) { 3411 continue; 3412 } 3413 3414 imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; 3415 map = bpf_map_from_imm(prog, imm, &off, &type); 3416 if (map) { 3417 insns[i].src_reg = type; 3418 insns[i].imm = map->id; 3419 insns[i + 1].imm = off; 3420 continue; 3421 } 3422 } 3423 3424 return insns; 3425} 3426 3427static int set_info_rec_size(struct bpf_prog_info *info) 3428{ 3429 /* 3430 * Ensure info.*_rec_size is the same as kernel expected size 3431 * 3432 * or 3433 * 3434 * Only allow zero *_rec_size if both _rec_size and _cnt are 3435 * zero. In this case, the kernel will set the expected 3436 * _rec_size back to the info. 
3437 */ 3438 3439 if ((info->nr_func_info || info->func_info_rec_size) && info->func_info_rec_size != sizeof(struct bpf_func_info)) { 3440 return -EINVAL; 3441 } 3442 3443 if ((info->nr_line_info || info->line_info_rec_size) && info->line_info_rec_size != sizeof(struct bpf_line_info)) { 3444 return -EINVAL; 3445 } 3446 3447 if ((info->nr_jited_line_info || info->jited_line_info_rec_size) && 3448 info->jited_line_info_rec_size != sizeof(__u64)) { 3449 return -EINVAL; 3450 } 3451 3452 info->func_info_rec_size = sizeof(struct bpf_func_info); 3453 info->line_info_rec_size = sizeof(struct bpf_line_info); 3454 info->jited_line_info_rec_size = sizeof(__u64); 3455 3456 return 0; 3457} 3458 3459static int bpf_prog_get_info_by_fd(struct file *file, struct bpf_prog *prog, const union bpf_attr *attr, 3460 union bpf_attr __user *uattr) 3461{ 3462 struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3463 struct bpf_prog_info info; 3464 u32 info_len = attr->info.info_len; 3465 struct bpf_prog_stats stats; 3466 char __user *uinsns; 3467 u32 ulen; 3468 int err; 3469 3470 err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); 3471 if (err) { 3472 return err; 3473 } 3474 info_len = min_t(u32, sizeof(info), info_len); 3475 3476 memset(&info, 0, sizeof(info)); 3477 if (copy_from_user(&info, uinfo, info_len)) { 3478 return -EFAULT; 3479 } 3480 3481 info.type = prog->type; 3482 info.id = prog->aux->id; 3483 info.load_time = prog->aux->load_time; 3484 info.created_by_uid = from_kuid_munged(current_user_ns(), prog->aux->user->uid); 3485 info.gpl_compatible = prog->gpl_compatible; 3486 3487 memcpy(info.tag, prog->tag, sizeof(prog->tag)); 3488 memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); 3489 3490 mutex_lock(&prog->aux->used_maps_mutex); 3491 ulen = info.nr_map_ids; 3492 info.nr_map_ids = prog->aux->used_map_cnt; 3493 ulen = min_t(u32, info.nr_map_ids, ulen); 3494 if (ulen) { 3495 u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); 3496 u32 i; 3497 3498 for (i = 0; i < ulen; i++) { 3499 if (put_user(prog->aux->used_maps[i]->id, &user_map_ids[i])) { 3500 mutex_unlock(&prog->aux->used_maps_mutex); 3501 return -EFAULT; 3502 } 3503 } 3504 } 3505 mutex_unlock(&prog->aux->used_maps_mutex); 3506 3507 err = set_info_rec_size(&info); 3508 if (err) { 3509 return err; 3510 } 3511 3512 bpf_prog_get_stats(prog, &stats); 3513 info.run_time_ns = stats.nsecs; 3514 info.run_cnt = stats.cnt; 3515 3516 if (!bpf_capable()) { 3517 info.jited_prog_len = 0; 3518 info.xlated_prog_len = 0; 3519 info.nr_jited_ksyms = 0; 3520 info.nr_jited_func_lens = 0; 3521 info.nr_func_info = 0; 3522 info.nr_line_info = 0; 3523 info.nr_jited_line_info = 0; 3524 goto done; 3525 } 3526 3527 ulen = info.xlated_prog_len; 3528 info.xlated_prog_len = bpf_prog_insn_size(prog); 3529 if (info.xlated_prog_len && ulen) { 3530 struct bpf_insn *insns_sanitized; 3531 bool fault; 3532 3533 if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) { 3534 info.xlated_prog_insns = 0; 3535 goto done; 3536 } 3537 insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred); 3538 if (!insns_sanitized) { 3539 return -ENOMEM; 3540 } 3541 uinsns = u64_to_user_ptr(info.xlated_prog_insns); 3542 ulen = min_t(u32, info.xlated_prog_len, ulen); 3543 fault = copy_to_user(uinsns, insns_sanitized, ulen); 3544 kfree(insns_sanitized); 3545 if (fault) { 3546 return -EFAULT; 3547 } 3548 } 3549 3550 if (bpf_prog_is_dev_bound(prog->aux)) { 3551 err = bpf_prog_offload_info_fill(&info, prog); 3552 if (err) { 3553 return err; 3554 } 3555 goto done; 3556 } 
3557 3558 /* NOTE: the following code is supposed to be skipped for offload. 3559 * bpf_prog_offload_info_fill() is the place to fill similar fields 3560 * for offload. 3561 */ 3562 ulen = info.jited_prog_len; 3563 if (prog->aux->func_cnt) { 3564 u32 i; 3565 3566 info.jited_prog_len = 0; 3567 for (i = 0; i < prog->aux->func_cnt; i++) { 3568 info.jited_prog_len += prog->aux->func[i]->jited_len; 3569 } 3570 } else { 3571 info.jited_prog_len = prog->jited_len; 3572 } 3573 3574 if (info.jited_prog_len && ulen) { 3575 if (bpf_dump_raw_ok(file->f_cred)) { 3576 uinsns = u64_to_user_ptr(info.jited_prog_insns); 3577 ulen = min_t(u32, info.jited_prog_len, ulen); 3578 3579 /* for multi-function programs, copy the JITed 3580 * instructions for all the functions 3581 */ 3582 if (prog->aux->func_cnt) { 3583 u32 len, free, i; 3584 u8 *img; 3585 3586 free = ulen; 3587 for (i = 0; i < prog->aux->func_cnt; i++) { 3588 len = prog->aux->func[i]->jited_len; 3589 len = min_t(u32, len, free); 3590 img = (u8 *)prog->aux->func[i]->bpf_func; 3591 if (copy_to_user(uinsns, img, len)) { 3592 return -EFAULT; 3593 } 3594 uinsns += len; 3595 free -= len; 3596 if (!free) { 3597 break; 3598 } 3599 } 3600 } else { 3601 if (copy_to_user(uinsns, prog->bpf_func, ulen)) { 3602 return -EFAULT; 3603 } 3604 } 3605 } else { 3606 info.jited_prog_insns = 0; 3607 } 3608 } 3609 3610 ulen = info.nr_jited_ksyms; 3611 info.nr_jited_ksyms = prog->aux->func_cnt ?: 1; 3612 if (ulen) { 3613 if (bpf_dump_raw_ok(file->f_cred)) { 3614 unsigned long ksym_addr; 3615 u64 __user *user_ksyms; 3616 u32 i; 3617 3618 /* copy the address of the kernel symbol 3619 * corresponding to each function 3620 */ 3621 ulen = min_t(u32, info.nr_jited_ksyms, ulen); 3622 user_ksyms = u64_to_user_ptr(info.jited_ksyms); 3623 if (prog->aux->func_cnt) { 3624 for (i = 0; i < ulen; i++) { 3625 ksym_addr = (unsigned long)prog->aux->func[i]->bpf_func; 3626 if (put_user((u64)ksym_addr, &user_ksyms[i])) { 3627 return -EFAULT; 3628 } 3629 } 3630 } else { 3631 ksym_addr = (unsigned long)prog->bpf_func; 3632 if (put_user((u64)ksym_addr, &user_ksyms[0])) { 3633 return -EFAULT; 3634 } 3635 } 3636 } else { 3637 info.jited_ksyms = 0; 3638 } 3639 } 3640 3641 ulen = info.nr_jited_func_lens; 3642 info.nr_jited_func_lens = prog->aux->func_cnt ?: 1; 3643 if (ulen) { 3644 if (bpf_dump_raw_ok(file->f_cred)) { 3645 u32 __user *user_lens; 3646 u32 func_len, i; 3647 3648 /* copy the JITed image lengths for each function */ 3649 ulen = min_t(u32, info.nr_jited_func_lens, ulen); 3650 user_lens = u64_to_user_ptr(info.jited_func_lens); 3651 if (prog->aux->func_cnt) { 3652 for (i = 0; i < ulen; i++) { 3653 func_len = prog->aux->func[i]->jited_len; 3654 if (put_user(func_len, &user_lens[i])) { 3655 return -EFAULT; 3656 } 3657 } 3658 } else { 3659 func_len = prog->jited_len; 3660 if (put_user(func_len, &user_lens[0])) { 3661 return -EFAULT; 3662 } 3663 } 3664 } else { 3665 info.jited_func_lens = 0; 3666 } 3667 } 3668 3669 if (prog->aux->btf) { 3670 info.btf_id = btf_id(prog->aux->btf); 3671 } 3672 3673 ulen = info.nr_func_info; 3674 info.nr_func_info = prog->aux->func_info_cnt; 3675 if (info.nr_func_info && ulen) { 3676 char __user *user_finfo; 3677 3678 user_finfo = u64_to_user_ptr(info.func_info); 3679 ulen = min_t(u32, info.nr_func_info, ulen); 3680 if (copy_to_user(user_finfo, prog->aux->func_info, info.func_info_rec_size * ulen)) { 3681 return -EFAULT; 3682 } 3683 } 3684 3685 ulen = info.nr_line_info; 3686 info.nr_line_info = prog->aux->nr_linfo; 3687 if (info.nr_line_info && ulen) { 3688 __u8 
__user *user_linfo; 3689 3690 user_linfo = u64_to_user_ptr(info.line_info); 3691 ulen = min_t(u32, info.nr_line_info, ulen); 3692 if (copy_to_user(user_linfo, prog->aux->linfo, info.line_info_rec_size * ulen)) { 3693 return -EFAULT; 3694 } 3695 } 3696 3697 ulen = info.nr_jited_line_info; 3698 if (prog->aux->jited_linfo) { 3699 info.nr_jited_line_info = prog->aux->nr_linfo; 3700 } else { 3701 info.nr_jited_line_info = 0; 3702 } 3703 if (info.nr_jited_line_info && ulen) { 3704 if (bpf_dump_raw_ok(file->f_cred)) { 3705 __u64 __user *user_linfo; 3706 u32 i; 3707 3708 user_linfo = u64_to_user_ptr(info.jited_line_info); 3709 ulen = min_t(u32, info.nr_jited_line_info, ulen); 3710 for (i = 0; i < ulen; i++) { 3711 if (put_user((__u64)(long)prog->aux->jited_linfo[i], &user_linfo[i])) { 3712 return -EFAULT; 3713 } 3714 } 3715 } else { 3716 info.jited_line_info = 0; 3717 } 3718 } 3719 3720 ulen = info.nr_prog_tags; 3721 info.nr_prog_tags = prog->aux->func_cnt ?: 1; 3722 if (ulen) { 3723 __u8 __user(*user_prog_tags)[BPF_TAG_SIZE]; 3724 u32 i; 3725 3726 user_prog_tags = u64_to_user_ptr(info.prog_tags); 3727 ulen = min_t(u32, info.nr_prog_tags, ulen); 3728 if (prog->aux->func_cnt) { 3729 for (i = 0; i < ulen; i++) { 3730 if (copy_to_user(user_prog_tags[i], prog->aux->func[i]->tag, BPF_TAG_SIZE)) { 3731 return -EFAULT; 3732 } 3733 } 3734 } else { 3735 if (copy_to_user(user_prog_tags[0], prog->tag, BPF_TAG_SIZE)) { 3736 return -EFAULT; 3737 } 3738 } 3739 } 3740 3741done: 3742 if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) { 3743 return -EFAULT; 3744 } 3745 3746 return 0; 3747} 3748 3749static int bpf_map_get_info_by_fd(struct file *file, struct bpf_map *map, const union bpf_attr *attr, 3750 union bpf_attr __user *uattr) 3751{ 3752 struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3753 struct bpf_map_info info; 3754 u32 info_len = attr->info.info_len; 3755 int err; 3756 3757 err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); 3758 if (err) { 3759 return err; 3760 } 3761 info_len = min_t(u32, sizeof(info), info_len); 3762 3763 memset(&info, 0, sizeof(info)); 3764 info.type = map->map_type; 3765 info.id = map->id; 3766 info.key_size = map->key_size; 3767 info.value_size = map->value_size; 3768 info.max_entries = map->max_entries; 3769 info.map_flags = map->map_flags; 3770 memcpy(info.name, map->name, sizeof(map->name)); 3771 3772 if (map->btf) { 3773 info.btf_id = btf_id(map->btf); 3774 info.btf_key_type_id = map->btf_key_type_id; 3775 info.btf_value_type_id = map->btf_value_type_id; 3776 } 3777 info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 3778 3779 if (bpf_map_is_dev_bound(map)) { 3780 err = bpf_map_offload_info_fill(&info, map); 3781 if (err) { 3782 return err; 3783 } 3784 } 3785 3786 if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) { 3787 return -EFAULT; 3788 } 3789 3790 return 0; 3791} 3792 3793static int bpf_btf_get_info_by_fd(struct file *file, struct btf *btf, const union bpf_attr *attr, 3794 union bpf_attr __user *uattr) 3795{ 3796 struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3797 u32 info_len = attr->info.info_len; 3798 int err; 3799 3800 err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len); 3801 if (err) { 3802 return err; 3803 } 3804 3805 return btf_get_info_by_fd(btf, attr, uattr); 3806} 3807 3808static int bpf_link_get_info_by_fd(struct file *file, struct bpf_link *link, const union bpf_attr *attr, 3809 union bpf_attr __user *uattr) 
3810{ 3811 struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); 3812 struct bpf_link_info info; 3813 u32 info_len = attr->info.info_len; 3814 int err; 3815 3816 err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); 3817 if (err) { 3818 return err; 3819 } 3820 info_len = min_t(u32, sizeof(info), info_len); 3821 3822 memset(&info, 0, sizeof(info)); 3823 if (copy_from_user(&info, uinfo, info_len)) { 3824 return -EFAULT; 3825 } 3826 3827 info.type = link->type; 3828 info.id = link->id; 3829 info.prog_id = link->prog->aux->id; 3830 3831 if (link->ops->fill_link_info) { 3832 err = link->ops->fill_link_info(link, &info); 3833 if (err) { 3834 return err; 3835 } 3836 } 3837 3838 if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) { 3839 return -EFAULT; 3840 } 3841 3842 return 0; 3843} 3844 3845#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info 3846 3847static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, union bpf_attr __user *uattr) 3848{ 3849 int ufd = attr->info.bpf_fd; 3850 struct fd f; 3851 int err; 3852 3853 if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD)) { 3854 return -EINVAL; 3855 } 3856 3857 f = fdget(ufd); 3858 if (!f.file) { 3859 return -EBADFD; 3860 } 3861 3862 if (f.file->f_op == &bpf_prog_fops) { 3863 err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr, uattr); 3864 } else if (f.file->f_op == &bpf_map_fops) { 3865 err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr, uattr); 3866 } else if (f.file->f_op == &btf_fops) { 3867 err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); 3868 } else if (f.file->f_op == &bpf_link_fops) { 3869 err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr); 3870 } else { 3871 err = -EINVAL; 3872 } 3873 3874 fdput(f); 3875 return err; 3876} 3877 3878#define BPF_BTF_LOAD_LAST_FIELD btf_log_level 3879 3880static int bpf_btf_load(const union bpf_attr *attr) 3881{ 3882 if (CHECK_ATTR(BPF_BTF_LOAD)) { 3883 return -EINVAL; 3884 } 3885 3886 if (!bpf_capable()) { 3887 return -EPERM; 3888 } 3889 3890 return btf_new_fd(attr); 3891} 3892 3893#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id 3894 3895static int bpf_btf_get_fd_by_id(const union bpf_attr *attr) 3896{ 3897 if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID)) { 3898 return -EINVAL; 3899 } 3900 3901 if (!capable(CAP_SYS_ADMIN)) { 3902 return -EPERM; 3903 } 3904 3905 return btf_get_fd_by_id(attr->btf_id); 3906} 3907 3908static int bpf_task_fd_query_copy(const union bpf_attr *attr, union bpf_attr __user *uattr, u32 prog_id, u32 fd_type, 3909 const char *buf, u64 probe_offset, u64 probe_addr) 3910{ 3911 char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf); 3912 u32 len = buf ? strlen(buf) : 0, input_len; 3913 int err = 0; 3914 3915 if (put_user(len, &uattr->task_fd_query.buf_len)) { 3916 return -EFAULT; 3917 } 3918 input_len = attr->task_fd_query.buf_len; 3919 if (input_len && ubuf) { 3920 if (!len) { 3921 /* nothing to copy, just make ubuf NULL terminated */ 3922 char zero = '\0'; 3923 3924 if (put_user(zero, ubuf)) { 3925 return -EFAULT; 3926 } 3927 } else if (input_len >= len + 1) { 3928 /* ubuf can hold the string with NULL terminator */ 3929 if (copy_to_user(ubuf, buf, len + 1)) { 3930 return -EFAULT; 3931 } 3932 } else { 3933 /* ubuf cannot hold the string with NULL terminator, 3934 * do a partial copy with NULL terminator. 
3935 */ 3936 char zero = '\0'; 3937 3938 err = -ENOSPC; 3939 if (copy_to_user(ubuf, buf, input_len - 1)) { 3940 return -EFAULT; 3941 } 3942 if (put_user(zero, ubuf + input_len - 1)) { 3943 return -EFAULT; 3944 } 3945 } 3946 } 3947 3948 if (put_user(prog_id, &uattr->task_fd_query.prog_id) || put_user(fd_type, &uattr->task_fd_query.fd_type) || 3949 put_user(probe_offset, &uattr->task_fd_query.probe_offset) || 3950 put_user(probe_addr, &uattr->task_fd_query.probe_addr)) { 3951 return -EFAULT; 3952 } 3953 3954 return err; 3955} 3956 3957#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr 3958 3959static int bpf_task_fd_query(const union bpf_attr *attr, union bpf_attr __user *uattr) 3960{ 3961 pid_t pid = attr->task_fd_query.pid; 3962 u32 fd = attr->task_fd_query.fd; 3963 const struct perf_event *event; 3964 struct files_struct *files; 3965 struct task_struct *task; 3966 struct file *file; 3967 int err; 3968 3969 if (CHECK_ATTR(BPF_TASK_FD_QUERY)) { 3970 return -EINVAL; 3971 } 3972 3973 if (!capable(CAP_SYS_ADMIN)) { 3974 return -EPERM; 3975 } 3976 3977 if (attr->task_fd_query.flags != 0) { 3978 return -EINVAL; 3979 } 3980 3981 task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 3982 if (!task) { 3983 return -ENOENT; 3984 } 3985 3986 files = get_files_struct(task); 3987 put_task_struct(task); 3988 if (!files) { 3989 return -ENOENT; 3990 } 3991 3992 err = 0; 3993 spin_lock(&files->file_lock); 3994 file = fcheck_files(files, fd); 3995 if (!file) { 3996 err = -EBADF; 3997 } else { 3998 get_file(file); 3999 } 4000 spin_unlock(&files->file_lock); 4001 put_files_struct(files); 4002 4003 if (err) { 4004 goto out; 4005 } 4006 4007 if (file->f_op == &bpf_link_fops) { 4008 struct bpf_link *link = file->private_data; 4009 4010 if (link->ops == &bpf_raw_tp_link_lops) { 4011 struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link); 4012 struct bpf_raw_event_map *btp = raw_tp->btp; 4013 4014 err = bpf_task_fd_query_copy(attr, uattr, raw_tp->link.prog->aux->id, BPF_FD_TYPE_RAW_TRACEPOINT, 4015 btp->tp->name, 0, 0); 4016 goto put_file; 4017 } 4018 goto out_not_supp; 4019 } 4020 4021 event = perf_get_event(file); 4022 if (!IS_ERR(event)) { 4023 u64 probe_offset, probe_addr; 4024 u32 prog_id, fd_type; 4025 const char *buf; 4026 4027 err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, &probe_addr); 4028 if (!err) { 4029 err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, probe_offset, probe_addr); 4030 } 4031 goto put_file; 4032 } 4033 4034out_not_supp: 4035 err = -ENOTSUPP; 4036put_file: 4037 fput(file); 4038out: 4039 return err; 4040} 4041 4042#define BPF_MAP_BATCH_LAST_FIELD batch.flags 4043 4044#define BPF_DO_BATCH(fn) \ 4045 do { \ 4046 if (!(fn)) { \ 4047 err = -ENOTSUPP; \ 4048 goto err_put; \ 4049 } \ 4050 err = fn(map, attr, uattr); \ 4051 } while (0) 4052 4053static int bpf_map_do_batch(const union bpf_attr *attr, union bpf_attr __user *uattr, int cmd) 4054{ 4055 struct bpf_map *map; 4056 int err, ufd; 4057 struct fd f; 4058 4059 if (CHECK_ATTR(BPF_MAP_BATCH)) { 4060 return -EINVAL; 4061 } 4062 4063 ufd = attr->batch.map_fd; 4064 f = fdget(ufd); 4065 map = __bpf_map_get(f); 4066 if (IS_ERR(map)) { 4067 return PTR_ERR(map); 4068 } 4069 4070 if ((cmd == BPF_MAP_LOOKUP_BATCH || cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && 4071 !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { 4072 err = -EPERM; 4073 goto err_put; 4074 } 4075 4076 if (cmd != BPF_MAP_LOOKUP_BATCH && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 4077 err = -EPERM; 4078 
goto err_put; 4079 } 4080 4081 if (cmd == BPF_MAP_LOOKUP_BATCH) { 4082 BPF_DO_BATCH(map->ops->map_lookup_batch); 4083 } else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) { 4084 BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch); 4085 } else if (cmd == BPF_MAP_UPDATE_BATCH) { 4086 BPF_DO_BATCH(map->ops->map_update_batch); 4087 } else { 4088 BPF_DO_BATCH(map->ops->map_delete_batch); 4089 } 4090 4091err_put: 4092 fdput(f); 4093 return err; 4094} 4095 4096static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 4097{ 4098 if (attr->link_create.attach_type != prog->expected_attach_type) { 4099 return -EINVAL; 4100 } 4101 4102 if (prog->expected_attach_type == BPF_TRACE_ITER) { 4103 return bpf_iter_link_attach(attr, prog); 4104 } else if (prog->type == BPF_PROG_TYPE_EXT) { 4105 return bpf_tracing_prog_attach(prog, attr->link_create.target_fd, attr->link_create.target_btf_id); 4106 } 4107 return -EINVAL; 4108} 4109 4110#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len 4111static int link_create(union bpf_attr *attr) 4112{ 4113 enum bpf_prog_type ptype; 4114 struct bpf_prog *prog; 4115 int ret; 4116 4117 if (CHECK_ATTR(BPF_LINK_CREATE)) { 4118 return -EINVAL; 4119 } 4120 4121 prog = bpf_prog_get(attr->link_create.prog_fd); 4122 if (IS_ERR(prog)) { 4123 return PTR_ERR(prog); 4124 } 4125 4126 ret = bpf_prog_attach_check_attach_type(prog, attr->link_create.attach_type); 4127 if (ret) { 4128 goto out; 4129 } 4130 4131 if (prog->type == BPF_PROG_TYPE_EXT) { 4132 ret = tracing_bpf_link_attach(attr, prog); 4133 goto out; 4134 } 4135 4136 ptype = attach_type_to_prog_type(attr->link_create.attach_type); 4137 if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { 4138 ret = -EINVAL; 4139 goto out; 4140 } 4141 4142 switch (ptype) { 4143 case BPF_PROG_TYPE_CGROUP_SKB: 4144 case BPF_PROG_TYPE_CGROUP_SOCK: 4145 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 4146 case BPF_PROG_TYPE_SOCK_OPS: 4147 case BPF_PROG_TYPE_CGROUP_DEVICE: 4148 case BPF_PROG_TYPE_CGROUP_SYSCTL: 4149 case BPF_PROG_TYPE_CGROUP_SOCKOPT: 4150 ret = cgroup_bpf_link_attach(attr, prog); 4151 break; 4152 case BPF_PROG_TYPE_TRACING: 4153 ret = tracing_bpf_link_attach(attr, prog); 4154 break; 4155 case BPF_PROG_TYPE_FLOW_DISSECTOR: 4156 case BPF_PROG_TYPE_SK_LOOKUP: 4157 ret = netns_bpf_link_create(attr, prog); 4158 break; 4159#ifdef CONFIG_NET 4160 case BPF_PROG_TYPE_XDP: 4161 ret = bpf_xdp_link_attach(attr, prog); 4162 break; 4163#endif 4164 default: 4165 ret = -EINVAL; 4166 } 4167 4168out: 4169 if (ret < 0) { 4170 bpf_prog_put(prog); 4171 } 4172 return ret; 4173} 4174 4175#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd 4176 4177static int link_update(union bpf_attr *attr) 4178{ 4179 struct bpf_prog *old_prog = NULL, *new_prog; 4180 struct bpf_link *link; 4181 u32 flags; 4182 int ret; 4183 4184 if (CHECK_ATTR(BPF_LINK_UPDATE)) { 4185 return -EINVAL; 4186 } 4187 4188 flags = attr->link_update.flags; 4189 if (flags & ~BPF_F_REPLACE) { 4190 return -EINVAL; 4191 } 4192 4193 link = bpf_link_get_from_fd(attr->link_update.link_fd); 4194 if (IS_ERR(link)) { 4195 return PTR_ERR(link); 4196 } 4197 4198 new_prog = bpf_prog_get(attr->link_update.new_prog_fd); 4199 if (IS_ERR(new_prog)) { 4200 ret = PTR_ERR(new_prog); 4201 goto out_put_link; 4202 } 4203 4204 if (flags & BPF_F_REPLACE) { 4205 old_prog = bpf_prog_get(attr->link_update.old_prog_fd); 4206 if (IS_ERR(old_prog)) { 4207 ret = PTR_ERR(old_prog); 4208 old_prog = NULL; 4209 goto out_put_progs; 4210 } 4211 } else if (attr->link_update.old_prog_fd) { 4212 ret 
= -EINVAL; 4213 goto out_put_progs; 4214 } 4215 4216 if (link->ops->update_prog) { 4217 ret = link->ops->update_prog(link, new_prog, old_prog); 4218 } else { 4219 ret = -EINVAL; 4220 } 4221 4222out_put_progs: 4223 if (old_prog) { 4224 bpf_prog_put(old_prog); 4225 } 4226 if (ret) { 4227 bpf_prog_put(new_prog); 4228 } 4229out_put_link: 4230 bpf_link_put(link); 4231 return ret; 4232} 4233 4234#define BPF_LINK_DETACH_LAST_FIELD link_detach.link_fd 4235 4236static int link_detach(union bpf_attr *attr) 4237{ 4238 struct bpf_link *link; 4239 int ret; 4240 4241 if (CHECK_ATTR(BPF_LINK_DETACH)) { 4242 return -EINVAL; 4243 } 4244 4245 link = bpf_link_get_from_fd(attr->link_detach.link_fd); 4246 if (IS_ERR(link)) { 4247 return PTR_ERR(link); 4248 } 4249 4250 if (link->ops->detach) { 4251 ret = link->ops->detach(link); 4252 } else { 4253 ret = -EOPNOTSUPP; 4254 } 4255 4256 bpf_link_put(link); 4257 return ret; 4258} 4259 4260static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) 4261{ 4262 return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); 4263} 4264 4265struct bpf_link *bpf_link_by_id(u32 id) 4266{ 4267 struct bpf_link *link; 4268 4269 if (!id) { 4270 return ERR_PTR(-ENOENT); 4271 } 4272 4273 spin_lock_bh(&link_idr_lock); 4274 /* before link is "settled", ID is 0, pretend it doesn't exist yet */ 4275 link = idr_find(&link_idr, id); 4276 if (link) { 4277 if (link->id) { 4278 link = bpf_link_inc_not_zero(link); 4279 } else { 4280 link = ERR_PTR(-EAGAIN); 4281 } 4282 } else { 4283 link = ERR_PTR(-ENOENT); 4284 } 4285 spin_unlock_bh(&link_idr_lock); 4286 return link; 4287} 4288 4289#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id 4290 4291static int bpf_link_get_fd_by_id(const union bpf_attr *attr) 4292{ 4293 struct bpf_link *link; 4294 u32 id = attr->link_id; 4295 int fd; 4296 4297 if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) { 4298 return -EINVAL; 4299 } 4300 4301 if (!capable(CAP_SYS_ADMIN)) { 4302 return -EPERM; 4303 } 4304 4305 link = bpf_link_by_id(id); 4306 if (IS_ERR(link)) { 4307 return PTR_ERR(link); 4308 } 4309 4310 fd = bpf_link_new_fd(link); 4311 if (fd < 0) { 4312 bpf_link_put(link); 4313 } 4314 4315 return fd; 4316} 4317 4318DEFINE_MUTEX(bpf_stats_enabled_mutex); 4319 4320static int bpf_stats_release(struct inode *inode, struct file *file) 4321{ 4322 mutex_lock(&bpf_stats_enabled_mutex); 4323 static_key_slow_dec(&bpf_stats_enabled_key.key); 4324 mutex_unlock(&bpf_stats_enabled_mutex); 4325 return 0; 4326} 4327 4328static const struct file_operations bpf_stats_fops = { 4329 .release = bpf_stats_release, 4330}; 4331 4332static int bpf_enable_runtime_stats(void) 4333{ 4334 int fd; 4335 4336 mutex_lock(&bpf_stats_enabled_mutex); 4337 4338 /* Set a very high limit to avoid overflow */ 4339 if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 0x2) { 4340 mutex_unlock(&bpf_stats_enabled_mutex); 4341 return -EBUSY; 4342 } 4343 4344 fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); 4345 if (fd >= 0) { 4346 static_key_slow_inc(&bpf_stats_enabled_key.key); 4347 } 4348 4349 mutex_unlock(&bpf_stats_enabled_mutex); 4350 return fd; 4351} 4352 4353#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type 4354 4355static int bpf_enable_stats(union bpf_attr *attr) 4356{ 4357 if (CHECK_ATTR(BPF_ENABLE_STATS)) { 4358 return -EINVAL; 4359 } 4360 4361 if (!capable(CAP_SYS_ADMIN)) { 4362 return -EPERM; 4363 } 4364 4365 switch (attr->enable_stats.type) { 4366 case BPF_STATS_RUN_TIME: 4367 return bpf_enable_runtime_stats(); 4368 default: 4369 break; 4370 
#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags

static int bpf_iter_create(union bpf_attr *attr)
{
	struct bpf_link *link;
	int err;

	if (CHECK_ATTR(BPF_ITER_CREATE)) {
		return -EINVAL;
	}

	if (attr->iter_create.flags) {
		return -EINVAL;
	}

	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
	if (IS_ERR(link)) {
		return PTR_ERR(link);
	}

	err = bpf_iter_new_fd(link);
	bpf_link_put(link);

	return err;
}

#define BPF_PROG_BIND_MAP_LAST_FIELD prog_bind_map.flags

static int bpf_prog_bind_map(union bpf_attr *attr)
{
	struct bpf_prog *prog;
	struct bpf_map *map;
	struct bpf_map **used_maps_old, **used_maps_new;
	int i, ret = 0;

	if (CHECK_ATTR(BPF_PROG_BIND_MAP)) {
		return -EINVAL;
	}

	if (attr->prog_bind_map.flags) {
		return -EINVAL;
	}

	prog = bpf_prog_get(attr->prog_bind_map.prog_fd);
	if (IS_ERR(prog)) {
		return PTR_ERR(prog);
	}

	map = bpf_map_get(attr->prog_bind_map.map_fd);
	if (IS_ERR(map)) {
		ret = PTR_ERR(map);
		goto out_prog_put;
	}

	mutex_lock(&prog->aux->used_maps_mutex);

	used_maps_old = prog->aux->used_maps;

	for (i = 0; i < prog->aux->used_map_cnt; i++) {
		if (used_maps_old[i] == map) {
			bpf_map_put(map);
			goto out_unlock;
		}
	}

	used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1, sizeof(used_maps_new[0]), GFP_KERNEL);
	if (!used_maps_new) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	memcpy(used_maps_new, used_maps_old, sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
	used_maps_new[prog->aux->used_map_cnt] = map;

	prog->aux->used_map_cnt++;
	prog->aux->used_maps = used_maps_new;

	kfree(used_maps_old);

out_unlock:
	mutex_unlock(&prog->aux->used_maps_mutex);

	if (ret) {
		bpf_map_put(map);
	}
out_prog_put:
	bpf_prog_put(prog);
	return ret;
}
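/*
 * Usage sketch (illustrative only, not part of this file): BPF_PROG_BIND_MAP
 * ties an extra map's lifetime to a program that does not reference it in
 * its instructions, so the map is only freed once the program is. The
 * wrapper name below is hypothetical.
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	static int bind_map_to_prog(int prog_fd, int map_fd)
 *	{
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.prog_bind_map.prog_fd = prog_fd;
 *		attr.prog_bind_map.map_fd = map_fd;
 *		// flags must be zero; binding an already-used map succeeds as a no-op.
 *		return syscall(__NR_bpf, BPF_PROG_BIND_MAP, &attr, sizeof(attr));
 *	}
 */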
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr;
	int err;

	if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) {
		return -EPERM;
	}

	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err) {
		return err;
	}
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_user(&attr, uattr, size) != 0) {
		return -EFAULT;
	}

	err = security_bpf(cmd, &attr, size);
	if (err < 0) {
		return err;
	}

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_MAP_FREEZE:
		err = map_freeze(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr, uattr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	case BPF_PROG_ATTACH:
		err = bpf_prog_attach(&attr);
		break;
	case BPF_PROG_DETACH:
		err = bpf_prog_detach(&attr);
		break;
	case BPF_PROG_QUERY:
		err = bpf_prog_query(&attr, uattr);
		break;
	case BPF_PROG_TEST_RUN:
		err = bpf_prog_test_run(&attr, uattr);
		break;
	case BPF_PROG_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr, &prog_idr, &prog_idr_lock);
		break;
	case BPF_MAP_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr, &map_idr, &map_idr_lock);
		break;
	case BPF_BTF_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr, &btf_idr, &btf_idr_lock);
		break;
	case BPF_PROG_GET_FD_BY_ID:
		err = bpf_prog_get_fd_by_id(&attr);
		break;
	case BPF_MAP_GET_FD_BY_ID:
		err = bpf_map_get_fd_by_id(&attr);
		break;
	case BPF_OBJ_GET_INFO_BY_FD:
		err = bpf_obj_get_info_by_fd(&attr, uattr);
		break;
	case BPF_RAW_TRACEPOINT_OPEN:
		err = bpf_raw_tracepoint_open(&attr);
		break;
	case BPF_BTF_LOAD:
		err = bpf_btf_load(&attr);
		break;
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
		err = map_lookup_and_delete_elem(&attr);
		break;
	case BPF_MAP_LOOKUP_BATCH:
		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
		break;
	case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_AND_DELETE_BATCH);
		break;
	case BPF_MAP_UPDATE_BATCH:
		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
		break;
	case BPF_MAP_DELETE_BATCH:
		err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
		break;
	case BPF_LINK_CREATE:
		err = link_create(&attr);
		break;
	case BPF_LINK_UPDATE:
		err = link_update(&attr);
		break;
	case BPF_LINK_GET_FD_BY_ID:
		err = bpf_link_get_fd_by_id(&attr);
		break;
	case BPF_LINK_GET_NEXT_ID:
		err = bpf_obj_get_next_id(&attr, uattr, &link_idr, &link_idr_lock);
		break;
	case BPF_ENABLE_STATS:
		err = bpf_enable_stats(&attr);
		break;
	case BPF_ITER_CREATE:
		err = bpf_iter_create(&attr);
		break;
	case BPF_LINK_DETACH:
		err = link_detach(&attr);
		break;
	case BPF_PROG_BIND_MAP:
		err = bpf_prog_bind_map(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
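/*
 * Usage sketch (illustrative only, not part of this file): every command
 * handled above enters through this one syscall with a zeroed union
 * bpf_attr; any unused tail bytes must stay zero (see the
 * bpf_check_uarg_tail_zero() call above). The wrapper and the
 * element-lookup helper below are hypothetical.
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	static long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
 *	{
 *		return syscall(__NR_bpf, cmd, attr, size);
 *	}
 *
 *	static long lookup_elem(int map_fd, const void *key, void *value)
 *	{
 *		union bpf_attr attr;
 *
 *		memset(&attr, 0, sizeof(attr));
 *		attr.map_fd = map_fd;
 *		attr.key = (__u64)(unsigned long)key;
 *		attr.value = (__u64)(unsigned long)value;
 *		return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 *	}
 */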