18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/kernel/fork.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 1991, 1992 Linus Torvalds 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci/* 98c2ecf20Sopenharmony_ci * 'fork.c' contains the help-routines for the 'fork' system call 108c2ecf20Sopenharmony_ci * (see also entry.S and others). 118c2ecf20Sopenharmony_ci * Fork is rather simple, once you get the hang of it, but the memory 128c2ecf20Sopenharmony_ci * management can be a bitch. See 'mm/memory.c': 'copy_page_range()' 138c2ecf20Sopenharmony_ci */ 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#include <linux/anon_inodes.h> 168c2ecf20Sopenharmony_ci#include <linux/slab.h> 178c2ecf20Sopenharmony_ci#include <linux/sched/autogroup.h> 188c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 198c2ecf20Sopenharmony_ci#include <linux/sched/coredump.h> 208c2ecf20Sopenharmony_ci#include <linux/sched/user.h> 218c2ecf20Sopenharmony_ci#include <linux/sched/numa_balancing.h> 228c2ecf20Sopenharmony_ci#include <linux/sched/stat.h> 238c2ecf20Sopenharmony_ci#include <linux/sched/task.h> 248c2ecf20Sopenharmony_ci#include <linux/sched/task_stack.h> 258c2ecf20Sopenharmony_ci#include <linux/sched/cputime.h> 268c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 278c2ecf20Sopenharmony_ci#include <linux/rtmutex.h> 288c2ecf20Sopenharmony_ci#include <linux/init.h> 298c2ecf20Sopenharmony_ci#include <linux/unistd.h> 308c2ecf20Sopenharmony_ci#include <linux/module.h> 318c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 328c2ecf20Sopenharmony_ci#include <linux/completion.h> 338c2ecf20Sopenharmony_ci#include <linux/personality.h> 348c2ecf20Sopenharmony_ci#include <linux/mempolicy.h> 358c2ecf20Sopenharmony_ci#include <linux/sem.h> 368c2ecf20Sopenharmony_ci#include <linux/file.h> 378c2ecf20Sopenharmony_ci#include <linux/fdtable.h> 388c2ecf20Sopenharmony_ci#include 
<linux/iocontext.h> 398c2ecf20Sopenharmony_ci#include <linux/key.h> 408c2ecf20Sopenharmony_ci#include <linux/binfmts.h> 418c2ecf20Sopenharmony_ci#include <linux/mman.h> 428c2ecf20Sopenharmony_ci#include <linux/mmu_notifier.h> 438c2ecf20Sopenharmony_ci#include <linux/fs.h> 448c2ecf20Sopenharmony_ci#include <linux/mm.h> 458c2ecf20Sopenharmony_ci#include <linux/mm_inline.h> 468c2ecf20Sopenharmony_ci#include <linux/vmacache.h> 478c2ecf20Sopenharmony_ci#include <linux/nsproxy.h> 488c2ecf20Sopenharmony_ci#include <linux/capability.h> 498c2ecf20Sopenharmony_ci#include <linux/cpu.h> 508c2ecf20Sopenharmony_ci#include <linux/cgroup.h> 518c2ecf20Sopenharmony_ci#include <linux/security.h> 528c2ecf20Sopenharmony_ci#include <linux/hugetlb.h> 538c2ecf20Sopenharmony_ci#include <linux/seccomp.h> 548c2ecf20Sopenharmony_ci#include <linux/swap.h> 558c2ecf20Sopenharmony_ci#include <linux/syscalls.h> 568c2ecf20Sopenharmony_ci#include <linux/jiffies.h> 578c2ecf20Sopenharmony_ci#include <linux/futex.h> 588c2ecf20Sopenharmony_ci#include <linux/compat.h> 598c2ecf20Sopenharmony_ci#include <linux/kthread.h> 608c2ecf20Sopenharmony_ci#include <linux/task_io_accounting_ops.h> 618c2ecf20Sopenharmony_ci#include <linux/rcupdate.h> 628c2ecf20Sopenharmony_ci#include <linux/ptrace.h> 638c2ecf20Sopenharmony_ci#include <linux/mount.h> 648c2ecf20Sopenharmony_ci#include <linux/audit.h> 658c2ecf20Sopenharmony_ci#include <linux/memcontrol.h> 668c2ecf20Sopenharmony_ci#include <linux/ftrace.h> 678c2ecf20Sopenharmony_ci#include <linux/proc_fs.h> 688c2ecf20Sopenharmony_ci#include <linux/profile.h> 698c2ecf20Sopenharmony_ci#include <linux/rmap.h> 708c2ecf20Sopenharmony_ci#include <linux/ksm.h> 718c2ecf20Sopenharmony_ci#include <linux/acct.h> 728c2ecf20Sopenharmony_ci#include <linux/userfaultfd_k.h> 738c2ecf20Sopenharmony_ci#include <linux/tsacct_kern.h> 748c2ecf20Sopenharmony_ci#include <linux/cn_proc.h> 758c2ecf20Sopenharmony_ci#include <linux/freezer.h> 768c2ecf20Sopenharmony_ci#include <linux/delayacct.h> 
778c2ecf20Sopenharmony_ci#include <linux/taskstats_kern.h> 788c2ecf20Sopenharmony_ci#include <linux/random.h> 798c2ecf20Sopenharmony_ci#include <linux/tty.h> 808c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 818c2ecf20Sopenharmony_ci#include <linux/fs_struct.h> 828c2ecf20Sopenharmony_ci#include <linux/magic.h> 838c2ecf20Sopenharmony_ci#include <linux/perf_event.h> 848c2ecf20Sopenharmony_ci#include <linux/posix-timers.h> 858c2ecf20Sopenharmony_ci#include <linux/user-return-notifier.h> 868c2ecf20Sopenharmony_ci#include <linux/oom.h> 878c2ecf20Sopenharmony_ci#include <linux/khugepaged.h> 888c2ecf20Sopenharmony_ci#include <linux/signalfd.h> 898c2ecf20Sopenharmony_ci#include <linux/uprobes.h> 908c2ecf20Sopenharmony_ci#include <linux/aio.h> 918c2ecf20Sopenharmony_ci#include <linux/compiler.h> 928c2ecf20Sopenharmony_ci#include <linux/sysctl.h> 938c2ecf20Sopenharmony_ci#include <linux/kcov.h> 948c2ecf20Sopenharmony_ci#include <linux/livepatch.h> 958c2ecf20Sopenharmony_ci#include <linux/thread_info.h> 968c2ecf20Sopenharmony_ci#include <linux/stackleak.h> 978c2ecf20Sopenharmony_ci#include <linux/kasan.h> 988c2ecf20Sopenharmony_ci#include <linux/scs.h> 998c2ecf20Sopenharmony_ci#include <linux/io_uring.h> 1008c2ecf20Sopenharmony_ci#ifdef CONFIG_RECLAIM_ACCT 1018c2ecf20Sopenharmony_ci#include <linux/reclaim_acct.h> 1028c2ecf20Sopenharmony_ci#endif 1038c2ecf20Sopenharmony_ci#include <linux/mm_purgeable.h> 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci#include <asm/pgalloc.h> 1068c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 1078c2ecf20Sopenharmony_ci#include <asm/mmu_context.h> 1088c2ecf20Sopenharmony_ci#include <asm/cacheflush.h> 1098c2ecf20Sopenharmony_ci#include <asm/tlbflush.h> 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci#include <trace/events/sched.h> 1128c2ecf20Sopenharmony_ci#include <linux/hck/lite_hck_ced.h> 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS 1158c2ecf20Sopenharmony_ci#include <trace/events/task.h> 
1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci/* 1188c2ecf20Sopenharmony_ci * Minimum number of threads to boot the kernel 1198c2ecf20Sopenharmony_ci */ 1208c2ecf20Sopenharmony_ci#define MIN_THREADS 20 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci/* 1238c2ecf20Sopenharmony_ci * Maximum number of threads 1248c2ecf20Sopenharmony_ci */ 1258c2ecf20Sopenharmony_ci#define MAX_THREADS FUTEX_TID_MASK 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci/* 1288c2ecf20Sopenharmony_ci * Protected counters by write_lock_irq(&tasklist_lock) 1298c2ecf20Sopenharmony_ci */ 1308c2ecf20Sopenharmony_ciunsigned long total_forks; /* Handle normal Linux uptimes. */ 1318c2ecf20Sopenharmony_ciint nr_threads; /* The idle threads do not count.. */ 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_cistatic int max_threads; /* tunable limit on nr_threads */ 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci#define NAMED_ARRAY_INDEX(x) [x] = __stringify(x) 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_cistatic const char * const resident_page_types[] = { 1388c2ecf20Sopenharmony_ci NAMED_ARRAY_INDEX(MM_FILEPAGES), 1398c2ecf20Sopenharmony_ci NAMED_ARRAY_INDEX(MM_ANONPAGES), 1408c2ecf20Sopenharmony_ci NAMED_ARRAY_INDEX(MM_SWAPENTS), 1418c2ecf20Sopenharmony_ci NAMED_ARRAY_INDEX(MM_SHMEMPAGES), 1428c2ecf20Sopenharmony_ci}; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ciDEFINE_PER_CPU(unsigned long, process_counts) = 0; 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci#ifdef CONFIG_PROVE_RCU 1498c2ecf20Sopenharmony_ciint lockdep_tasklist_lock_is_held(void) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci return lockdep_is_held(&tasklist_lock); 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held); 1548c2ecf20Sopenharmony_ci#endif /* #ifdef CONFIG_PROVE_RCU */ 1558c2ecf20Sopenharmony_ci 
1568c2ecf20Sopenharmony_ciint nr_processes(void) 1578c2ecf20Sopenharmony_ci{ 1588c2ecf20Sopenharmony_ci int cpu; 1598c2ecf20Sopenharmony_ci int total = 0; 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) 1628c2ecf20Sopenharmony_ci total += per_cpu(process_counts, cpu); 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci return total; 1658c2ecf20Sopenharmony_ci} 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_civoid __weak arch_release_task_struct(struct task_struct *tsk) 1688c2ecf20Sopenharmony_ci{ 1698c2ecf20Sopenharmony_ci} 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR 1728c2ecf20Sopenharmony_cistatic struct kmem_cache *task_struct_cachep; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_cistatic inline struct task_struct *alloc_task_struct_node(int node) 1758c2ecf20Sopenharmony_ci{ 1768c2ecf20Sopenharmony_ci return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node); 1778c2ecf20Sopenharmony_ci} 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_cistatic inline void free_task_struct(struct task_struct *tsk) 1808c2ecf20Sopenharmony_ci{ 1818c2ecf20Sopenharmony_ci kmem_cache_free(task_struct_cachep, tsk); 1828c2ecf20Sopenharmony_ci} 1838c2ecf20Sopenharmony_ci#endif 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci/* 1888c2ecf20Sopenharmony_ci * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a 1898c2ecf20Sopenharmony_ci * kmemcache based allocator. 1908c2ecf20Sopenharmony_ci */ 1918c2ecf20Sopenharmony_ci# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK 1948c2ecf20Sopenharmony_ci/* 1958c2ecf20Sopenharmony_ci * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB 1968c2ecf20Sopenharmony_ci * flush. Try to minimize the number of calls by caching stacks. 
1978c2ecf20Sopenharmony_ci */ 1988c2ecf20Sopenharmony_ci#define NR_CACHED_STACKS 2 1998c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]); 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_cistatic int free_vm_stack_cache(unsigned int cpu) 2028c2ecf20Sopenharmony_ci{ 2038c2ecf20Sopenharmony_ci struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu); 2048c2ecf20Sopenharmony_ci int i; 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci for (i = 0; i < NR_CACHED_STACKS; i++) { 2078c2ecf20Sopenharmony_ci struct vm_struct *vm_stack = cached_vm_stacks[i]; 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci if (!vm_stack) 2108c2ecf20Sopenharmony_ci continue; 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci vfree(vm_stack->addr); 2138c2ecf20Sopenharmony_ci cached_vm_stacks[i] = NULL; 2148c2ecf20Sopenharmony_ci } 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci return 0; 2178c2ecf20Sopenharmony_ci} 2188c2ecf20Sopenharmony_ci#endif 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_cistatic unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) 2218c2ecf20Sopenharmony_ci{ 2228c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK 2238c2ecf20Sopenharmony_ci void *stack; 2248c2ecf20Sopenharmony_ci int i; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci for (i = 0; i < NR_CACHED_STACKS; i++) { 2278c2ecf20Sopenharmony_ci struct vm_struct *s; 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci s = this_cpu_xchg(cached_stacks[i], NULL); 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci if (!s) 2328c2ecf20Sopenharmony_ci continue; 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci /* Clear the KASAN shadow of the stack. */ 2358c2ecf20Sopenharmony_ci kasan_unpoison_shadow(s->addr, THREAD_SIZE); 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci /* Clear stale pointers from reused stack. 
*/ 2388c2ecf20Sopenharmony_ci memset(s->addr, 0, THREAD_SIZE); 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci tsk->stack_vm_area = s; 2418c2ecf20Sopenharmony_ci tsk->stack = s->addr; 2428c2ecf20Sopenharmony_ci return s->addr; 2438c2ecf20Sopenharmony_ci } 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci /* 2468c2ecf20Sopenharmony_ci * Allocated stacks are cached and later reused by new threads, 2478c2ecf20Sopenharmony_ci * so memcg accounting is performed manually on assigning/releasing 2488c2ecf20Sopenharmony_ci * stacks to tasks. Drop __GFP_ACCOUNT. 2498c2ecf20Sopenharmony_ci */ 2508c2ecf20Sopenharmony_ci stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, 2518c2ecf20Sopenharmony_ci VMALLOC_START, VMALLOC_END, 2528c2ecf20Sopenharmony_ci THREADINFO_GFP & ~__GFP_ACCOUNT, 2538c2ecf20Sopenharmony_ci PAGE_KERNEL, 2548c2ecf20Sopenharmony_ci 0, node, __builtin_return_address(0)); 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci /* 2578c2ecf20Sopenharmony_ci * We can't call find_vm_area() in interrupt context, and 2588c2ecf20Sopenharmony_ci * free_thread_stack() can be called in interrupt context, 2598c2ecf20Sopenharmony_ci * so cache the vm_struct. 
2608c2ecf20Sopenharmony_ci */ 2618c2ecf20Sopenharmony_ci if (stack) { 2628c2ecf20Sopenharmony_ci tsk->stack_vm_area = find_vm_area(stack); 2638c2ecf20Sopenharmony_ci tsk->stack = stack; 2648c2ecf20Sopenharmony_ci } 2658c2ecf20Sopenharmony_ci return stack; 2668c2ecf20Sopenharmony_ci#else 2678c2ecf20Sopenharmony_ci struct page *page = alloc_pages_node(node, THREADINFO_GFP, 2688c2ecf20Sopenharmony_ci THREAD_SIZE_ORDER); 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci if (likely(page)) { 2718c2ecf20Sopenharmony_ci tsk->stack = kasan_reset_tag(page_address(page)); 2728c2ecf20Sopenharmony_ci return tsk->stack; 2738c2ecf20Sopenharmony_ci } 2748c2ecf20Sopenharmony_ci return NULL; 2758c2ecf20Sopenharmony_ci#endif 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_cistatic inline void free_thread_stack(struct task_struct *tsk) 2798c2ecf20Sopenharmony_ci{ 2808c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK 2818c2ecf20Sopenharmony_ci struct vm_struct *vm = task_stack_vm_area(tsk); 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci if (vm) { 2848c2ecf20Sopenharmony_ci int i; 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) 2878c2ecf20Sopenharmony_ci memcg_kmem_uncharge_page(vm->pages[i], 0); 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci for (i = 0; i < NR_CACHED_STACKS; i++) { 2908c2ecf20Sopenharmony_ci if (this_cpu_cmpxchg(cached_stacks[i], 2918c2ecf20Sopenharmony_ci NULL, tsk->stack_vm_area) != NULL) 2928c2ecf20Sopenharmony_ci continue; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci return; 2958c2ecf20Sopenharmony_ci } 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci vfree_atomic(tsk->stack); 2988c2ecf20Sopenharmony_ci return; 2998c2ecf20Sopenharmony_ci } 3008c2ecf20Sopenharmony_ci#endif 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER); 3038c2ecf20Sopenharmony_ci} 3048c2ecf20Sopenharmony_ci# else 
3058c2ecf20Sopenharmony_cistatic struct kmem_cache *thread_stack_cache; 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_cistatic unsigned long *alloc_thread_stack_node(struct task_struct *tsk, 3088c2ecf20Sopenharmony_ci int node) 3098c2ecf20Sopenharmony_ci{ 3108c2ecf20Sopenharmony_ci unsigned long *stack; 3118c2ecf20Sopenharmony_ci stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); 3128c2ecf20Sopenharmony_ci stack = kasan_reset_tag(stack); 3138c2ecf20Sopenharmony_ci tsk->stack = stack; 3148c2ecf20Sopenharmony_ci return stack; 3158c2ecf20Sopenharmony_ci} 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_cistatic void free_thread_stack(struct task_struct *tsk) 3188c2ecf20Sopenharmony_ci{ 3198c2ecf20Sopenharmony_ci kmem_cache_free(thread_stack_cache, tsk->stack); 3208c2ecf20Sopenharmony_ci} 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_civoid thread_stack_cache_init(void) 3238c2ecf20Sopenharmony_ci{ 3248c2ecf20Sopenharmony_ci thread_stack_cache = kmem_cache_create_usercopy("thread_stack", 3258c2ecf20Sopenharmony_ci THREAD_SIZE, THREAD_SIZE, 0, 0, 3268c2ecf20Sopenharmony_ci THREAD_SIZE, NULL); 3278c2ecf20Sopenharmony_ci BUG_ON(thread_stack_cache == NULL); 3288c2ecf20Sopenharmony_ci} 3298c2ecf20Sopenharmony_ci# endif 3308c2ecf20Sopenharmony_ci#endif 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci/* SLAB cache for signal_struct structures (tsk->signal) */ 3338c2ecf20Sopenharmony_cistatic struct kmem_cache *signal_cachep; 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci/* SLAB cache for sighand_struct structures (tsk->sighand) */ 3368c2ecf20Sopenharmony_cistruct kmem_cache *sighand_cachep; 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci/* SLAB cache for files_struct structures (tsk->files) */ 3398c2ecf20Sopenharmony_cistruct kmem_cache *files_cachep; 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci/* SLAB cache for fs_struct structures (tsk->fs) */ 3428c2ecf20Sopenharmony_cistruct kmem_cache *fs_cachep; 
3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci/* SLAB cache for vm_area_struct structures */ 3458c2ecf20Sopenharmony_cistatic struct kmem_cache *vm_area_cachep; 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_ci/* SLAB cache for mm_struct structures (tsk->mm) */ 3488c2ecf20Sopenharmony_cistatic struct kmem_cache *mm_cachep; 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_cistruct vm_area_struct *vm_area_alloc(struct mm_struct *mm) 3518c2ecf20Sopenharmony_ci{ 3528c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); 3558c2ecf20Sopenharmony_ci if (vma) 3568c2ecf20Sopenharmony_ci vma_init(vma, mm); 3578c2ecf20Sopenharmony_ci return vma; 3588c2ecf20Sopenharmony_ci} 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_cistruct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) 3618c2ecf20Sopenharmony_ci{ 3628c2ecf20Sopenharmony_ci struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci if (new) { 3658c2ecf20Sopenharmony_ci ASSERT_EXCLUSIVE_WRITER(orig->vm_flags); 3668c2ecf20Sopenharmony_ci ASSERT_EXCLUSIVE_WRITER(orig->vm_file); 3678c2ecf20Sopenharmony_ci /* 3688c2ecf20Sopenharmony_ci * orig->shared.rb may be modified concurrently, but the clone 3698c2ecf20Sopenharmony_ci * will be reinitialized. 
3708c2ecf20Sopenharmony_ci */ 3718c2ecf20Sopenharmony_ci *new = data_race(*orig); 3728c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&new->anon_vma_chain); 3738c2ecf20Sopenharmony_ci new->vm_next = new->vm_prev = NULL; 3748c2ecf20Sopenharmony_ci dup_anon_vma_name(orig, new); 3758c2ecf20Sopenharmony_ci } 3768c2ecf20Sopenharmony_ci return new; 3778c2ecf20Sopenharmony_ci} 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_civoid vm_area_free(struct vm_area_struct *vma) 3808c2ecf20Sopenharmony_ci{ 3818c2ecf20Sopenharmony_ci free_anon_vma_name(vma); 3828c2ecf20Sopenharmony_ci kmem_cache_free(vm_area_cachep, vma); 3838c2ecf20Sopenharmony_ci} 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_cistatic void account_kernel_stack(struct task_struct *tsk, int account) 3868c2ecf20Sopenharmony_ci{ 3878c2ecf20Sopenharmony_ci void *stack = task_stack_page(tsk); 3888c2ecf20Sopenharmony_ci struct vm_struct *vm = task_stack_vm_area(tsk); 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci /* All stack pages are in the same node. 
*/ 3928c2ecf20Sopenharmony_ci if (vm) 3938c2ecf20Sopenharmony_ci mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB, 3948c2ecf20Sopenharmony_ci account * (THREAD_SIZE / 1024)); 3958c2ecf20Sopenharmony_ci else 3968c2ecf20Sopenharmony_ci mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB, 3978c2ecf20Sopenharmony_ci account * (THREAD_SIZE / 1024)); 3988c2ecf20Sopenharmony_ci} 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_cistatic int memcg_charge_kernel_stack(struct task_struct *tsk) 4018c2ecf20Sopenharmony_ci{ 4028c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK 4038c2ecf20Sopenharmony_ci struct vm_struct *vm = task_stack_vm_area(tsk); 4048c2ecf20Sopenharmony_ci int ret; 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0); 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_ci if (vm) { 4098c2ecf20Sopenharmony_ci int i; 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE); 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { 4148c2ecf20Sopenharmony_ci /* 4158c2ecf20Sopenharmony_ci * If memcg_kmem_charge_page() fails, page->mem_cgroup 4168c2ecf20Sopenharmony_ci * pointer is NULL, and memcg_kmem_uncharge_page() in 4178c2ecf20Sopenharmony_ci * free_thread_stack() will ignore this page. 
4188c2ecf20Sopenharmony_ci */ 4198c2ecf20Sopenharmony_ci ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 4208c2ecf20Sopenharmony_ci 0); 4218c2ecf20Sopenharmony_ci if (ret) 4228c2ecf20Sopenharmony_ci return ret; 4238c2ecf20Sopenharmony_ci } 4248c2ecf20Sopenharmony_ci } 4258c2ecf20Sopenharmony_ci#endif 4268c2ecf20Sopenharmony_ci return 0; 4278c2ecf20Sopenharmony_ci} 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_cistatic void release_task_stack(struct task_struct *tsk) 4308c2ecf20Sopenharmony_ci{ 4318c2ecf20Sopenharmony_ci if (WARN_ON(tsk->state != TASK_DEAD)) 4328c2ecf20Sopenharmony_ci return; /* Better to leak the stack than to free prematurely */ 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci account_kernel_stack(tsk, -1); 4358c2ecf20Sopenharmony_ci free_thread_stack(tsk); 4368c2ecf20Sopenharmony_ci tsk->stack = NULL; 4378c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK 4388c2ecf20Sopenharmony_ci tsk->stack_vm_area = NULL; 4398c2ecf20Sopenharmony_ci#endif 4408c2ecf20Sopenharmony_ci} 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci#ifdef CONFIG_THREAD_INFO_IN_TASK 4438c2ecf20Sopenharmony_civoid put_task_stack(struct task_struct *tsk) 4448c2ecf20Sopenharmony_ci{ 4458c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&tsk->stack_refcount)) 4468c2ecf20Sopenharmony_ci release_task_stack(tsk); 4478c2ecf20Sopenharmony_ci} 4488c2ecf20Sopenharmony_ci#endif 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_civoid free_task(struct task_struct *tsk) 4518c2ecf20Sopenharmony_ci{ 4528c2ecf20Sopenharmony_ci#ifdef CONFIG_SECCOMP 4538c2ecf20Sopenharmony_ci WARN_ON_ONCE(tsk->seccomp.filter); 4548c2ecf20Sopenharmony_ci#endif 4558c2ecf20Sopenharmony_ci scs_release(tsk); 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci#ifndef CONFIG_THREAD_INFO_IN_TASK 4588c2ecf20Sopenharmony_ci /* 4598c2ecf20Sopenharmony_ci * The task is finally done with both the stack and thread_info, 4608c2ecf20Sopenharmony_ci * so free both. 
4618c2ecf20Sopenharmony_ci */ 4628c2ecf20Sopenharmony_ci release_task_stack(tsk); 4638c2ecf20Sopenharmony_ci#else 4648c2ecf20Sopenharmony_ci /* 4658c2ecf20Sopenharmony_ci * If the task had a separate stack allocation, it should be gone 4668c2ecf20Sopenharmony_ci * by now. 4678c2ecf20Sopenharmony_ci */ 4688c2ecf20Sopenharmony_ci WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0); 4698c2ecf20Sopenharmony_ci#endif 4708c2ecf20Sopenharmony_ci rt_mutex_debug_task_free(tsk); 4718c2ecf20Sopenharmony_ci ftrace_graph_exit_task(tsk); 4728c2ecf20Sopenharmony_ci arch_release_task_struct(tsk); 4738c2ecf20Sopenharmony_ci if (tsk->flags & PF_KTHREAD) 4748c2ecf20Sopenharmony_ci free_kthread_struct(tsk); 4758c2ecf20Sopenharmony_ci free_task_struct(tsk); 4768c2ecf20Sopenharmony_ci} 4778c2ecf20Sopenharmony_ciEXPORT_SYMBOL(free_task); 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU 4808c2ecf20Sopenharmony_cistatic __latent_entropy int dup_mmap(struct mm_struct *mm, 4818c2ecf20Sopenharmony_ci struct mm_struct *oldmm) 4828c2ecf20Sopenharmony_ci{ 4838c2ecf20Sopenharmony_ci struct vm_area_struct *mpnt, *tmp, *prev, **pprev; 4848c2ecf20Sopenharmony_ci struct rb_node **rb_link, *rb_parent; 4858c2ecf20Sopenharmony_ci int retval; 4868c2ecf20Sopenharmony_ci unsigned long charge; 4878c2ecf20Sopenharmony_ci LIST_HEAD(uf); 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci uprobe_start_dup_mmap(); 4908c2ecf20Sopenharmony_ci if (mmap_write_lock_killable(oldmm)) { 4918c2ecf20Sopenharmony_ci retval = -EINTR; 4928c2ecf20Sopenharmony_ci goto fail_uprobe_end; 4938c2ecf20Sopenharmony_ci } 4948c2ecf20Sopenharmony_ci flush_cache_dup_mm(oldmm); 4958c2ecf20Sopenharmony_ci uprobe_dup_mmap(oldmm, mm); 4968c2ecf20Sopenharmony_ci /* 4978c2ecf20Sopenharmony_ci * Not linked in yet - no deadlock potential: 4988c2ecf20Sopenharmony_ci */ 4998c2ecf20Sopenharmony_ci mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING); 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci /* No ordering 
required: file already has been exposed. */ 5028c2ecf20Sopenharmony_ci RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci mm->total_vm = oldmm->total_vm; 5058c2ecf20Sopenharmony_ci mm->data_vm = oldmm->data_vm; 5068c2ecf20Sopenharmony_ci mm->exec_vm = oldmm->exec_vm; 5078c2ecf20Sopenharmony_ci mm->stack_vm = oldmm->stack_vm; 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_ci rb_link = &mm->mm_rb.rb_node; 5108c2ecf20Sopenharmony_ci rb_parent = NULL; 5118c2ecf20Sopenharmony_ci pprev = &mm->mmap; 5128c2ecf20Sopenharmony_ci retval = ksm_fork(mm, oldmm); 5138c2ecf20Sopenharmony_ci if (retval) 5148c2ecf20Sopenharmony_ci goto out; 5158c2ecf20Sopenharmony_ci retval = khugepaged_fork(mm, oldmm); 5168c2ecf20Sopenharmony_ci if (retval) 5178c2ecf20Sopenharmony_ci goto out; 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci prev = NULL; 5208c2ecf20Sopenharmony_ci for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 5218c2ecf20Sopenharmony_ci struct file *file; 5228c2ecf20Sopenharmony_ci 5238c2ecf20Sopenharmony_ci if (mpnt->vm_flags & VM_DONTCOPY) { 5248c2ecf20Sopenharmony_ci vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt)); 5258c2ecf20Sopenharmony_ci continue; 5268c2ecf20Sopenharmony_ci } 5278c2ecf20Sopenharmony_ci charge = 0; 5288c2ecf20Sopenharmony_ci /* 5298c2ecf20Sopenharmony_ci * Don't duplicate many vmas if we've been oom-killed (for 5308c2ecf20Sopenharmony_ci * example) 5318c2ecf20Sopenharmony_ci */ 5328c2ecf20Sopenharmony_ci if (fatal_signal_pending(current)) { 5338c2ecf20Sopenharmony_ci retval = -EINTR; 5348c2ecf20Sopenharmony_ci goto out; 5358c2ecf20Sopenharmony_ci } 5368c2ecf20Sopenharmony_ci if (mpnt->vm_flags & VM_ACCOUNT) { 5378c2ecf20Sopenharmony_ci unsigned long len = vma_pages(mpnt); 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ 5408c2ecf20Sopenharmony_ci goto fail_nomem; 5418c2ecf20Sopenharmony_ci charge = len; 
5428c2ecf20Sopenharmony_ci } 5438c2ecf20Sopenharmony_ci tmp = vm_area_dup(mpnt); 5448c2ecf20Sopenharmony_ci if (!tmp) 5458c2ecf20Sopenharmony_ci goto fail_nomem; 5468c2ecf20Sopenharmony_ci retval = vma_dup_policy(mpnt, tmp); 5478c2ecf20Sopenharmony_ci if (retval) 5488c2ecf20Sopenharmony_ci goto fail_nomem_policy; 5498c2ecf20Sopenharmony_ci tmp->vm_mm = mm; 5508c2ecf20Sopenharmony_ci retval = dup_userfaultfd(tmp, &uf); 5518c2ecf20Sopenharmony_ci if (retval) 5528c2ecf20Sopenharmony_ci goto fail_nomem_anon_vma_fork; 5538c2ecf20Sopenharmony_ci if (tmp->vm_flags & VM_WIPEONFORK) { 5548c2ecf20Sopenharmony_ci /* 5558c2ecf20Sopenharmony_ci * VM_WIPEONFORK gets a clean slate in the child. 5568c2ecf20Sopenharmony_ci * Don't prepare anon_vma until fault since we don't 5578c2ecf20Sopenharmony_ci * copy page for current vma. 5588c2ecf20Sopenharmony_ci */ 5598c2ecf20Sopenharmony_ci tmp->anon_vma = NULL; 5608c2ecf20Sopenharmony_ci } else if (anon_vma_fork(tmp, mpnt)) 5618c2ecf20Sopenharmony_ci goto fail_nomem_anon_vma_fork; 5628c2ecf20Sopenharmony_ci tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); 5638c2ecf20Sopenharmony_ci file = tmp->vm_file; 5648c2ecf20Sopenharmony_ci if (file) { 5658c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 5668c2ecf20Sopenharmony_ci struct address_space *mapping = file->f_mapping; 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci get_file(file); 5698c2ecf20Sopenharmony_ci if (tmp->vm_flags & VM_DENYWRITE) 5708c2ecf20Sopenharmony_ci put_write_access(inode); 5718c2ecf20Sopenharmony_ci i_mmap_lock_write(mapping); 5728c2ecf20Sopenharmony_ci if (tmp->vm_flags & VM_SHARED) 5738c2ecf20Sopenharmony_ci mapping_allow_writable(mapping); 5748c2ecf20Sopenharmony_ci flush_dcache_mmap_lock(mapping); 5758c2ecf20Sopenharmony_ci /* insert tmp into the share list, just after mpnt */ 5768c2ecf20Sopenharmony_ci vma_interval_tree_insert_after(tmp, mpnt, 5778c2ecf20Sopenharmony_ci &mapping->i_mmap); 5788c2ecf20Sopenharmony_ci 
flush_dcache_mmap_unlock(mapping); 5798c2ecf20Sopenharmony_ci i_mmap_unlock_write(mapping); 5808c2ecf20Sopenharmony_ci } 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci /* 5838c2ecf20Sopenharmony_ci * Clear hugetlb-related page reserves for children. This only 5848c2ecf20Sopenharmony_ci * affects MAP_PRIVATE mappings. Faults generated by the child 5858c2ecf20Sopenharmony_ci * are not guaranteed to succeed, even if read-only 5868c2ecf20Sopenharmony_ci */ 5878c2ecf20Sopenharmony_ci if (is_vm_hugetlb_page(tmp)) 5888c2ecf20Sopenharmony_ci reset_vma_resv_huge_pages(tmp); 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci /* 5918c2ecf20Sopenharmony_ci * Link in the new vma and copy the page table entries. 5928c2ecf20Sopenharmony_ci */ 5938c2ecf20Sopenharmony_ci *pprev = tmp; 5948c2ecf20Sopenharmony_ci pprev = &tmp->vm_next; 5958c2ecf20Sopenharmony_ci tmp->vm_prev = prev; 5968c2ecf20Sopenharmony_ci prev = tmp; 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci __vma_link_rb(mm, tmp, rb_link, rb_parent); 5998c2ecf20Sopenharmony_ci rb_link = &tmp->vm_rb.rb_right; 6008c2ecf20Sopenharmony_ci rb_parent = &tmp->vm_rb; 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci mm->map_count++; 6038c2ecf20Sopenharmony_ci if (!(tmp->vm_flags & VM_WIPEONFORK)) 6048c2ecf20Sopenharmony_ci retval = copy_page_range(tmp, mpnt); 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ci if (tmp->vm_ops && tmp->vm_ops->open) 6078c2ecf20Sopenharmony_ci tmp->vm_ops->open(tmp); 6088c2ecf20Sopenharmony_ci 6098c2ecf20Sopenharmony_ci if (retval) 6108c2ecf20Sopenharmony_ci goto out; 6118c2ecf20Sopenharmony_ci } 6128c2ecf20Sopenharmony_ci /* a new mm has just been created */ 6138c2ecf20Sopenharmony_ci retval = arch_dup_mmap(oldmm, mm); 6148c2ecf20Sopenharmony_ciout: 6158c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 6168c2ecf20Sopenharmony_ci flush_tlb_mm(oldmm); 6178c2ecf20Sopenharmony_ci mmap_write_unlock(oldmm); 6188c2ecf20Sopenharmony_ci dup_userfaultfd_complete(&uf); 
6198c2ecf20Sopenharmony_cifail_uprobe_end: 6208c2ecf20Sopenharmony_ci uprobe_end_dup_mmap(); 6218c2ecf20Sopenharmony_ci return retval; 6228c2ecf20Sopenharmony_cifail_nomem_anon_vma_fork: 6238c2ecf20Sopenharmony_ci mpol_put(vma_policy(tmp)); 6248c2ecf20Sopenharmony_cifail_nomem_policy: 6258c2ecf20Sopenharmony_ci vm_area_free(tmp); 6268c2ecf20Sopenharmony_cifail_nomem: 6278c2ecf20Sopenharmony_ci retval = -ENOMEM; 6288c2ecf20Sopenharmony_ci vm_unacct_memory(charge); 6298c2ecf20Sopenharmony_ci goto out; 6308c2ecf20Sopenharmony_ci} 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_cistatic inline int mm_alloc_pgd(struct mm_struct *mm) 6338c2ecf20Sopenharmony_ci{ 6348c2ecf20Sopenharmony_ci mm_init_uxpgd(mm); 6358c2ecf20Sopenharmony_ci mm->pgd = pgd_alloc(mm); 6368c2ecf20Sopenharmony_ci if (unlikely(!mm->pgd)) 6378c2ecf20Sopenharmony_ci return -ENOMEM; 6388c2ecf20Sopenharmony_ci return 0; 6398c2ecf20Sopenharmony_ci} 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_cistatic inline void mm_free_pgd(struct mm_struct *mm) 6428c2ecf20Sopenharmony_ci{ 6438c2ecf20Sopenharmony_ci pgd_free(mm, mm->pgd); 6448c2ecf20Sopenharmony_ci mm_clear_uxpgd(mm); 6458c2ecf20Sopenharmony_ci} 6468c2ecf20Sopenharmony_ci#else 6478c2ecf20Sopenharmony_cistatic int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) 6488c2ecf20Sopenharmony_ci{ 6498c2ecf20Sopenharmony_ci mmap_write_lock(oldmm); 6508c2ecf20Sopenharmony_ci RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm)); 6518c2ecf20Sopenharmony_ci mmap_write_unlock(oldmm); 6528c2ecf20Sopenharmony_ci return 0; 6538c2ecf20Sopenharmony_ci} 6548c2ecf20Sopenharmony_ci#define mm_alloc_pgd(mm) (0) 6558c2ecf20Sopenharmony_ci#define mm_free_pgd(mm) 6568c2ecf20Sopenharmony_ci#endif /* CONFIG_MMU */ 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_cistatic void check_mm(struct mm_struct *mm) 6598c2ecf20Sopenharmony_ci{ 6608c2ecf20Sopenharmony_ci int i; 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci 
BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS, 6638c2ecf20Sopenharmony_ci "Please make sure 'struct resident_page_types[]' is updated as well"); 6648c2ecf20Sopenharmony_ci 6658c2ecf20Sopenharmony_ci for (i = 0; i < NR_MM_COUNTERS; i++) { 6668c2ecf20Sopenharmony_ci long x = atomic_long_read(&mm->rss_stat.count[i]); 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_ci if (unlikely(x)) 6698c2ecf20Sopenharmony_ci pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n", 6708c2ecf20Sopenharmony_ci mm, resident_page_types[i], x); 6718c2ecf20Sopenharmony_ci } 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci if (mm_pgtables_bytes(mm)) 6748c2ecf20Sopenharmony_ci pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n", 6758c2ecf20Sopenharmony_ci mm_pgtables_bytes(mm)); 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS 6788c2ecf20Sopenharmony_ci VM_BUG_ON_MM(mm->pmd_huge_pte, mm); 6798c2ecf20Sopenharmony_ci#endif 6808c2ecf20Sopenharmony_ci} 6818c2ecf20Sopenharmony_ci 6828c2ecf20Sopenharmony_ci#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) 6838c2ecf20Sopenharmony_ci#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci/* 6868c2ecf20Sopenharmony_ci * Called when the last reference to the mm 6878c2ecf20Sopenharmony_ci * is dropped: either by a lazy thread or by 6888c2ecf20Sopenharmony_ci * mmput. Free the page directory and the mm. 
6898c2ecf20Sopenharmony_ci */ 6908c2ecf20Sopenharmony_civoid __mmdrop(struct mm_struct *mm) 6918c2ecf20Sopenharmony_ci{ 6928c2ecf20Sopenharmony_ci BUG_ON(mm == &init_mm); 6938c2ecf20Sopenharmony_ci WARN_ON_ONCE(mm == current->mm); 6948c2ecf20Sopenharmony_ci WARN_ON_ONCE(mm == current->active_mm); 6958c2ecf20Sopenharmony_ci mm_free_pgd(mm); 6968c2ecf20Sopenharmony_ci destroy_context(mm); 6978c2ecf20Sopenharmony_ci mmu_notifier_subscriptions_destroy(mm); 6988c2ecf20Sopenharmony_ci check_mm(mm); 6998c2ecf20Sopenharmony_ci put_user_ns(mm->user_ns); 7008c2ecf20Sopenharmony_ci free_mm(mm); 7018c2ecf20Sopenharmony_ci} 7028c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__mmdrop); 7038c2ecf20Sopenharmony_ci 7048c2ecf20Sopenharmony_cistatic void mmdrop_async_fn(struct work_struct *work) 7058c2ecf20Sopenharmony_ci{ 7068c2ecf20Sopenharmony_ci struct mm_struct *mm; 7078c2ecf20Sopenharmony_ci 7088c2ecf20Sopenharmony_ci mm = container_of(work, struct mm_struct, async_put_work); 7098c2ecf20Sopenharmony_ci __mmdrop(mm); 7108c2ecf20Sopenharmony_ci} 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_cistatic void mmdrop_async(struct mm_struct *mm) 7138c2ecf20Sopenharmony_ci{ 7148c2ecf20Sopenharmony_ci if (unlikely(atomic_dec_and_test(&mm->mm_count))) { 7158c2ecf20Sopenharmony_ci INIT_WORK(&mm->async_put_work, mmdrop_async_fn); 7168c2ecf20Sopenharmony_ci schedule_work(&mm->async_put_work); 7178c2ecf20Sopenharmony_ci } 7188c2ecf20Sopenharmony_ci} 7198c2ecf20Sopenharmony_ci 7208c2ecf20Sopenharmony_cistatic inline void free_signal_struct(struct signal_struct *sig) 7218c2ecf20Sopenharmony_ci{ 7228c2ecf20Sopenharmony_ci taskstats_tgid_free(sig); 7238c2ecf20Sopenharmony_ci sched_autogroup_exit(sig); 7248c2ecf20Sopenharmony_ci /* 7258c2ecf20Sopenharmony_ci * __mmdrop is not safe to call from softirq context on x86 due to 7268c2ecf20Sopenharmony_ci * pgd_dtor so postpone it to the async context 7278c2ecf20Sopenharmony_ci */ 7288c2ecf20Sopenharmony_ci if (sig->oom_mm) 7298c2ecf20Sopenharmony_ci 
mmdrop_async(sig->oom_mm); 7308c2ecf20Sopenharmony_ci kmem_cache_free(signal_cachep, sig); 7318c2ecf20Sopenharmony_ci} 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_cistatic inline void put_signal_struct(struct signal_struct *sig) 7348c2ecf20Sopenharmony_ci{ 7358c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&sig->sigcnt)) 7368c2ecf20Sopenharmony_ci free_signal_struct(sig); 7378c2ecf20Sopenharmony_ci} 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_civoid __put_task_struct(struct task_struct *tsk) 7408c2ecf20Sopenharmony_ci{ 7418c2ecf20Sopenharmony_ci WARN_ON(!tsk->exit_state); 7428c2ecf20Sopenharmony_ci WARN_ON(refcount_read(&tsk->usage)); 7438c2ecf20Sopenharmony_ci WARN_ON(tsk == current); 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci io_uring_free(tsk); 7468c2ecf20Sopenharmony_ci cgroup_free(tsk); 7478c2ecf20Sopenharmony_ci task_numa_free(tsk, true); 7488c2ecf20Sopenharmony_ci security_task_free(tsk); 7498c2ecf20Sopenharmony_ci exit_creds(tsk); 7508c2ecf20Sopenharmony_ci delayacct_tsk_free(tsk); 7518c2ecf20Sopenharmony_ci put_signal_struct(tsk->signal); 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_ci if (!profile_handoff_task(tsk)) 7548c2ecf20Sopenharmony_ci free_task(tsk); 7558c2ecf20Sopenharmony_ci} 7568c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__put_task_struct); 7578c2ecf20Sopenharmony_ci 7588c2ecf20Sopenharmony_civoid __put_task_struct_rcu_cb(struct rcu_head *rhp) 7598c2ecf20Sopenharmony_ci{ 7608c2ecf20Sopenharmony_ci struct task_struct *task = container_of(rhp, struct task_struct, rcu); 7618c2ecf20Sopenharmony_ci 7628c2ecf20Sopenharmony_ci __put_task_struct(task); 7638c2ecf20Sopenharmony_ci} 7648c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb); 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_civoid __init __weak arch_task_cache_init(void) { } 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci/* 7698c2ecf20Sopenharmony_ci * set_max_threads 7708c2ecf20Sopenharmony_ci */ 7718c2ecf20Sopenharmony_cistatic void 
set_max_threads(unsigned int max_threads_suggested) 7728c2ecf20Sopenharmony_ci{ 7738c2ecf20Sopenharmony_ci u64 threads; 7748c2ecf20Sopenharmony_ci unsigned long nr_pages = totalram_pages(); 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci /* 7778c2ecf20Sopenharmony_ci * The number of threads shall be limited such that the thread 7788c2ecf20Sopenharmony_ci * structures may only consume a small part of the available memory. 7798c2ecf20Sopenharmony_ci */ 7808c2ecf20Sopenharmony_ci if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64) 7818c2ecf20Sopenharmony_ci threads = MAX_THREADS; 7828c2ecf20Sopenharmony_ci else 7838c2ecf20Sopenharmony_ci threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE, 7848c2ecf20Sopenharmony_ci (u64) THREAD_SIZE * 8UL); 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci if (threads > max_threads_suggested) 7878c2ecf20Sopenharmony_ci threads = max_threads_suggested; 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); 7908c2ecf20Sopenharmony_ci} 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT 7938c2ecf20Sopenharmony_ci/* Initialized by the architecture: */ 7948c2ecf20Sopenharmony_ciint arch_task_struct_size __read_mostly; 7958c2ecf20Sopenharmony_ci#endif 7968c2ecf20Sopenharmony_ci 7978c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR 7988c2ecf20Sopenharmony_cistatic void task_struct_whitelist(unsigned long *offset, unsigned long *size) 7998c2ecf20Sopenharmony_ci{ 8008c2ecf20Sopenharmony_ci /* Fetch thread_struct whitelist for the architecture. */ 8018c2ecf20Sopenharmony_ci arch_thread_struct_whitelist(offset, size); 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci /* 8048c2ecf20Sopenharmony_ci * Handle zero-sized whitelist or empty thread_struct, otherwise 8058c2ecf20Sopenharmony_ci * adjust offset to position of thread_struct in task_struct. 
8068c2ecf20Sopenharmony_ci */ 8078c2ecf20Sopenharmony_ci if (unlikely(*size == 0)) 8088c2ecf20Sopenharmony_ci *offset = 0; 8098c2ecf20Sopenharmony_ci else 8108c2ecf20Sopenharmony_ci *offset += offsetof(struct task_struct, thread); 8118c2ecf20Sopenharmony_ci} 8128c2ecf20Sopenharmony_ci#endif /* CONFIG_ARCH_TASK_STRUCT_ALLOCATOR */ 8138c2ecf20Sopenharmony_ci 8148c2ecf20Sopenharmony_civoid __init fork_init(void) 8158c2ecf20Sopenharmony_ci{ 8168c2ecf20Sopenharmony_ci int i; 8178c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR 8188c2ecf20Sopenharmony_ci#ifndef ARCH_MIN_TASKALIGN 8198c2ecf20Sopenharmony_ci#define ARCH_MIN_TASKALIGN 0 8208c2ecf20Sopenharmony_ci#endif 8218c2ecf20Sopenharmony_ci int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); 8228c2ecf20Sopenharmony_ci unsigned long useroffset, usersize; 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci /* create a slab on which task_structs can be allocated */ 8258c2ecf20Sopenharmony_ci task_struct_whitelist(&useroffset, &usersize); 8268c2ecf20Sopenharmony_ci task_struct_cachep = kmem_cache_create_usercopy("task_struct", 8278c2ecf20Sopenharmony_ci arch_task_struct_size, align, 8288c2ecf20Sopenharmony_ci SLAB_PANIC|SLAB_ACCOUNT, 8298c2ecf20Sopenharmony_ci useroffset, usersize, NULL); 8308c2ecf20Sopenharmony_ci#endif 8318c2ecf20Sopenharmony_ci 8328c2ecf20Sopenharmony_ci /* do the arch specific task caches init */ 8338c2ecf20Sopenharmony_ci arch_task_cache_init(); 8348c2ecf20Sopenharmony_ci 8358c2ecf20Sopenharmony_ci set_max_threads(MAX_THREADS); 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_ci init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; 8388c2ecf20Sopenharmony_ci init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; 8398c2ecf20Sopenharmony_ci init_task.signal->rlim[RLIMIT_SIGPENDING] = 8408c2ecf20Sopenharmony_ci init_task.signal->rlim[RLIMIT_NPROC]; 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ci for (i = 0; i < UCOUNT_COUNTS; i++) { 
8438c2ecf20Sopenharmony_ci init_user_ns.ucount_max[i] = max_threads/2; 8448c2ecf20Sopenharmony_ci } 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK 8478c2ecf20Sopenharmony_ci cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", 8488c2ecf20Sopenharmony_ci NULL, free_vm_stack_cache); 8498c2ecf20Sopenharmony_ci#endif 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci scs_init(); 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci lockdep_init_task(&init_task); 8548c2ecf20Sopenharmony_ci uprobes_init(); 8558c2ecf20Sopenharmony_ci} 8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_ciint __weak arch_dup_task_struct(struct task_struct *dst, 8588c2ecf20Sopenharmony_ci struct task_struct *src) 8598c2ecf20Sopenharmony_ci{ 8608c2ecf20Sopenharmony_ci *dst = *src; 8618c2ecf20Sopenharmony_ci return 0; 8628c2ecf20Sopenharmony_ci} 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_civoid set_task_stack_end_magic(struct task_struct *tsk) 8658c2ecf20Sopenharmony_ci{ 8668c2ecf20Sopenharmony_ci unsigned long *stackend; 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_ci stackend = end_of_stack(tsk); 8698c2ecf20Sopenharmony_ci *stackend = STACK_END_MAGIC; /* for overflow detection */ 8708c2ecf20Sopenharmony_ci} 8718c2ecf20Sopenharmony_ci 8728c2ecf20Sopenharmony_cistatic struct task_struct *dup_task_struct(struct task_struct *orig, int node) 8738c2ecf20Sopenharmony_ci{ 8748c2ecf20Sopenharmony_ci struct task_struct *tsk; 8758c2ecf20Sopenharmony_ci unsigned long *stack; 8768c2ecf20Sopenharmony_ci struct vm_struct *stack_vm_area __maybe_unused; 8778c2ecf20Sopenharmony_ci int err; 8788c2ecf20Sopenharmony_ci 8798c2ecf20Sopenharmony_ci if (node == NUMA_NO_NODE) 8808c2ecf20Sopenharmony_ci node = tsk_fork_get_node(orig); 8818c2ecf20Sopenharmony_ci tsk = alloc_task_struct_node(node); 8828c2ecf20Sopenharmony_ci if (!tsk) 8838c2ecf20Sopenharmony_ci return NULL; 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci stack = alloc_thread_stack_node(tsk, 
node); 8868c2ecf20Sopenharmony_ci if (!stack) 8878c2ecf20Sopenharmony_ci goto free_tsk; 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ci if (memcg_charge_kernel_stack(tsk)) 8908c2ecf20Sopenharmony_ci goto free_stack; 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci stack_vm_area = task_stack_vm_area(tsk); 8938c2ecf20Sopenharmony_ci 8948c2ecf20Sopenharmony_ci err = arch_dup_task_struct(tsk, orig); 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci#ifdef CONFIG_ACCESS_TOKENID 8978c2ecf20Sopenharmony_ci tsk->token = orig->token; 8988c2ecf20Sopenharmony_ci tsk->ftoken = 0; 8998c2ecf20Sopenharmony_ci#endif 9008c2ecf20Sopenharmony_ci /* 9018c2ecf20Sopenharmony_ci * arch_dup_task_struct() clobbers the stack-related fields. Make 9028c2ecf20Sopenharmony_ci * sure they're properly initialized before using any stack-related 9038c2ecf20Sopenharmony_ci * functions again. 9048c2ecf20Sopenharmony_ci */ 9058c2ecf20Sopenharmony_ci tsk->stack = stack; 9068c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK 9078c2ecf20Sopenharmony_ci tsk->stack_vm_area = stack_vm_area; 9088c2ecf20Sopenharmony_ci#endif 9098c2ecf20Sopenharmony_ci#ifdef CONFIG_THREAD_INFO_IN_TASK 9108c2ecf20Sopenharmony_ci refcount_set(&tsk->stack_refcount, 1); 9118c2ecf20Sopenharmony_ci#endif 9128c2ecf20Sopenharmony_ci 9138c2ecf20Sopenharmony_ci if (err) 9148c2ecf20Sopenharmony_ci goto free_stack; 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ci err = scs_prepare(tsk, node); 9178c2ecf20Sopenharmony_ci if (err) 9188c2ecf20Sopenharmony_ci goto free_stack; 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci#ifdef CONFIG_SECCOMP 9218c2ecf20Sopenharmony_ci /* 9228c2ecf20Sopenharmony_ci * We must handle setting up seccomp filters once we're under 9238c2ecf20Sopenharmony_ci * the sighand lock in case orig has changed between now and 9248c2ecf20Sopenharmony_ci * then. Until then, filter must be NULL to avoid messing up 9258c2ecf20Sopenharmony_ci * the usage counts on the error path calling free_task. 
9268c2ecf20Sopenharmony_ci */ 9278c2ecf20Sopenharmony_ci tsk->seccomp.filter = NULL; 9288c2ecf20Sopenharmony_ci#endif 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_ci setup_thread_stack(tsk, orig); 9318c2ecf20Sopenharmony_ci clear_user_return_notifier(tsk); 9328c2ecf20Sopenharmony_ci clear_tsk_need_resched(tsk); 9338c2ecf20Sopenharmony_ci set_task_stack_end_magic(tsk); 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci#ifdef CONFIG_STACKPROTECTOR 9368c2ecf20Sopenharmony_ci tsk->stack_canary = get_random_canary(); 9378c2ecf20Sopenharmony_ci#endif 9388c2ecf20Sopenharmony_ci if (orig->cpus_ptr == &orig->cpus_mask) 9398c2ecf20Sopenharmony_ci tsk->cpus_ptr = &tsk->cpus_mask; 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci /* 9428c2ecf20Sopenharmony_ci * One for the user space visible state that goes away when reaped. 9438c2ecf20Sopenharmony_ci * One for the scheduler. 9448c2ecf20Sopenharmony_ci */ 9458c2ecf20Sopenharmony_ci refcount_set(&tsk->rcu_users, 2); 9468c2ecf20Sopenharmony_ci /* One for the rcu users */ 9478c2ecf20Sopenharmony_ci refcount_set(&tsk->usage, 1); 9488c2ecf20Sopenharmony_ci#ifdef CONFIG_BLK_DEV_IO_TRACE 9498c2ecf20Sopenharmony_ci tsk->btrace_seq = 0; 9508c2ecf20Sopenharmony_ci#endif 9518c2ecf20Sopenharmony_ci tsk->splice_pipe = NULL; 9528c2ecf20Sopenharmony_ci tsk->task_frag.page = NULL; 9538c2ecf20Sopenharmony_ci tsk->wake_q.next = NULL; 9548c2ecf20Sopenharmony_ci tsk->pf_io_worker = NULL; 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci account_kernel_stack(tsk, 1); 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_ci kcov_task_init(tsk); 9598c2ecf20Sopenharmony_ci 9608c2ecf20Sopenharmony_ci#ifdef CONFIG_FAULT_INJECTION 9618c2ecf20Sopenharmony_ci tsk->fail_nth = 0; 9628c2ecf20Sopenharmony_ci#endif 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci#ifdef CONFIG_BLK_CGROUP 9658c2ecf20Sopenharmony_ci tsk->throttle_queue = NULL; 9668c2ecf20Sopenharmony_ci tsk->use_memdelay = 0; 9678c2ecf20Sopenharmony_ci#endif 
9688c2ecf20Sopenharmony_ci 9698c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMCG 9708c2ecf20Sopenharmony_ci tsk->active_memcg = NULL; 9718c2ecf20Sopenharmony_ci#endif 9728c2ecf20Sopenharmony_ci return tsk; 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_cifree_stack: 9758c2ecf20Sopenharmony_ci free_thread_stack(tsk); 9768c2ecf20Sopenharmony_cifree_tsk: 9778c2ecf20Sopenharmony_ci free_task_struct(tsk); 9788c2ecf20Sopenharmony_ci return NULL; 9798c2ecf20Sopenharmony_ci} 9808c2ecf20Sopenharmony_ci 9818c2ecf20Sopenharmony_ci__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); 9828c2ecf20Sopenharmony_ci 9838c2ecf20Sopenharmony_cistatic unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT; 9848c2ecf20Sopenharmony_ci 9858c2ecf20Sopenharmony_cistatic int __init coredump_filter_setup(char *s) 9868c2ecf20Sopenharmony_ci{ 9878c2ecf20Sopenharmony_ci default_dump_filter = 9888c2ecf20Sopenharmony_ci (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) & 9898c2ecf20Sopenharmony_ci MMF_DUMP_FILTER_MASK; 9908c2ecf20Sopenharmony_ci return 1; 9918c2ecf20Sopenharmony_ci} 9928c2ecf20Sopenharmony_ci 9938c2ecf20Sopenharmony_ci__setup("coredump_filter=", coredump_filter_setup); 9948c2ecf20Sopenharmony_ci 9958c2ecf20Sopenharmony_ci#include <linux/init_task.h> 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_cistatic void mm_init_aio(struct mm_struct *mm) 9988c2ecf20Sopenharmony_ci{ 9998c2ecf20Sopenharmony_ci#ifdef CONFIG_AIO 10008c2ecf20Sopenharmony_ci spin_lock_init(&mm->ioctx_lock); 10018c2ecf20Sopenharmony_ci mm->ioctx_table = NULL; 10028c2ecf20Sopenharmony_ci#endif 10038c2ecf20Sopenharmony_ci} 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_cistatic __always_inline void mm_clear_owner(struct mm_struct *mm, 10068c2ecf20Sopenharmony_ci struct task_struct *p) 10078c2ecf20Sopenharmony_ci{ 10088c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMCG 10098c2ecf20Sopenharmony_ci if (mm->owner == p) 10108c2ecf20Sopenharmony_ci WRITE_ONCE(mm->owner, NULL); 10118c2ecf20Sopenharmony_ci#endif 
10128c2ecf20Sopenharmony_ci} 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_cistatic void mm_init_owner(struct mm_struct *mm, struct task_struct *p) 10158c2ecf20Sopenharmony_ci{ 10168c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMCG 10178c2ecf20Sopenharmony_ci mm->owner = p; 10188c2ecf20Sopenharmony_ci#endif 10198c2ecf20Sopenharmony_ci} 10208c2ecf20Sopenharmony_ci 10218c2ecf20Sopenharmony_cistatic void mm_init_pasid(struct mm_struct *mm) 10228c2ecf20Sopenharmony_ci{ 10238c2ecf20Sopenharmony_ci#ifdef CONFIG_IOMMU_SUPPORT 10248c2ecf20Sopenharmony_ci mm->pasid = INIT_PASID; 10258c2ecf20Sopenharmony_ci#endif 10268c2ecf20Sopenharmony_ci} 10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_cistatic void mm_init_uprobes_state(struct mm_struct *mm) 10298c2ecf20Sopenharmony_ci{ 10308c2ecf20Sopenharmony_ci#ifdef CONFIG_UPROBES 10318c2ecf20Sopenharmony_ci mm->uprobes_state.xol_area = NULL; 10328c2ecf20Sopenharmony_ci#endif 10338c2ecf20Sopenharmony_ci} 10348c2ecf20Sopenharmony_ci 10358c2ecf20Sopenharmony_cistatic struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, 10368c2ecf20Sopenharmony_ci struct user_namespace *user_ns) 10378c2ecf20Sopenharmony_ci{ 10388c2ecf20Sopenharmony_ci mm->mmap = NULL; 10398c2ecf20Sopenharmony_ci mm->mm_rb = RB_ROOT; 10408c2ecf20Sopenharmony_ci mm->vmacache_seqnum = 0; 10418c2ecf20Sopenharmony_ci#ifdef CONFIG_RSS_THRESHOLD 10428c2ecf20Sopenharmony_ci mm->rss_threshold = 0; 10438c2ecf20Sopenharmony_ci#endif 10448c2ecf20Sopenharmony_ci atomic_set(&mm->mm_users, 1); 10458c2ecf20Sopenharmony_ci atomic_set(&mm->mm_count, 1); 10468c2ecf20Sopenharmony_ci seqcount_init(&mm->write_protect_seq); 10478c2ecf20Sopenharmony_ci mmap_init_lock(mm); 10488c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mm->mmlist); 10498c2ecf20Sopenharmony_ci mm->core_state = NULL; 10508c2ecf20Sopenharmony_ci mm_pgtables_bytes_init(mm); 10518c2ecf20Sopenharmony_ci mm->map_count = 0; 10528c2ecf20Sopenharmony_ci mm->locked_vm = 0; 10538c2ecf20Sopenharmony_ci 
atomic_set(&mm->has_pinned, 0); 10548c2ecf20Sopenharmony_ci atomic64_set(&mm->pinned_vm, 0); 10558c2ecf20Sopenharmony_ci memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); 10568c2ecf20Sopenharmony_ci spin_lock_init(&mm->page_table_lock); 10578c2ecf20Sopenharmony_ci spin_lock_init(&mm->arg_lock); 10588c2ecf20Sopenharmony_ci mm_init_cpumask(mm); 10598c2ecf20Sopenharmony_ci mm_init_aio(mm); 10608c2ecf20Sopenharmony_ci mm_init_owner(mm, p); 10618c2ecf20Sopenharmony_ci mm_init_pasid(mm); 10628c2ecf20Sopenharmony_ci RCU_INIT_POINTER(mm->exe_file, NULL); 10638c2ecf20Sopenharmony_ci mmu_notifier_subscriptions_init(mm); 10648c2ecf20Sopenharmony_ci init_tlb_flush_pending(mm); 10658c2ecf20Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS 10668c2ecf20Sopenharmony_ci mm->pmd_huge_pte = NULL; 10678c2ecf20Sopenharmony_ci#endif 10688c2ecf20Sopenharmony_ci mm_init_uprobes_state(mm); 10698c2ecf20Sopenharmony_ci hugetlb_count_init(mm); 10708c2ecf20Sopenharmony_ci 10718c2ecf20Sopenharmony_ci if (current->mm) { 10728c2ecf20Sopenharmony_ci mm->flags = current->mm->flags & MMF_INIT_MASK; 10738c2ecf20Sopenharmony_ci mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; 10748c2ecf20Sopenharmony_ci } else { 10758c2ecf20Sopenharmony_ci mm->flags = default_dump_filter; 10768c2ecf20Sopenharmony_ci mm->def_flags = 0; 10778c2ecf20Sopenharmony_ci } 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci if (mm_alloc_pgd(mm)) 10808c2ecf20Sopenharmony_ci goto fail_nopgd; 10818c2ecf20Sopenharmony_ci 10828c2ecf20Sopenharmony_ci if (init_new_context(p, mm)) 10838c2ecf20Sopenharmony_ci goto fail_nocontext; 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci mm->user_ns = get_user_ns(user_ns); 10868c2ecf20Sopenharmony_ci return mm; 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_cifail_nocontext: 10898c2ecf20Sopenharmony_ci mm_free_pgd(mm); 10908c2ecf20Sopenharmony_cifail_nopgd: 10918c2ecf20Sopenharmony_ci free_mm(mm); 10928c2ecf20Sopenharmony_ci return NULL; 
10938c2ecf20Sopenharmony_ci} 10948c2ecf20Sopenharmony_ci 10958c2ecf20Sopenharmony_ci/* 10968c2ecf20Sopenharmony_ci * Allocate and initialize an mm_struct. 10978c2ecf20Sopenharmony_ci */ 10988c2ecf20Sopenharmony_cistruct mm_struct *mm_alloc(void) 10998c2ecf20Sopenharmony_ci{ 11008c2ecf20Sopenharmony_ci struct mm_struct *mm; 11018c2ecf20Sopenharmony_ci 11028c2ecf20Sopenharmony_ci mm = allocate_mm(); 11038c2ecf20Sopenharmony_ci if (!mm) 11048c2ecf20Sopenharmony_ci return NULL; 11058c2ecf20Sopenharmony_ci 11068c2ecf20Sopenharmony_ci memset(mm, 0, sizeof(*mm)); 11078c2ecf20Sopenharmony_ci return mm_init(mm, current, current_user_ns()); 11088c2ecf20Sopenharmony_ci} 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_cistatic inline void __mmput(struct mm_struct *mm) 11118c2ecf20Sopenharmony_ci{ 11128c2ecf20Sopenharmony_ci VM_BUG_ON(atomic_read(&mm->mm_users)); 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci uprobe_clear_state(mm); 11158c2ecf20Sopenharmony_ci exit_aio(mm); 11168c2ecf20Sopenharmony_ci ksm_exit(mm); 11178c2ecf20Sopenharmony_ci khugepaged_exit(mm); /* must run before exit_mmap */ 11188c2ecf20Sopenharmony_ci exit_mmap(mm); 11198c2ecf20Sopenharmony_ci mm_put_huge_zero_page(mm); 11208c2ecf20Sopenharmony_ci set_mm_exe_file(mm, NULL); 11218c2ecf20Sopenharmony_ci if (!list_empty(&mm->mmlist)) { 11228c2ecf20Sopenharmony_ci spin_lock(&mmlist_lock); 11238c2ecf20Sopenharmony_ci list_del(&mm->mmlist); 11248c2ecf20Sopenharmony_ci spin_unlock(&mmlist_lock); 11258c2ecf20Sopenharmony_ci } 11268c2ecf20Sopenharmony_ci if (mm->binfmt) 11278c2ecf20Sopenharmony_ci module_put(mm->binfmt->module); 11288c2ecf20Sopenharmony_ci mmdrop(mm); 11298c2ecf20Sopenharmony_ci} 11308c2ecf20Sopenharmony_ci 11318c2ecf20Sopenharmony_ci/* 11328c2ecf20Sopenharmony_ci * Decrement the use count and release all resources for an mm. 
11338c2ecf20Sopenharmony_ci */ 11348c2ecf20Sopenharmony_civoid mmput(struct mm_struct *mm) 11358c2ecf20Sopenharmony_ci{ 11368c2ecf20Sopenharmony_ci might_sleep(); 11378c2ecf20Sopenharmony_ci 11388c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&mm->mm_users)) 11398c2ecf20Sopenharmony_ci __mmput(mm); 11408c2ecf20Sopenharmony_ci} 11418c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(mmput); 11428c2ecf20Sopenharmony_ci 11438c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU 11448c2ecf20Sopenharmony_cistatic void mmput_async_fn(struct work_struct *work) 11458c2ecf20Sopenharmony_ci{ 11468c2ecf20Sopenharmony_ci struct mm_struct *mm = container_of(work, struct mm_struct, 11478c2ecf20Sopenharmony_ci async_put_work); 11488c2ecf20Sopenharmony_ci 11498c2ecf20Sopenharmony_ci __mmput(mm); 11508c2ecf20Sopenharmony_ci} 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_civoid mmput_async(struct mm_struct *mm) 11538c2ecf20Sopenharmony_ci{ 11548c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&mm->mm_users)) { 11558c2ecf20Sopenharmony_ci INIT_WORK(&mm->async_put_work, mmput_async_fn); 11568c2ecf20Sopenharmony_ci schedule_work(&mm->async_put_work); 11578c2ecf20Sopenharmony_ci } 11588c2ecf20Sopenharmony_ci} 11598c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(mmput_async); 11608c2ecf20Sopenharmony_ci#endif 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_ci/** 11638c2ecf20Sopenharmony_ci * set_mm_exe_file - change a reference to the mm's executable file 11648c2ecf20Sopenharmony_ci * 11658c2ecf20Sopenharmony_ci * This changes mm's executable file (shown as symlink /proc/[pid]/exe). 11668c2ecf20Sopenharmony_ci * 11678c2ecf20Sopenharmony_ci * Main users are mmput() and sys_execve(). Callers prevent concurrent 11688c2ecf20Sopenharmony_ci * invocations: in mmput() nobody alive left, in execve task is single 11698c2ecf20Sopenharmony_ci * threaded. 
sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the 11708c2ecf20Sopenharmony_ci * mm->exe_file, but does so without using set_mm_exe_file() in order 11718c2ecf20Sopenharmony_ci * to do avoid the need for any locks. 11728c2ecf20Sopenharmony_ci */ 11738c2ecf20Sopenharmony_civoid set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) 11748c2ecf20Sopenharmony_ci{ 11758c2ecf20Sopenharmony_ci struct file *old_exe_file; 11768c2ecf20Sopenharmony_ci 11778c2ecf20Sopenharmony_ci /* 11788c2ecf20Sopenharmony_ci * It is safe to dereference the exe_file without RCU as 11798c2ecf20Sopenharmony_ci * this function is only called if nobody else can access 11808c2ecf20Sopenharmony_ci * this mm -- see comment above for justification. 11818c2ecf20Sopenharmony_ci */ 11828c2ecf20Sopenharmony_ci old_exe_file = rcu_dereference_raw(mm->exe_file); 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci if (new_exe_file) 11858c2ecf20Sopenharmony_ci get_file(new_exe_file); 11868c2ecf20Sopenharmony_ci rcu_assign_pointer(mm->exe_file, new_exe_file); 11878c2ecf20Sopenharmony_ci if (old_exe_file) 11888c2ecf20Sopenharmony_ci fput(old_exe_file); 11898c2ecf20Sopenharmony_ci} 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci/** 11928c2ecf20Sopenharmony_ci * get_mm_exe_file - acquire a reference to the mm's executable file 11938c2ecf20Sopenharmony_ci * 11948c2ecf20Sopenharmony_ci * Returns %NULL if mm has no associated executable file. 11958c2ecf20Sopenharmony_ci * User must release file via fput(). 
11968c2ecf20Sopenharmony_ci */ 11978c2ecf20Sopenharmony_cistruct file *get_mm_exe_file(struct mm_struct *mm) 11988c2ecf20Sopenharmony_ci{ 11998c2ecf20Sopenharmony_ci struct file *exe_file; 12008c2ecf20Sopenharmony_ci 12018c2ecf20Sopenharmony_ci rcu_read_lock(); 12028c2ecf20Sopenharmony_ci exe_file = rcu_dereference(mm->exe_file); 12038c2ecf20Sopenharmony_ci if (exe_file && !get_file_rcu(exe_file)) 12048c2ecf20Sopenharmony_ci exe_file = NULL; 12058c2ecf20Sopenharmony_ci rcu_read_unlock(); 12068c2ecf20Sopenharmony_ci return exe_file; 12078c2ecf20Sopenharmony_ci} 12088c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_mm_exe_file); 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci/** 12118c2ecf20Sopenharmony_ci * get_task_exe_file - acquire a reference to the task's executable file 12128c2ecf20Sopenharmony_ci * 12138c2ecf20Sopenharmony_ci * Returns %NULL if task's mm (if any) has no associated executable file or 12148c2ecf20Sopenharmony_ci * this is a kernel thread with borrowed mm (see the comment above get_task_mm). 12158c2ecf20Sopenharmony_ci * User must release file via fput(). 
12168c2ecf20Sopenharmony_ci */ 12178c2ecf20Sopenharmony_cistruct file *get_task_exe_file(struct task_struct *task) 12188c2ecf20Sopenharmony_ci{ 12198c2ecf20Sopenharmony_ci struct file *exe_file = NULL; 12208c2ecf20Sopenharmony_ci struct mm_struct *mm; 12218c2ecf20Sopenharmony_ci 12228c2ecf20Sopenharmony_ci task_lock(task); 12238c2ecf20Sopenharmony_ci mm = task->mm; 12248c2ecf20Sopenharmony_ci if (mm) { 12258c2ecf20Sopenharmony_ci if (!(task->flags & PF_KTHREAD)) 12268c2ecf20Sopenharmony_ci exe_file = get_mm_exe_file(mm); 12278c2ecf20Sopenharmony_ci } 12288c2ecf20Sopenharmony_ci task_unlock(task); 12298c2ecf20Sopenharmony_ci return exe_file; 12308c2ecf20Sopenharmony_ci} 12318c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_task_exe_file); 12328c2ecf20Sopenharmony_ci 12338c2ecf20Sopenharmony_ci/** 12348c2ecf20Sopenharmony_ci * get_task_mm - acquire a reference to the task's mm 12358c2ecf20Sopenharmony_ci * 12368c2ecf20Sopenharmony_ci * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning 12378c2ecf20Sopenharmony_ci * this kernel workthread has transiently adopted a user mm with use_mm, 12388c2ecf20Sopenharmony_ci * to do its AIO) is not set and if so returns a reference to it, after 12398c2ecf20Sopenharmony_ci * bumping up the use count. User must release the mm via mmput() 12408c2ecf20Sopenharmony_ci * after use. Typically used by /proc and ptrace. 
12418c2ecf20Sopenharmony_ci */ 12428c2ecf20Sopenharmony_cistruct mm_struct *get_task_mm(struct task_struct *task) 12438c2ecf20Sopenharmony_ci{ 12448c2ecf20Sopenharmony_ci struct mm_struct *mm; 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_ci task_lock(task); 12478c2ecf20Sopenharmony_ci mm = task->mm; 12488c2ecf20Sopenharmony_ci if (mm) { 12498c2ecf20Sopenharmony_ci if (task->flags & PF_KTHREAD) 12508c2ecf20Sopenharmony_ci mm = NULL; 12518c2ecf20Sopenharmony_ci else 12528c2ecf20Sopenharmony_ci mmget(mm); 12538c2ecf20Sopenharmony_ci } 12548c2ecf20Sopenharmony_ci task_unlock(task); 12558c2ecf20Sopenharmony_ci return mm; 12568c2ecf20Sopenharmony_ci} 12578c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(get_task_mm); 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_cistruct mm_struct *mm_access(struct task_struct *task, unsigned int mode) 12608c2ecf20Sopenharmony_ci{ 12618c2ecf20Sopenharmony_ci struct mm_struct *mm; 12628c2ecf20Sopenharmony_ci int err; 12638c2ecf20Sopenharmony_ci 12648c2ecf20Sopenharmony_ci err = down_read_killable(&task->signal->exec_update_lock); 12658c2ecf20Sopenharmony_ci if (err) 12668c2ecf20Sopenharmony_ci return ERR_PTR(err); 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_ci mm = get_task_mm(task); 12698c2ecf20Sopenharmony_ci if (mm && mm != current->mm && 12708c2ecf20Sopenharmony_ci !ptrace_may_access(task, mode)) { 12718c2ecf20Sopenharmony_ci mmput(mm); 12728c2ecf20Sopenharmony_ci mm = ERR_PTR(-EACCES); 12738c2ecf20Sopenharmony_ci } 12748c2ecf20Sopenharmony_ci up_read(&task->signal->exec_update_lock); 12758c2ecf20Sopenharmony_ci 12768c2ecf20Sopenharmony_ci return mm; 12778c2ecf20Sopenharmony_ci} 12788c2ecf20Sopenharmony_ci 12798c2ecf20Sopenharmony_cistatic void complete_vfork_done(struct task_struct *tsk) 12808c2ecf20Sopenharmony_ci{ 12818c2ecf20Sopenharmony_ci struct completion *vfork; 12828c2ecf20Sopenharmony_ci 12838c2ecf20Sopenharmony_ci task_lock(tsk); 12848c2ecf20Sopenharmony_ci vfork = tsk->vfork_done; 12858c2ecf20Sopenharmony_ci 
if (likely(vfork)) { 12868c2ecf20Sopenharmony_ci tsk->vfork_done = NULL; 12878c2ecf20Sopenharmony_ci complete(vfork); 12888c2ecf20Sopenharmony_ci } 12898c2ecf20Sopenharmony_ci task_unlock(tsk); 12908c2ecf20Sopenharmony_ci} 12918c2ecf20Sopenharmony_ci 12928c2ecf20Sopenharmony_cistatic int wait_for_vfork_done(struct task_struct *child, 12938c2ecf20Sopenharmony_ci struct completion *vfork) 12948c2ecf20Sopenharmony_ci{ 12958c2ecf20Sopenharmony_ci int killed; 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci freezer_do_not_count(); 12988c2ecf20Sopenharmony_ci cgroup_enter_frozen(); 12998c2ecf20Sopenharmony_ci killed = wait_for_completion_killable(vfork); 13008c2ecf20Sopenharmony_ci cgroup_leave_frozen(false); 13018c2ecf20Sopenharmony_ci freezer_count(); 13028c2ecf20Sopenharmony_ci 13038c2ecf20Sopenharmony_ci if (killed) { 13048c2ecf20Sopenharmony_ci task_lock(child); 13058c2ecf20Sopenharmony_ci child->vfork_done = NULL; 13068c2ecf20Sopenharmony_ci task_unlock(child); 13078c2ecf20Sopenharmony_ci } 13088c2ecf20Sopenharmony_ci 13098c2ecf20Sopenharmony_ci put_task_struct(child); 13108c2ecf20Sopenharmony_ci return killed; 13118c2ecf20Sopenharmony_ci} 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci/* Please note the differences between mmput and mm_release. 13148c2ecf20Sopenharmony_ci * mmput is called whenever we stop holding onto a mm_struct, 13158c2ecf20Sopenharmony_ci * error success whatever. 13168c2ecf20Sopenharmony_ci * 13178c2ecf20Sopenharmony_ci * mm_release is called after a mm_struct has been removed 13188c2ecf20Sopenharmony_ci * from the current process. 13198c2ecf20Sopenharmony_ci * 13208c2ecf20Sopenharmony_ci * This difference is important for error handling, when we 13218c2ecf20Sopenharmony_ci * only half set up a mm_struct for a new process and need to restore 13228c2ecf20Sopenharmony_ci * the old one. Because we mmput the new mm_struct before 13238c2ecf20Sopenharmony_ci * restoring the old one. . . 
13248c2ecf20Sopenharmony_ci * Eric Biederman 10 January 1998 13258c2ecf20Sopenharmony_ci */ 13268c2ecf20Sopenharmony_cistatic void mm_release(struct task_struct *tsk, struct mm_struct *mm) 13278c2ecf20Sopenharmony_ci{ 13288c2ecf20Sopenharmony_ci uprobe_free_utask(tsk); 13298c2ecf20Sopenharmony_ci 13308c2ecf20Sopenharmony_ci /* Get rid of any cached register state */ 13318c2ecf20Sopenharmony_ci deactivate_mm(tsk, mm); 13328c2ecf20Sopenharmony_ci 13338c2ecf20Sopenharmony_ci /* 13348c2ecf20Sopenharmony_ci * Signal userspace if we're not exiting with a core dump 13358c2ecf20Sopenharmony_ci * because we want to leave the value intact for debugging 13368c2ecf20Sopenharmony_ci * purposes. 13378c2ecf20Sopenharmony_ci */ 13388c2ecf20Sopenharmony_ci if (tsk->clear_child_tid) { 13398c2ecf20Sopenharmony_ci if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && 13408c2ecf20Sopenharmony_ci atomic_read(&mm->mm_users) > 1) { 13418c2ecf20Sopenharmony_ci /* 13428c2ecf20Sopenharmony_ci * We don't check the error code - if userspace has 13438c2ecf20Sopenharmony_ci * not set up a proper pointer then tough luck. 13448c2ecf20Sopenharmony_ci */ 13458c2ecf20Sopenharmony_ci put_user(0, tsk->clear_child_tid); 13468c2ecf20Sopenharmony_ci do_futex(tsk->clear_child_tid, FUTEX_WAKE, 13478c2ecf20Sopenharmony_ci 1, NULL, NULL, 0, 0); 13488c2ecf20Sopenharmony_ci } 13498c2ecf20Sopenharmony_ci tsk->clear_child_tid = NULL; 13508c2ecf20Sopenharmony_ci } 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci /* 13538c2ecf20Sopenharmony_ci * All done, finally we can wake up parent and return this mm to him. 13548c2ecf20Sopenharmony_ci * Also kthread_stop() uses this completion for synchronization. 
13558c2ecf20Sopenharmony_ci */ 13568c2ecf20Sopenharmony_ci if (tsk->vfork_done) 13578c2ecf20Sopenharmony_ci complete_vfork_done(tsk); 13588c2ecf20Sopenharmony_ci} 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_civoid exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) 13618c2ecf20Sopenharmony_ci{ 13628c2ecf20Sopenharmony_ci futex_exit_release(tsk); 13638c2ecf20Sopenharmony_ci mm_release(tsk, mm); 13648c2ecf20Sopenharmony_ci} 13658c2ecf20Sopenharmony_ci 13668c2ecf20Sopenharmony_civoid exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) 13678c2ecf20Sopenharmony_ci{ 13688c2ecf20Sopenharmony_ci futex_exec_release(tsk); 13698c2ecf20Sopenharmony_ci mm_release(tsk, mm); 13708c2ecf20Sopenharmony_ci} 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci/** 13738c2ecf20Sopenharmony_ci * dup_mm() - duplicates an existing mm structure 13748c2ecf20Sopenharmony_ci * @tsk: the task_struct with which the new mm will be associated. 13758c2ecf20Sopenharmony_ci * @oldmm: the mm to duplicate. 13768c2ecf20Sopenharmony_ci * 13778c2ecf20Sopenharmony_ci * Allocates a new mm structure and duplicates the provided @oldmm structure 13788c2ecf20Sopenharmony_ci * content into it. 13798c2ecf20Sopenharmony_ci * 13808c2ecf20Sopenharmony_ci * Return: the duplicated mm or NULL on failure. 
13818c2ecf20Sopenharmony_ci */ 13828c2ecf20Sopenharmony_cistatic struct mm_struct *dup_mm(struct task_struct *tsk, 13838c2ecf20Sopenharmony_ci struct mm_struct *oldmm) 13848c2ecf20Sopenharmony_ci{ 13858c2ecf20Sopenharmony_ci struct mm_struct *mm; 13868c2ecf20Sopenharmony_ci int err; 13878c2ecf20Sopenharmony_ci 13888c2ecf20Sopenharmony_ci mm = allocate_mm(); 13898c2ecf20Sopenharmony_ci if (!mm) 13908c2ecf20Sopenharmony_ci goto fail_nomem; 13918c2ecf20Sopenharmony_ci 13928c2ecf20Sopenharmony_ci memcpy(mm, oldmm, sizeof(*mm)); 13938c2ecf20Sopenharmony_ci 13948c2ecf20Sopenharmony_ci if (!mm_init(mm, tsk, mm->user_ns)) 13958c2ecf20Sopenharmony_ci goto fail_nomem; 13968c2ecf20Sopenharmony_ci 13978c2ecf20Sopenharmony_ci err = dup_mmap(mm, oldmm); 13988c2ecf20Sopenharmony_ci if (err) 13998c2ecf20Sopenharmony_ci goto free_pt; 14008c2ecf20Sopenharmony_ci 14018c2ecf20Sopenharmony_ci mm->hiwater_rss = get_mm_rss(mm); 14028c2ecf20Sopenharmony_ci mm->hiwater_vm = mm->total_vm; 14038c2ecf20Sopenharmony_ci 14048c2ecf20Sopenharmony_ci if (mm->binfmt && !try_module_get(mm->binfmt->module)) 14058c2ecf20Sopenharmony_ci goto free_pt; 14068c2ecf20Sopenharmony_ci 14078c2ecf20Sopenharmony_ci return mm; 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_cifree_pt: 14108c2ecf20Sopenharmony_ci /* don't put binfmt in mmput, we haven't got module yet */ 14118c2ecf20Sopenharmony_ci mm->binfmt = NULL; 14128c2ecf20Sopenharmony_ci mm_init_owner(mm, NULL); 14138c2ecf20Sopenharmony_ci mmput(mm); 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_cifail_nomem: 14168c2ecf20Sopenharmony_ci return NULL; 14178c2ecf20Sopenharmony_ci} 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_cistatic int copy_mm(unsigned long clone_flags, struct task_struct *tsk) 14208c2ecf20Sopenharmony_ci{ 14218c2ecf20Sopenharmony_ci struct mm_struct *mm, *oldmm; 14228c2ecf20Sopenharmony_ci int retval; 14238c2ecf20Sopenharmony_ci 14248c2ecf20Sopenharmony_ci tsk->min_flt = tsk->maj_flt = 0; 14258c2ecf20Sopenharmony_ci 
tsk->nvcsw = tsk->nivcsw = 0; 14268c2ecf20Sopenharmony_ci#ifdef CONFIG_DETECT_HUNG_TASK 14278c2ecf20Sopenharmony_ci tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; 14288c2ecf20Sopenharmony_ci tsk->last_switch_time = 0; 14298c2ecf20Sopenharmony_ci#endif 14308c2ecf20Sopenharmony_ci 14318c2ecf20Sopenharmony_ci tsk->mm = NULL; 14328c2ecf20Sopenharmony_ci tsk->active_mm = NULL; 14338c2ecf20Sopenharmony_ci 14348c2ecf20Sopenharmony_ci /* 14358c2ecf20Sopenharmony_ci * Are we cloning a kernel thread? 14368c2ecf20Sopenharmony_ci * 14378c2ecf20Sopenharmony_ci * We need to steal a active VM for that.. 14388c2ecf20Sopenharmony_ci */ 14398c2ecf20Sopenharmony_ci oldmm = current->mm; 14408c2ecf20Sopenharmony_ci if (!oldmm) 14418c2ecf20Sopenharmony_ci return 0; 14428c2ecf20Sopenharmony_ci 14438c2ecf20Sopenharmony_ci /* initialize the new vmacache entries */ 14448c2ecf20Sopenharmony_ci vmacache_flush(tsk); 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci if (clone_flags & CLONE_VM) { 14478c2ecf20Sopenharmony_ci mmget(oldmm); 14488c2ecf20Sopenharmony_ci mm = oldmm; 14498c2ecf20Sopenharmony_ci goto good_mm; 14508c2ecf20Sopenharmony_ci } 14518c2ecf20Sopenharmony_ci 14528c2ecf20Sopenharmony_ci retval = -ENOMEM; 14538c2ecf20Sopenharmony_ci mm = dup_mm(tsk, current->mm); 14548c2ecf20Sopenharmony_ci if (!mm) 14558c2ecf20Sopenharmony_ci goto fail_nomem; 14568c2ecf20Sopenharmony_ci 14578c2ecf20Sopenharmony_cigood_mm: 14588c2ecf20Sopenharmony_ci tsk->mm = mm; 14598c2ecf20Sopenharmony_ci tsk->active_mm = mm; 14608c2ecf20Sopenharmony_ci return 0; 14618c2ecf20Sopenharmony_ci 14628c2ecf20Sopenharmony_cifail_nomem: 14638c2ecf20Sopenharmony_ci return retval; 14648c2ecf20Sopenharmony_ci} 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_cistatic int copy_fs(unsigned long clone_flags, struct task_struct *tsk) 14678c2ecf20Sopenharmony_ci{ 14688c2ecf20Sopenharmony_ci struct fs_struct *fs = current->fs; 14698c2ecf20Sopenharmony_ci if (clone_flags & CLONE_FS) { 14708c2ecf20Sopenharmony_ci /* 
tsk->fs is already what we want */ 14718c2ecf20Sopenharmony_ci spin_lock(&fs->lock); 14728c2ecf20Sopenharmony_ci if (fs->in_exec) { 14738c2ecf20Sopenharmony_ci spin_unlock(&fs->lock); 14748c2ecf20Sopenharmony_ci return -EAGAIN; 14758c2ecf20Sopenharmony_ci } 14768c2ecf20Sopenharmony_ci fs->users++; 14778c2ecf20Sopenharmony_ci spin_unlock(&fs->lock); 14788c2ecf20Sopenharmony_ci return 0; 14798c2ecf20Sopenharmony_ci } 14808c2ecf20Sopenharmony_ci tsk->fs = copy_fs_struct(fs); 14818c2ecf20Sopenharmony_ci if (!tsk->fs) 14828c2ecf20Sopenharmony_ci return -ENOMEM; 14838c2ecf20Sopenharmony_ci return 0; 14848c2ecf20Sopenharmony_ci} 14858c2ecf20Sopenharmony_ci 14868c2ecf20Sopenharmony_cistatic int copy_files(unsigned long clone_flags, struct task_struct *tsk) 14878c2ecf20Sopenharmony_ci{ 14888c2ecf20Sopenharmony_ci struct files_struct *oldf, *newf; 14898c2ecf20Sopenharmony_ci int error = 0; 14908c2ecf20Sopenharmony_ci 14918c2ecf20Sopenharmony_ci /* 14928c2ecf20Sopenharmony_ci * A background process may not have any files ... 
14938c2ecf20Sopenharmony_ci */ 14948c2ecf20Sopenharmony_ci oldf = current->files; 14958c2ecf20Sopenharmony_ci if (!oldf) 14968c2ecf20Sopenharmony_ci goto out; 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_ci if (clone_flags & CLONE_FILES) { 14998c2ecf20Sopenharmony_ci atomic_inc(&oldf->count); 15008c2ecf20Sopenharmony_ci goto out; 15018c2ecf20Sopenharmony_ci } 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_ci newf = dup_fd(oldf, NR_OPEN_MAX, &error); 15048c2ecf20Sopenharmony_ci if (!newf) 15058c2ecf20Sopenharmony_ci goto out; 15068c2ecf20Sopenharmony_ci 15078c2ecf20Sopenharmony_ci tsk->files = newf; 15088c2ecf20Sopenharmony_ci error = 0; 15098c2ecf20Sopenharmony_ciout: 15108c2ecf20Sopenharmony_ci return error; 15118c2ecf20Sopenharmony_ci} 15128c2ecf20Sopenharmony_ci 15138c2ecf20Sopenharmony_cistatic int copy_io(unsigned long clone_flags, struct task_struct *tsk) 15148c2ecf20Sopenharmony_ci{ 15158c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 15168c2ecf20Sopenharmony_ci struct io_context *ioc = current->io_context; 15178c2ecf20Sopenharmony_ci struct io_context *new_ioc; 15188c2ecf20Sopenharmony_ci 15198c2ecf20Sopenharmony_ci if (!ioc) 15208c2ecf20Sopenharmony_ci return 0; 15218c2ecf20Sopenharmony_ci /* 15228c2ecf20Sopenharmony_ci * Share io context with parent, if CLONE_IO is set 15238c2ecf20Sopenharmony_ci */ 15248c2ecf20Sopenharmony_ci if (clone_flags & CLONE_IO) { 15258c2ecf20Sopenharmony_ci ioc_task_link(ioc); 15268c2ecf20Sopenharmony_ci tsk->io_context = ioc; 15278c2ecf20Sopenharmony_ci } else if (ioprio_valid(ioc->ioprio)) { 15288c2ecf20Sopenharmony_ci new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); 15298c2ecf20Sopenharmony_ci if (unlikely(!new_ioc)) 15308c2ecf20Sopenharmony_ci return -ENOMEM; 15318c2ecf20Sopenharmony_ci 15328c2ecf20Sopenharmony_ci new_ioc->ioprio = ioc->ioprio; 15338c2ecf20Sopenharmony_ci put_io_context(new_ioc); 15348c2ecf20Sopenharmony_ci } 15358c2ecf20Sopenharmony_ci#endif 15368c2ecf20Sopenharmony_ci return 0; 
15378c2ecf20Sopenharmony_ci} 15388c2ecf20Sopenharmony_ci 15398c2ecf20Sopenharmony_cistatic int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) 15408c2ecf20Sopenharmony_ci{ 15418c2ecf20Sopenharmony_ci struct sighand_struct *sig; 15428c2ecf20Sopenharmony_ci 15438c2ecf20Sopenharmony_ci if (clone_flags & CLONE_SIGHAND) { 15448c2ecf20Sopenharmony_ci refcount_inc(¤t->sighand->count); 15458c2ecf20Sopenharmony_ci return 0; 15468c2ecf20Sopenharmony_ci } 15478c2ecf20Sopenharmony_ci sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL); 15488c2ecf20Sopenharmony_ci RCU_INIT_POINTER(tsk->sighand, sig); 15498c2ecf20Sopenharmony_ci if (!sig) 15508c2ecf20Sopenharmony_ci return -ENOMEM; 15518c2ecf20Sopenharmony_ci 15528c2ecf20Sopenharmony_ci refcount_set(&sig->count, 1); 15538c2ecf20Sopenharmony_ci spin_lock_irq(¤t->sighand->siglock); 15548c2ecf20Sopenharmony_ci memcpy(sig->action, current->sighand->action, sizeof(sig->action)); 15558c2ecf20Sopenharmony_ci spin_unlock_irq(¤t->sighand->siglock); 15568c2ecf20Sopenharmony_ci 15578c2ecf20Sopenharmony_ci /* Reset all signal handler not set to SIG_IGN to SIG_DFL. */ 15588c2ecf20Sopenharmony_ci if (clone_flags & CLONE_CLEAR_SIGHAND) 15598c2ecf20Sopenharmony_ci flush_signal_handlers(tsk, 0); 15608c2ecf20Sopenharmony_ci 15618c2ecf20Sopenharmony_ci return 0; 15628c2ecf20Sopenharmony_ci} 15638c2ecf20Sopenharmony_ci 15648c2ecf20Sopenharmony_civoid __cleanup_sighand(struct sighand_struct *sighand) 15658c2ecf20Sopenharmony_ci{ 15668c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&sighand->count)) { 15678c2ecf20Sopenharmony_ci signalfd_cleanup(sighand); 15688c2ecf20Sopenharmony_ci /* 15698c2ecf20Sopenharmony_ci * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it 15708c2ecf20Sopenharmony_ci * without an RCU grace period, see __lock_task_sighand(). 
15718c2ecf20Sopenharmony_ci */ 15728c2ecf20Sopenharmony_ci kmem_cache_free(sighand_cachep, sighand); 15738c2ecf20Sopenharmony_ci } 15748c2ecf20Sopenharmony_ci} 15758c2ecf20Sopenharmony_ci 15768c2ecf20Sopenharmony_ci/* 15778c2ecf20Sopenharmony_ci * Initialize POSIX timer handling for a thread group. 15788c2ecf20Sopenharmony_ci */ 15798c2ecf20Sopenharmony_cistatic void posix_cpu_timers_init_group(struct signal_struct *sig) 15808c2ecf20Sopenharmony_ci{ 15818c2ecf20Sopenharmony_ci struct posix_cputimers *pct = &sig->posix_cputimers; 15828c2ecf20Sopenharmony_ci unsigned long cpu_limit; 15838c2ecf20Sopenharmony_ci 15848c2ecf20Sopenharmony_ci cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); 15858c2ecf20Sopenharmony_ci posix_cputimers_group_init(pct, cpu_limit); 15868c2ecf20Sopenharmony_ci} 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_cistatic int copy_signal(unsigned long clone_flags, struct task_struct *tsk) 15898c2ecf20Sopenharmony_ci{ 15908c2ecf20Sopenharmony_ci struct signal_struct *sig; 15918c2ecf20Sopenharmony_ci 15928c2ecf20Sopenharmony_ci if (clone_flags & CLONE_THREAD) 15938c2ecf20Sopenharmony_ci return 0; 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_ci sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL); 15968c2ecf20Sopenharmony_ci tsk->signal = sig; 15978c2ecf20Sopenharmony_ci if (!sig) 15988c2ecf20Sopenharmony_ci return -ENOMEM; 15998c2ecf20Sopenharmony_ci 16008c2ecf20Sopenharmony_ci sig->nr_threads = 1; 16018c2ecf20Sopenharmony_ci atomic_set(&sig->live, 1); 16028c2ecf20Sopenharmony_ci refcount_set(&sig->sigcnt, 1); 16038c2ecf20Sopenharmony_ci 16048c2ecf20Sopenharmony_ci /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ 16058c2ecf20Sopenharmony_ci sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); 16068c2ecf20Sopenharmony_ci tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); 16078c2ecf20Sopenharmony_ci 16088c2ecf20Sopenharmony_ci init_waitqueue_head(&sig->wait_chldexit); 
16098c2ecf20Sopenharmony_ci sig->curr_target = tsk; 16108c2ecf20Sopenharmony_ci init_sigpending(&sig->shared_pending); 16118c2ecf20Sopenharmony_ci INIT_HLIST_HEAD(&sig->multiprocess); 16128c2ecf20Sopenharmony_ci seqlock_init(&sig->stats_lock); 16138c2ecf20Sopenharmony_ci prev_cputime_init(&sig->prev_cputime); 16148c2ecf20Sopenharmony_ci 16158c2ecf20Sopenharmony_ci#ifdef CONFIG_POSIX_TIMERS 16168c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&sig->posix_timers); 16178c2ecf20Sopenharmony_ci hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 16188c2ecf20Sopenharmony_ci sig->real_timer.function = it_real_fn; 16198c2ecf20Sopenharmony_ci#endif 16208c2ecf20Sopenharmony_ci 16218c2ecf20Sopenharmony_ci task_lock(current->group_leader); 16228c2ecf20Sopenharmony_ci memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 16238c2ecf20Sopenharmony_ci task_unlock(current->group_leader); 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_ci posix_cpu_timers_init_group(sig); 16268c2ecf20Sopenharmony_ci 16278c2ecf20Sopenharmony_ci tty_audit_fork(sig); 16288c2ecf20Sopenharmony_ci sched_autogroup_fork(sig); 16298c2ecf20Sopenharmony_ci 16308c2ecf20Sopenharmony_ci sig->oom_score_adj = current->signal->oom_score_adj; 16318c2ecf20Sopenharmony_ci sig->oom_score_adj_min = current->signal->oom_score_adj_min; 16328c2ecf20Sopenharmony_ci 16338c2ecf20Sopenharmony_ci mutex_init(&sig->cred_guard_mutex); 16348c2ecf20Sopenharmony_ci init_rwsem(&sig->exec_update_lock); 16358c2ecf20Sopenharmony_ci 16368c2ecf20Sopenharmony_ci return 0; 16378c2ecf20Sopenharmony_ci} 16388c2ecf20Sopenharmony_ci 16398c2ecf20Sopenharmony_cistatic void copy_seccomp(struct task_struct *p) 16408c2ecf20Sopenharmony_ci{ 16418c2ecf20Sopenharmony_ci#ifdef CONFIG_SECCOMP 16428c2ecf20Sopenharmony_ci /* 16438c2ecf20Sopenharmony_ci * Must be called with sighand->lock held, which is common to 16448c2ecf20Sopenharmony_ci * all threads in the group. 
Holding cred_guard_mutex is not 16458c2ecf20Sopenharmony_ci * needed because this new task is not yet running and cannot 16468c2ecf20Sopenharmony_ci * be racing exec. 16478c2ecf20Sopenharmony_ci */ 16488c2ecf20Sopenharmony_ci assert_spin_locked(¤t->sighand->siglock); 16498c2ecf20Sopenharmony_ci 16508c2ecf20Sopenharmony_ci /* Ref-count the new filter user, and assign it. */ 16518c2ecf20Sopenharmony_ci get_seccomp_filter(current); 16528c2ecf20Sopenharmony_ci p->seccomp = current->seccomp; 16538c2ecf20Sopenharmony_ci 16548c2ecf20Sopenharmony_ci /* 16558c2ecf20Sopenharmony_ci * Explicitly enable no_new_privs here in case it got set 16568c2ecf20Sopenharmony_ci * between the task_struct being duplicated and holding the 16578c2ecf20Sopenharmony_ci * sighand lock. The seccomp state and nnp must be in sync. 16588c2ecf20Sopenharmony_ci */ 16598c2ecf20Sopenharmony_ci if (task_no_new_privs(current)) 16608c2ecf20Sopenharmony_ci task_set_no_new_privs(p); 16618c2ecf20Sopenharmony_ci 16628c2ecf20Sopenharmony_ci /* 16638c2ecf20Sopenharmony_ci * If the parent gained a seccomp mode after copying thread 16648c2ecf20Sopenharmony_ci * flags and between before we held the sighand lock, we have 16658c2ecf20Sopenharmony_ci * to manually enable the seccomp thread flag here. 
16668c2ecf20Sopenharmony_ci */ 16678c2ecf20Sopenharmony_ci if (p->seccomp.mode != SECCOMP_MODE_DISABLED) 16688c2ecf20Sopenharmony_ci set_tsk_thread_flag(p, TIF_SECCOMP); 16698c2ecf20Sopenharmony_ci#endif 16708c2ecf20Sopenharmony_ci} 16718c2ecf20Sopenharmony_ci 16728c2ecf20Sopenharmony_ciSYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) 16738c2ecf20Sopenharmony_ci{ 16748c2ecf20Sopenharmony_ci current->clear_child_tid = tidptr; 16758c2ecf20Sopenharmony_ci 16768c2ecf20Sopenharmony_ci return task_pid_vnr(current); 16778c2ecf20Sopenharmony_ci} 16788c2ecf20Sopenharmony_ci 16798c2ecf20Sopenharmony_cistatic void rt_mutex_init_task(struct task_struct *p) 16808c2ecf20Sopenharmony_ci{ 16818c2ecf20Sopenharmony_ci raw_spin_lock_init(&p->pi_lock); 16828c2ecf20Sopenharmony_ci#ifdef CONFIG_RT_MUTEXES 16838c2ecf20Sopenharmony_ci p->pi_waiters = RB_ROOT_CACHED; 16848c2ecf20Sopenharmony_ci p->pi_top_task = NULL; 16858c2ecf20Sopenharmony_ci p->pi_blocked_on = NULL; 16868c2ecf20Sopenharmony_ci#endif 16878c2ecf20Sopenharmony_ci} 16888c2ecf20Sopenharmony_ci 16898c2ecf20Sopenharmony_cistatic inline void init_task_pid_links(struct task_struct *task) 16908c2ecf20Sopenharmony_ci{ 16918c2ecf20Sopenharmony_ci enum pid_type type; 16928c2ecf20Sopenharmony_ci 16938c2ecf20Sopenharmony_ci for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { 16948c2ecf20Sopenharmony_ci INIT_HLIST_NODE(&task->pid_links[type]); 16958c2ecf20Sopenharmony_ci } 16968c2ecf20Sopenharmony_ci} 16978c2ecf20Sopenharmony_ci 16988c2ecf20Sopenharmony_cistatic inline void 16998c2ecf20Sopenharmony_ciinit_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) 17008c2ecf20Sopenharmony_ci{ 17018c2ecf20Sopenharmony_ci if (type == PIDTYPE_PID) 17028c2ecf20Sopenharmony_ci task->thread_pid = pid; 17038c2ecf20Sopenharmony_ci else 17048c2ecf20Sopenharmony_ci task->signal->pids[type] = pid; 17058c2ecf20Sopenharmony_ci} 17068c2ecf20Sopenharmony_ci 17078c2ecf20Sopenharmony_cistatic inline void rcu_copy_process(struct 
task_struct *p) 17088c2ecf20Sopenharmony_ci{ 17098c2ecf20Sopenharmony_ci#ifdef CONFIG_PREEMPT_RCU 17108c2ecf20Sopenharmony_ci p->rcu_read_lock_nesting = 0; 17118c2ecf20Sopenharmony_ci p->rcu_read_unlock_special.s = 0; 17128c2ecf20Sopenharmony_ci p->rcu_blocked_node = NULL; 17138c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&p->rcu_node_entry); 17148c2ecf20Sopenharmony_ci#endif /* #ifdef CONFIG_PREEMPT_RCU */ 17158c2ecf20Sopenharmony_ci#ifdef CONFIG_TASKS_RCU 17168c2ecf20Sopenharmony_ci p->rcu_tasks_holdout = false; 17178c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); 17188c2ecf20Sopenharmony_ci p->rcu_tasks_idle_cpu = -1; 17198c2ecf20Sopenharmony_ci#endif /* #ifdef CONFIG_TASKS_RCU */ 17208c2ecf20Sopenharmony_ci#ifdef CONFIG_TASKS_TRACE_RCU 17218c2ecf20Sopenharmony_ci p->trc_reader_nesting = 0; 17228c2ecf20Sopenharmony_ci p->trc_reader_special.s = 0; 17238c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&p->trc_holdout_list); 17248c2ecf20Sopenharmony_ci#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ 17258c2ecf20Sopenharmony_ci} 17268c2ecf20Sopenharmony_ci 17278c2ecf20Sopenharmony_cistruct pid *pidfd_pid(const struct file *file) 17288c2ecf20Sopenharmony_ci{ 17298c2ecf20Sopenharmony_ci if (file->f_op == &pidfd_fops) 17308c2ecf20Sopenharmony_ci return file->private_data; 17318c2ecf20Sopenharmony_ci 17328c2ecf20Sopenharmony_ci return ERR_PTR(-EBADF); 17338c2ecf20Sopenharmony_ci} 17348c2ecf20Sopenharmony_ci 17358c2ecf20Sopenharmony_cistatic int pidfd_release(struct inode *inode, struct file *file) 17368c2ecf20Sopenharmony_ci{ 17378c2ecf20Sopenharmony_ci struct pid *pid = file->private_data; 17388c2ecf20Sopenharmony_ci 17398c2ecf20Sopenharmony_ci file->private_data = NULL; 17408c2ecf20Sopenharmony_ci put_pid(pid); 17418c2ecf20Sopenharmony_ci return 0; 17428c2ecf20Sopenharmony_ci} 17438c2ecf20Sopenharmony_ci 17448c2ecf20Sopenharmony_ci#ifdef CONFIG_PROC_FS 17458c2ecf20Sopenharmony_ci/** 17468c2ecf20Sopenharmony_ci * pidfd_show_fdinfo - print information about a pidfd 
 * @m: proc fdinfo file
 * @f: file referencing a pidfd
 *
 * Pid:
 * This function will print the pid that a given pidfd refers to in the
 * pid namespace of the procfs instance.
 * If the pid namespace of the process is not a descendant of the pid
 * namespace of the procfs instance, 0 will be shown as its pid. This is
 * similar to calling getppid() on a process whose parent is outside of
 * its pid namespace.
 *
 * NSpid:
 * If pid namespaces are supported then this function will also print
 * the pid that a given pidfd refers to for all descendant pid namespaces
 * starting from the current pid namespace of the instance, i.e. the
 * Pid field and the first entry in the NSpid field will be identical.
 * If the pid namespace of the process is not a descendant of the pid
 * namespace of the procfs instance, 0 will be shown as its first NSpid
 * entry and no others will be shown.
 * Note that this differs from the Pid and NSpid fields in
 * /proc/<pid>/status where Pid and NSpid are always shown relative to
 * the pid namespace of the procfs instance. The difference becomes
 * obvious when sending around a pidfd between pid namespaces from a
 * different branch of the tree, i.e.
where no ancestoral relation is 17718c2ecf20Sopenharmony_ci * present between the pid namespaces: 17728c2ecf20Sopenharmony_ci * - create two new pid namespaces ns1 and ns2 in the initial pid 17738c2ecf20Sopenharmony_ci * namespace (also take care to create new mount namespaces in the 17748c2ecf20Sopenharmony_ci * new pid namespace and mount procfs) 17758c2ecf20Sopenharmony_ci * - create a process with a pidfd in ns1 17768c2ecf20Sopenharmony_ci * - send pidfd from ns1 to ns2 17778c2ecf20Sopenharmony_ci * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid 17788c2ecf20Sopenharmony_ci * have exactly one entry, which is 0 17798c2ecf20Sopenharmony_ci */ 17808c2ecf20Sopenharmony_cistatic void pidfd_show_fdinfo(struct seq_file *m, struct file *f) 17818c2ecf20Sopenharmony_ci{ 17828c2ecf20Sopenharmony_ci struct pid *pid = f->private_data; 17838c2ecf20Sopenharmony_ci struct pid_namespace *ns; 17848c2ecf20Sopenharmony_ci pid_t nr = -1; 17858c2ecf20Sopenharmony_ci 17868c2ecf20Sopenharmony_ci if (likely(pid_has_task(pid, PIDTYPE_PID))) { 17878c2ecf20Sopenharmony_ci ns = proc_pid_ns(file_inode(m->file)->i_sb); 17888c2ecf20Sopenharmony_ci nr = pid_nr_ns(pid, ns); 17898c2ecf20Sopenharmony_ci } 17908c2ecf20Sopenharmony_ci 17918c2ecf20Sopenharmony_ci seq_put_decimal_ll(m, "Pid:\t", nr); 17928c2ecf20Sopenharmony_ci 17938c2ecf20Sopenharmony_ci#ifdef CONFIG_PID_NS 17948c2ecf20Sopenharmony_ci seq_put_decimal_ll(m, "\nNSpid:\t", nr); 17958c2ecf20Sopenharmony_ci if (nr > 0) { 17968c2ecf20Sopenharmony_ci int i; 17978c2ecf20Sopenharmony_ci 17988c2ecf20Sopenharmony_ci /* If nr is non-zero it means that 'pid' is valid and that 17998c2ecf20Sopenharmony_ci * ns, i.e. the pid namespace associated with the procfs 18008c2ecf20Sopenharmony_ci * instance, is in the pid namespace hierarchy of pid. 18018c2ecf20Sopenharmony_ci * Start at one below the already printed level. 
18028c2ecf20Sopenharmony_ci */ 18038c2ecf20Sopenharmony_ci for (i = ns->level + 1; i <= pid->level; i++) 18048c2ecf20Sopenharmony_ci seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); 18058c2ecf20Sopenharmony_ci } 18068c2ecf20Sopenharmony_ci#endif 18078c2ecf20Sopenharmony_ci seq_putc(m, '\n'); 18088c2ecf20Sopenharmony_ci} 18098c2ecf20Sopenharmony_ci#endif 18108c2ecf20Sopenharmony_ci 18118c2ecf20Sopenharmony_ci/* 18128c2ecf20Sopenharmony_ci * Poll support for process exit notification. 18138c2ecf20Sopenharmony_ci */ 18148c2ecf20Sopenharmony_cistatic __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) 18158c2ecf20Sopenharmony_ci{ 18168c2ecf20Sopenharmony_ci struct pid *pid = file->private_data; 18178c2ecf20Sopenharmony_ci __poll_t poll_flags = 0; 18188c2ecf20Sopenharmony_ci 18198c2ecf20Sopenharmony_ci poll_wait(file, &pid->wait_pidfd, pts); 18208c2ecf20Sopenharmony_ci 18218c2ecf20Sopenharmony_ci /* 18228c2ecf20Sopenharmony_ci * Inform pollers only when the whole thread group exits. 18238c2ecf20Sopenharmony_ci * If the thread group leader exits before all other threads in the 18248c2ecf20Sopenharmony_ci * group, then poll(2) should block, similar to the wait(2) family. 
18258c2ecf20Sopenharmony_ci */ 18268c2ecf20Sopenharmony_ci if (thread_group_exited(pid)) 18278c2ecf20Sopenharmony_ci poll_flags = EPOLLIN | EPOLLRDNORM; 18288c2ecf20Sopenharmony_ci 18298c2ecf20Sopenharmony_ci return poll_flags; 18308c2ecf20Sopenharmony_ci} 18318c2ecf20Sopenharmony_ci 18328c2ecf20Sopenharmony_ciconst struct file_operations pidfd_fops = { 18338c2ecf20Sopenharmony_ci .release = pidfd_release, 18348c2ecf20Sopenharmony_ci .poll = pidfd_poll, 18358c2ecf20Sopenharmony_ci#ifdef CONFIG_PROC_FS 18368c2ecf20Sopenharmony_ci .show_fdinfo = pidfd_show_fdinfo, 18378c2ecf20Sopenharmony_ci#endif 18388c2ecf20Sopenharmony_ci}; 18398c2ecf20Sopenharmony_ci 18408c2ecf20Sopenharmony_cistatic void __delayed_free_task(struct rcu_head *rhp) 18418c2ecf20Sopenharmony_ci{ 18428c2ecf20Sopenharmony_ci struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); 18438c2ecf20Sopenharmony_ci 18448c2ecf20Sopenharmony_ci free_task(tsk); 18458c2ecf20Sopenharmony_ci} 18468c2ecf20Sopenharmony_ci 18478c2ecf20Sopenharmony_cistatic __always_inline void delayed_free_task(struct task_struct *tsk) 18488c2ecf20Sopenharmony_ci{ 18498c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_MEMCG)) 18508c2ecf20Sopenharmony_ci call_rcu(&tsk->rcu, __delayed_free_task); 18518c2ecf20Sopenharmony_ci else 18528c2ecf20Sopenharmony_ci free_task(tsk); 18538c2ecf20Sopenharmony_ci} 18548c2ecf20Sopenharmony_ci 18558c2ecf20Sopenharmony_cistatic void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) 18568c2ecf20Sopenharmony_ci{ 18578c2ecf20Sopenharmony_ci /* Skip if kernel thread */ 18588c2ecf20Sopenharmony_ci if (!tsk->mm) 18598c2ecf20Sopenharmony_ci return; 18608c2ecf20Sopenharmony_ci 18618c2ecf20Sopenharmony_ci /* Skip if spawning a thread or using vfork */ 18628c2ecf20Sopenharmony_ci if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM) 18638c2ecf20Sopenharmony_ci return; 18648c2ecf20Sopenharmony_ci 18658c2ecf20Sopenharmony_ci /* We need to synchronize with __set_oom_adj */ 
18668c2ecf20Sopenharmony_ci mutex_lock(&oom_adj_mutex); 18678c2ecf20Sopenharmony_ci set_bit(MMF_MULTIPROCESS, &tsk->mm->flags); 18688c2ecf20Sopenharmony_ci /* Update the values in case they were changed after copy_signal */ 18698c2ecf20Sopenharmony_ci tsk->signal->oom_score_adj = current->signal->oom_score_adj; 18708c2ecf20Sopenharmony_ci tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min; 18718c2ecf20Sopenharmony_ci mutex_unlock(&oom_adj_mutex); 18728c2ecf20Sopenharmony_ci} 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ci/* 18758c2ecf20Sopenharmony_ci * This creates a new process as a copy of the old one, 18768c2ecf20Sopenharmony_ci * but does not actually start it yet. 18778c2ecf20Sopenharmony_ci * 18788c2ecf20Sopenharmony_ci * It copies the registers, and all the appropriate 18798c2ecf20Sopenharmony_ci * parts of the process environment (as per the clone 18808c2ecf20Sopenharmony_ci * flags). The actual kick-off is left to the caller. 18818c2ecf20Sopenharmony_ci */ 18828c2ecf20Sopenharmony_cistatic __latent_entropy struct task_struct *copy_process( 18838c2ecf20Sopenharmony_ci struct pid *pid, 18848c2ecf20Sopenharmony_ci int trace, 18858c2ecf20Sopenharmony_ci int node, 18868c2ecf20Sopenharmony_ci struct kernel_clone_args *args) 18878c2ecf20Sopenharmony_ci{ 18888c2ecf20Sopenharmony_ci int pidfd = -1, retval; 18898c2ecf20Sopenharmony_ci struct task_struct *p; 18908c2ecf20Sopenharmony_ci struct multiprocess_signals delayed; 18918c2ecf20Sopenharmony_ci struct file *pidfile = NULL; 18928c2ecf20Sopenharmony_ci u64 clone_flags = args->flags; 18938c2ecf20Sopenharmony_ci struct nsproxy *nsp = current->nsproxy; 18948c2ecf20Sopenharmony_ci 18958c2ecf20Sopenharmony_ci /* 18968c2ecf20Sopenharmony_ci * Don't allow sharing the root directory with processes in a different 18978c2ecf20Sopenharmony_ci * namespace 18988c2ecf20Sopenharmony_ci */ 18998c2ecf20Sopenharmony_ci if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 
19008c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19018c2ecf20Sopenharmony_ci 19028c2ecf20Sopenharmony_ci if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) 19038c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19048c2ecf20Sopenharmony_ci 19058c2ecf20Sopenharmony_ci /* 19068c2ecf20Sopenharmony_ci * Thread groups must share signals as well, and detached threads 19078c2ecf20Sopenharmony_ci * can only be started up within the thread group. 19088c2ecf20Sopenharmony_ci */ 19098c2ecf20Sopenharmony_ci if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) 19108c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19118c2ecf20Sopenharmony_ci 19128c2ecf20Sopenharmony_ci /* 19138c2ecf20Sopenharmony_ci * Shared signal handlers imply shared VM. By way of the above, 19148c2ecf20Sopenharmony_ci * thread groups also imply shared VM. Blocking this case allows 19158c2ecf20Sopenharmony_ci * for various simplifications in other code. 19168c2ecf20Sopenharmony_ci */ 19178c2ecf20Sopenharmony_ci if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 19188c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19198c2ecf20Sopenharmony_ci 19208c2ecf20Sopenharmony_ci /* 19218c2ecf20Sopenharmony_ci * Siblings of global init remain as zombies on exit since they are 19228c2ecf20Sopenharmony_ci * not reaped by their parent (swapper). To solve this and to avoid 19238c2ecf20Sopenharmony_ci * multi-rooted process trees, prevent global and container-inits 19248c2ecf20Sopenharmony_ci * from creating siblings. 19258c2ecf20Sopenharmony_ci */ 19268c2ecf20Sopenharmony_ci if ((clone_flags & CLONE_PARENT) && 19278c2ecf20Sopenharmony_ci current->signal->flags & SIGNAL_UNKILLABLE) 19288c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19298c2ecf20Sopenharmony_ci 19308c2ecf20Sopenharmony_ci /* 19318c2ecf20Sopenharmony_ci * If the new process will be in a different pid or user namespace 19328c2ecf20Sopenharmony_ci * do not allow it to share a thread group with the forking task. 
19338c2ecf20Sopenharmony_ci */ 19348c2ecf20Sopenharmony_ci if (clone_flags & CLONE_THREAD) { 19358c2ecf20Sopenharmony_ci if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || 19368c2ecf20Sopenharmony_ci (task_active_pid_ns(current) != nsp->pid_ns_for_children)) 19378c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19388c2ecf20Sopenharmony_ci } 19398c2ecf20Sopenharmony_ci 19408c2ecf20Sopenharmony_ci /* 19418c2ecf20Sopenharmony_ci * If the new process will be in a different time namespace 19428c2ecf20Sopenharmony_ci * do not allow it to share VM or a thread group with the forking task. 19438c2ecf20Sopenharmony_ci */ 19448c2ecf20Sopenharmony_ci if (clone_flags & (CLONE_THREAD | CLONE_VM)) { 19458c2ecf20Sopenharmony_ci if (nsp->time_ns != nsp->time_ns_for_children) 19468c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19478c2ecf20Sopenharmony_ci } 19488c2ecf20Sopenharmony_ci 19498c2ecf20Sopenharmony_ci if (clone_flags & CLONE_PIDFD) { 19508c2ecf20Sopenharmony_ci /* 19518c2ecf20Sopenharmony_ci * - CLONE_DETACHED is blocked so that we can potentially 19528c2ecf20Sopenharmony_ci * reuse it later for CLONE_PIDFD. 19538c2ecf20Sopenharmony_ci * - CLONE_THREAD is blocked until someone really needs it. 19548c2ecf20Sopenharmony_ci */ 19558c2ecf20Sopenharmony_ci if (clone_flags & (CLONE_DETACHED | CLONE_THREAD)) 19568c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 19578c2ecf20Sopenharmony_ci } 19588c2ecf20Sopenharmony_ci 19598c2ecf20Sopenharmony_ci /* 19608c2ecf20Sopenharmony_ci * Force any signals received before this point to be delivered 19618c2ecf20Sopenharmony_ci * before the fork happens. Collect up signals sent to multiple 19628c2ecf20Sopenharmony_ci * processes that happen during the fork and delay them so that 19638c2ecf20Sopenharmony_ci * they appear to happen after the fork. 
19648c2ecf20Sopenharmony_ci */ 19658c2ecf20Sopenharmony_ci sigemptyset(&delayed.signal); 19668c2ecf20Sopenharmony_ci INIT_HLIST_NODE(&delayed.node); 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci spin_lock_irq(¤t->sighand->siglock); 19698c2ecf20Sopenharmony_ci if (!(clone_flags & CLONE_THREAD)) 19708c2ecf20Sopenharmony_ci hlist_add_head(&delayed.node, ¤t->signal->multiprocess); 19718c2ecf20Sopenharmony_ci recalc_sigpending(); 19728c2ecf20Sopenharmony_ci spin_unlock_irq(¤t->sighand->siglock); 19738c2ecf20Sopenharmony_ci retval = -ERESTARTNOINTR; 19748c2ecf20Sopenharmony_ci if (task_sigpending(current)) 19758c2ecf20Sopenharmony_ci goto fork_out; 19768c2ecf20Sopenharmony_ci 19778c2ecf20Sopenharmony_ci retval = -ENOMEM; 19788c2ecf20Sopenharmony_ci p = dup_task_struct(current, node); 19798c2ecf20Sopenharmony_ci if (!p) 19808c2ecf20Sopenharmony_ci goto fork_out; 19818c2ecf20Sopenharmony_ci if (args->io_thread) { 19828c2ecf20Sopenharmony_ci /* 19838c2ecf20Sopenharmony_ci * Mark us an IO worker, and block any signal that isn't 19848c2ecf20Sopenharmony_ci * fatal or STOP 19858c2ecf20Sopenharmony_ci */ 19868c2ecf20Sopenharmony_ci p->flags |= PF_IO_WORKER; 19878c2ecf20Sopenharmony_ci siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP)); 19888c2ecf20Sopenharmony_ci } 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci /* 19918c2ecf20Sopenharmony_ci * This _must_ happen before we call free_task(), i.e. before we jump 19928c2ecf20Sopenharmony_ci * to any of the bad_fork_* labels. This is to avoid freeing 19938c2ecf20Sopenharmony_ci * p->set_child_tid which is (ab)used as a kthread's data pointer for 19948c2ecf20Sopenharmony_ci * kernel threads (PF_KTHREAD). 19958c2ecf20Sopenharmony_ci */ 19968c2ecf20Sopenharmony_ci p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL; 19978c2ecf20Sopenharmony_ci /* 19988c2ecf20Sopenharmony_ci * Clear TID on mm_release()? 
19998c2ecf20Sopenharmony_ci */ 20008c2ecf20Sopenharmony_ci p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL; 20018c2ecf20Sopenharmony_ci 20028c2ecf20Sopenharmony_ci ftrace_graph_init_task(p); 20038c2ecf20Sopenharmony_ci 20048c2ecf20Sopenharmony_ci rt_mutex_init_task(p); 20058c2ecf20Sopenharmony_ci 20068c2ecf20Sopenharmony_ci lockdep_assert_irqs_enabled(); 20078c2ecf20Sopenharmony_ci#ifdef CONFIG_PROVE_LOCKING 20088c2ecf20Sopenharmony_ci DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); 20098c2ecf20Sopenharmony_ci#endif 20108c2ecf20Sopenharmony_ci retval = -EAGAIN; 20118c2ecf20Sopenharmony_ci if (atomic_read(&p->real_cred->user->processes) >= 20128c2ecf20Sopenharmony_ci task_rlimit(p, RLIMIT_NPROC)) { 20138c2ecf20Sopenharmony_ci if (p->real_cred->user != INIT_USER && 20148c2ecf20Sopenharmony_ci !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) 20158c2ecf20Sopenharmony_ci goto bad_fork_free; 20168c2ecf20Sopenharmony_ci } 20178c2ecf20Sopenharmony_ci current->flags &= ~PF_NPROC_EXCEEDED; 20188c2ecf20Sopenharmony_ci 20198c2ecf20Sopenharmony_ci retval = copy_creds(p, clone_flags); 20208c2ecf20Sopenharmony_ci if (retval < 0) 20218c2ecf20Sopenharmony_ci goto bad_fork_free; 20228c2ecf20Sopenharmony_ci 20238c2ecf20Sopenharmony_ci /* 20248c2ecf20Sopenharmony_ci * If multiple threads are within copy_process(), then this check 20258c2ecf20Sopenharmony_ci * triggers too late. This doesn't hurt, the check is only there 20268c2ecf20Sopenharmony_ci * to stop root fork bombs. 
20278c2ecf20Sopenharmony_ci */ 20288c2ecf20Sopenharmony_ci retval = -EAGAIN; 20298c2ecf20Sopenharmony_ci if (data_race(nr_threads >= max_threads)) 20308c2ecf20Sopenharmony_ci goto bad_fork_cleanup_count; 20318c2ecf20Sopenharmony_ci 20328c2ecf20Sopenharmony_ci delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ 20338c2ecf20Sopenharmony_ci#ifdef CONFIG_RECLAIM_ACCT 20348c2ecf20Sopenharmony_ci reclaimacct_tsk_init(p); 20358c2ecf20Sopenharmony_ci#endif 20368c2ecf20Sopenharmony_ci p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE); 20378c2ecf20Sopenharmony_ci p->flags |= PF_FORKNOEXEC; 20388c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&p->children); 20398c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&p->sibling); 20408c2ecf20Sopenharmony_ci rcu_copy_process(p); 20418c2ecf20Sopenharmony_ci p->vfork_done = NULL; 20428c2ecf20Sopenharmony_ci spin_lock_init(&p->alloc_lock); 20438c2ecf20Sopenharmony_ci 20448c2ecf20Sopenharmony_ci init_sigpending(&p->pending); 20458c2ecf20Sopenharmony_ci 20468c2ecf20Sopenharmony_ci p->utime = p->stime = p->gtime = 0; 20478c2ecf20Sopenharmony_ci#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME 20488c2ecf20Sopenharmony_ci p->utimescaled = p->stimescaled = 0; 20498c2ecf20Sopenharmony_ci#endif 20508c2ecf20Sopenharmony_ci prev_cputime_init(&p->prev_cputime); 20518c2ecf20Sopenharmony_ci 20528c2ecf20Sopenharmony_ci#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN 20538c2ecf20Sopenharmony_ci seqcount_init(&p->vtime.seqcount); 20548c2ecf20Sopenharmony_ci p->vtime.starttime = 0; 20558c2ecf20Sopenharmony_ci p->vtime.state = VTIME_INACTIVE; 20568c2ecf20Sopenharmony_ci#endif 20578c2ecf20Sopenharmony_ci 20588c2ecf20Sopenharmony_ci#ifdef CONFIG_IO_URING 20598c2ecf20Sopenharmony_ci p->io_uring = NULL; 20608c2ecf20Sopenharmony_ci#endif 20618c2ecf20Sopenharmony_ci 20628c2ecf20Sopenharmony_ci#if defined(SPLIT_RSS_COUNTING) 20638c2ecf20Sopenharmony_ci memset(&p->rss_stat, 0, sizeof(p->rss_stat)); 20648c2ecf20Sopenharmony_ci#endif 20658c2ecf20Sopenharmony_ci 20668c2ecf20Sopenharmony_ci 
p->default_timer_slack_ns = current->timer_slack_ns; 20678c2ecf20Sopenharmony_ci 20688c2ecf20Sopenharmony_ci#ifdef CONFIG_PSI 20698c2ecf20Sopenharmony_ci p->psi_flags = 0; 20708c2ecf20Sopenharmony_ci#endif 20718c2ecf20Sopenharmony_ci 20728c2ecf20Sopenharmony_ci task_io_accounting_init(&p->ioac); 20738c2ecf20Sopenharmony_ci acct_clear_integrals(p); 20748c2ecf20Sopenharmony_ci 20758c2ecf20Sopenharmony_ci posix_cputimers_init(&p->posix_cputimers); 20768c2ecf20Sopenharmony_ci 20778c2ecf20Sopenharmony_ci p->io_context = NULL; 20788c2ecf20Sopenharmony_ci audit_set_context(p, NULL); 20798c2ecf20Sopenharmony_ci cgroup_fork(p); 20808c2ecf20Sopenharmony_ci#ifdef CONFIG_NUMA 20818c2ecf20Sopenharmony_ci p->mempolicy = mpol_dup(p->mempolicy); 20828c2ecf20Sopenharmony_ci if (IS_ERR(p->mempolicy)) { 20838c2ecf20Sopenharmony_ci retval = PTR_ERR(p->mempolicy); 20848c2ecf20Sopenharmony_ci p->mempolicy = NULL; 20858c2ecf20Sopenharmony_ci goto bad_fork_cleanup_threadgroup_lock; 20868c2ecf20Sopenharmony_ci } 20878c2ecf20Sopenharmony_ci#endif 20888c2ecf20Sopenharmony_ci#ifdef CONFIG_CPUSETS 20898c2ecf20Sopenharmony_ci p->cpuset_mem_spread_rotor = NUMA_NO_NODE; 20908c2ecf20Sopenharmony_ci p->cpuset_slab_spread_rotor = NUMA_NO_NODE; 20918c2ecf20Sopenharmony_ci seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock); 20928c2ecf20Sopenharmony_ci#endif 20938c2ecf20Sopenharmony_ci#ifdef CONFIG_TRACE_IRQFLAGS 20948c2ecf20Sopenharmony_ci memset(&p->irqtrace, 0, sizeof(p->irqtrace)); 20958c2ecf20Sopenharmony_ci p->irqtrace.hardirq_disable_ip = _THIS_IP_; 20968c2ecf20Sopenharmony_ci p->irqtrace.softirq_enable_ip = _THIS_IP_; 20978c2ecf20Sopenharmony_ci p->softirqs_enabled = 1; 20988c2ecf20Sopenharmony_ci p->softirq_context = 0; 20998c2ecf20Sopenharmony_ci#endif 21008c2ecf20Sopenharmony_ci 21018c2ecf20Sopenharmony_ci p->pagefault_disabled = 0; 21028c2ecf20Sopenharmony_ci 21038c2ecf20Sopenharmony_ci#ifdef CONFIG_LOCKDEP 21048c2ecf20Sopenharmony_ci lockdep_init_task(p); 
21058c2ecf20Sopenharmony_ci#endif 21068c2ecf20Sopenharmony_ci 21078c2ecf20Sopenharmony_ci#ifdef CONFIG_DEBUG_MUTEXES 21088c2ecf20Sopenharmony_ci p->blocked_on = NULL; /* not blocked yet */ 21098c2ecf20Sopenharmony_ci#endif 21108c2ecf20Sopenharmony_ci#ifdef CONFIG_BCACHE 21118c2ecf20Sopenharmony_ci p->sequential_io = 0; 21128c2ecf20Sopenharmony_ci p->sequential_io_avg = 0; 21138c2ecf20Sopenharmony_ci#endif 21148c2ecf20Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL 21158c2ecf20Sopenharmony_ci p->bpf_ctx = NULL; 21168c2ecf20Sopenharmony_ci#endif 21178c2ecf20Sopenharmony_ci 21188c2ecf20Sopenharmony_ci /* Perform scheduler related setup. Assign this task to a CPU. */ 21198c2ecf20Sopenharmony_ci retval = sched_fork(clone_flags, p); 21208c2ecf20Sopenharmony_ci if (retval) 21218c2ecf20Sopenharmony_ci goto bad_fork_cleanup_policy; 21228c2ecf20Sopenharmony_ci 21238c2ecf20Sopenharmony_ci retval = perf_event_init_task(p); 21248c2ecf20Sopenharmony_ci if (retval) 21258c2ecf20Sopenharmony_ci goto bad_fork_cleanup_policy; 21268c2ecf20Sopenharmony_ci retval = audit_alloc(p); 21278c2ecf20Sopenharmony_ci if (retval) 21288c2ecf20Sopenharmony_ci goto bad_fork_cleanup_perf; 21298c2ecf20Sopenharmony_ci /* copy all the process information */ 21308c2ecf20Sopenharmony_ci shm_init_task(p); 21318c2ecf20Sopenharmony_ci retval = security_task_alloc(p, clone_flags); 21328c2ecf20Sopenharmony_ci if (retval) 21338c2ecf20Sopenharmony_ci goto bad_fork_cleanup_audit; 21348c2ecf20Sopenharmony_ci retval = copy_semundo(clone_flags, p); 21358c2ecf20Sopenharmony_ci if (retval) 21368c2ecf20Sopenharmony_ci goto bad_fork_cleanup_security; 21378c2ecf20Sopenharmony_ci retval = copy_files(clone_flags, p); 21388c2ecf20Sopenharmony_ci if (retval) 21398c2ecf20Sopenharmony_ci goto bad_fork_cleanup_semundo; 21408c2ecf20Sopenharmony_ci retval = copy_fs(clone_flags, p); 21418c2ecf20Sopenharmony_ci if (retval) 21428c2ecf20Sopenharmony_ci goto bad_fork_cleanup_files; 21438c2ecf20Sopenharmony_ci retval = 
copy_sighand(clone_flags, p); 21448c2ecf20Sopenharmony_ci if (retval) 21458c2ecf20Sopenharmony_ci goto bad_fork_cleanup_fs; 21468c2ecf20Sopenharmony_ci retval = copy_signal(clone_flags, p); 21478c2ecf20Sopenharmony_ci if (retval) 21488c2ecf20Sopenharmony_ci goto bad_fork_cleanup_sighand; 21498c2ecf20Sopenharmony_ci retval = copy_mm(clone_flags, p); 21508c2ecf20Sopenharmony_ci if (retval) 21518c2ecf20Sopenharmony_ci goto bad_fork_cleanup_signal; 21528c2ecf20Sopenharmony_ci retval = copy_namespaces(clone_flags, p); 21538c2ecf20Sopenharmony_ci if (retval) 21548c2ecf20Sopenharmony_ci goto bad_fork_cleanup_mm; 21558c2ecf20Sopenharmony_ci retval = copy_io(clone_flags, p); 21568c2ecf20Sopenharmony_ci if (retval) 21578c2ecf20Sopenharmony_ci goto bad_fork_cleanup_namespaces; 21588c2ecf20Sopenharmony_ci retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls); 21598c2ecf20Sopenharmony_ci if (retval) 21608c2ecf20Sopenharmony_ci goto bad_fork_cleanup_io; 21618c2ecf20Sopenharmony_ci 21628c2ecf20Sopenharmony_ci stackleak_task_init(p); 21638c2ecf20Sopenharmony_ci 21648c2ecf20Sopenharmony_ci if (pid != &init_struct_pid) { 21658c2ecf20Sopenharmony_ci pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid, 21668c2ecf20Sopenharmony_ci args->set_tid_size); 21678c2ecf20Sopenharmony_ci if (IS_ERR(pid)) { 21688c2ecf20Sopenharmony_ci retval = PTR_ERR(pid); 21698c2ecf20Sopenharmony_ci goto bad_fork_cleanup_thread; 21708c2ecf20Sopenharmony_ci } 21718c2ecf20Sopenharmony_ci } 21728c2ecf20Sopenharmony_ci 21738c2ecf20Sopenharmony_ci /* 21748c2ecf20Sopenharmony_ci * This has to happen after we've potentially unshared the file 21758c2ecf20Sopenharmony_ci * descriptor table (so that the pidfd doesn't leak into the child 21768c2ecf20Sopenharmony_ci * if the fd table isn't shared). 
21778c2ecf20Sopenharmony_ci */ 21788c2ecf20Sopenharmony_ci if (clone_flags & CLONE_PIDFD) { 21798c2ecf20Sopenharmony_ci retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); 21808c2ecf20Sopenharmony_ci if (retval < 0) 21818c2ecf20Sopenharmony_ci goto bad_fork_free_pid; 21828c2ecf20Sopenharmony_ci 21838c2ecf20Sopenharmony_ci pidfd = retval; 21848c2ecf20Sopenharmony_ci 21858c2ecf20Sopenharmony_ci pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid, 21868c2ecf20Sopenharmony_ci O_RDWR | O_CLOEXEC); 21878c2ecf20Sopenharmony_ci if (IS_ERR(pidfile)) { 21888c2ecf20Sopenharmony_ci put_unused_fd(pidfd); 21898c2ecf20Sopenharmony_ci retval = PTR_ERR(pidfile); 21908c2ecf20Sopenharmony_ci goto bad_fork_free_pid; 21918c2ecf20Sopenharmony_ci } 21928c2ecf20Sopenharmony_ci get_pid(pid); /* held by pidfile now */ 21938c2ecf20Sopenharmony_ci 21948c2ecf20Sopenharmony_ci retval = put_user(pidfd, args->pidfd); 21958c2ecf20Sopenharmony_ci if (retval) 21968c2ecf20Sopenharmony_ci goto bad_fork_put_pidfd; 21978c2ecf20Sopenharmony_ci } 21988c2ecf20Sopenharmony_ci 21998c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 22008c2ecf20Sopenharmony_ci p->plug = NULL; 22018c2ecf20Sopenharmony_ci#endif 22028c2ecf20Sopenharmony_ci futex_init_task(p); 22038c2ecf20Sopenharmony_ci 22048c2ecf20Sopenharmony_ci /* 22058c2ecf20Sopenharmony_ci * sigaltstack should be cleared when sharing the same VM 22068c2ecf20Sopenharmony_ci */ 22078c2ecf20Sopenharmony_ci if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) 22088c2ecf20Sopenharmony_ci sas_ss_reset(p); 22098c2ecf20Sopenharmony_ci 22108c2ecf20Sopenharmony_ci /* 22118c2ecf20Sopenharmony_ci * Syscall tracing and stepping should be turned off in the 22128c2ecf20Sopenharmony_ci * child regardless of CLONE_PTRACE. 
22138c2ecf20Sopenharmony_ci */ 22148c2ecf20Sopenharmony_ci user_disable_single_step(p); 22158c2ecf20Sopenharmony_ci clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 22168c2ecf20Sopenharmony_ci#ifdef TIF_SYSCALL_EMU 22178c2ecf20Sopenharmony_ci clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); 22188c2ecf20Sopenharmony_ci#endif 22198c2ecf20Sopenharmony_ci clear_tsk_latency_tracing(p); 22208c2ecf20Sopenharmony_ci 22218c2ecf20Sopenharmony_ci /* ok, now we should be set up.. */ 22228c2ecf20Sopenharmony_ci p->pid = pid_nr(pid); 22238c2ecf20Sopenharmony_ci if (clone_flags & CLONE_THREAD) { 22248c2ecf20Sopenharmony_ci p->group_leader = current->group_leader; 22258c2ecf20Sopenharmony_ci p->tgid = current->tgid; 22268c2ecf20Sopenharmony_ci } else { 22278c2ecf20Sopenharmony_ci p->group_leader = p; 22288c2ecf20Sopenharmony_ci p->tgid = p->pid; 22298c2ecf20Sopenharmony_ci } 22308c2ecf20Sopenharmony_ci 22318c2ecf20Sopenharmony_ci p->nr_dirtied = 0; 22328c2ecf20Sopenharmony_ci p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); 22338c2ecf20Sopenharmony_ci p->dirty_paused_when = 0; 22348c2ecf20Sopenharmony_ci 22358c2ecf20Sopenharmony_ci p->pdeath_signal = 0; 22368c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&p->thread_group); 22378c2ecf20Sopenharmony_ci p->task_works = NULL; 22388c2ecf20Sopenharmony_ci clear_posix_cputimers_work(p); 22398c2ecf20Sopenharmony_ci 22408c2ecf20Sopenharmony_ci /* 22418c2ecf20Sopenharmony_ci * Ensure that the cgroup subsystem policies allow the new process to be 22428c2ecf20Sopenharmony_ci * forked. It should be noted that the new process's css_set can be changed 22438c2ecf20Sopenharmony_ci * between here and cgroup_post_fork() if an organisation operation is in 22448c2ecf20Sopenharmony_ci * progress. 
22458c2ecf20Sopenharmony_ci */ 22468c2ecf20Sopenharmony_ci retval = cgroup_can_fork(p, args); 22478c2ecf20Sopenharmony_ci if (retval) 22488c2ecf20Sopenharmony_ci goto bad_fork_put_pidfd; 22498c2ecf20Sopenharmony_ci 22508c2ecf20Sopenharmony_ci /* 22518c2ecf20Sopenharmony_ci * From this point on we must avoid any synchronous user-space 22528c2ecf20Sopenharmony_ci * communication until we take the tasklist-lock. In particular, we do 22538c2ecf20Sopenharmony_ci * not want user-space to be able to predict the process start-time by 22548c2ecf20Sopenharmony_ci * stalling fork(2) after we recorded the start_time but before it is 22558c2ecf20Sopenharmony_ci * visible to the system. 22568c2ecf20Sopenharmony_ci */ 22578c2ecf20Sopenharmony_ci 22588c2ecf20Sopenharmony_ci p->start_time = ktime_get_ns(); 22598c2ecf20Sopenharmony_ci p->start_boottime = ktime_get_boottime_ns(); 22608c2ecf20Sopenharmony_ci 22618c2ecf20Sopenharmony_ci /* 22628c2ecf20Sopenharmony_ci * Make it visible to the rest of the system, but dont wake it up yet. 22638c2ecf20Sopenharmony_ci * Need tasklist lock for parent etc handling! 
22648c2ecf20Sopenharmony_ci */ 22658c2ecf20Sopenharmony_ci write_lock_irq(&tasklist_lock); 22668c2ecf20Sopenharmony_ci 22678c2ecf20Sopenharmony_ci /* CLONE_PARENT re-uses the old parent */ 22688c2ecf20Sopenharmony_ci if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { 22698c2ecf20Sopenharmony_ci p->real_parent = current->real_parent; 22708c2ecf20Sopenharmony_ci p->parent_exec_id = current->parent_exec_id; 22718c2ecf20Sopenharmony_ci if (clone_flags & CLONE_THREAD) 22728c2ecf20Sopenharmony_ci p->exit_signal = -1; 22738c2ecf20Sopenharmony_ci else 22748c2ecf20Sopenharmony_ci p->exit_signal = current->group_leader->exit_signal; 22758c2ecf20Sopenharmony_ci } else { 22768c2ecf20Sopenharmony_ci p->real_parent = current; 22778c2ecf20Sopenharmony_ci p->parent_exec_id = current->self_exec_id; 22788c2ecf20Sopenharmony_ci p->exit_signal = args->exit_signal; 22798c2ecf20Sopenharmony_ci } 22808c2ecf20Sopenharmony_ci 22818c2ecf20Sopenharmony_ci klp_copy_process(p); 22828c2ecf20Sopenharmony_ci 22838c2ecf20Sopenharmony_ci spin_lock(¤t->sighand->siglock); 22848c2ecf20Sopenharmony_ci 22858c2ecf20Sopenharmony_ci rseq_fork(p, clone_flags); 22868c2ecf20Sopenharmony_ci 22878c2ecf20Sopenharmony_ci /* Don't start children in a dying pid namespace */ 22888c2ecf20Sopenharmony_ci if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { 22898c2ecf20Sopenharmony_ci retval = -ENOMEM; 22908c2ecf20Sopenharmony_ci goto bad_fork_cancel_cgroup; 22918c2ecf20Sopenharmony_ci } 22928c2ecf20Sopenharmony_ci 22938c2ecf20Sopenharmony_ci /* Let kill terminate clone/fork in the middle */ 22948c2ecf20Sopenharmony_ci if (fatal_signal_pending(current)) { 22958c2ecf20Sopenharmony_ci retval = -EINTR; 22968c2ecf20Sopenharmony_ci goto bad_fork_cancel_cgroup; 22978c2ecf20Sopenharmony_ci } 22988c2ecf20Sopenharmony_ci 22998c2ecf20Sopenharmony_ci /* No more failure paths after this point. 
*/ 23008c2ecf20Sopenharmony_ci 23018c2ecf20Sopenharmony_ci /* 23028c2ecf20Sopenharmony_ci * Copy seccomp details explicitly here, in case they were changed 23038c2ecf20Sopenharmony_ci * before holding sighand lock. 23048c2ecf20Sopenharmony_ci */ 23058c2ecf20Sopenharmony_ci copy_seccomp(p); 23068c2ecf20Sopenharmony_ci 23078c2ecf20Sopenharmony_ci init_task_pid_links(p); 23088c2ecf20Sopenharmony_ci if (likely(p->pid)) { 23098c2ecf20Sopenharmony_ci ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); 23108c2ecf20Sopenharmony_ci 23118c2ecf20Sopenharmony_ci init_task_pid(p, PIDTYPE_PID, pid); 23128c2ecf20Sopenharmony_ci if (thread_group_leader(p)) { 23138c2ecf20Sopenharmony_ci init_task_pid(p, PIDTYPE_TGID, pid); 23148c2ecf20Sopenharmony_ci init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); 23158c2ecf20Sopenharmony_ci init_task_pid(p, PIDTYPE_SID, task_session(current)); 23168c2ecf20Sopenharmony_ci 23178c2ecf20Sopenharmony_ci if (is_child_reaper(pid)) { 23188c2ecf20Sopenharmony_ci ns_of_pid(pid)->child_reaper = p; 23198c2ecf20Sopenharmony_ci p->signal->flags |= SIGNAL_UNKILLABLE; 23208c2ecf20Sopenharmony_ci } 23218c2ecf20Sopenharmony_ci p->signal->shared_pending.signal = delayed.signal; 23228c2ecf20Sopenharmony_ci p->signal->tty = tty_kref_get(current->signal->tty); 23238c2ecf20Sopenharmony_ci /* 23248c2ecf20Sopenharmony_ci * Inherit has_child_subreaper flag under the same 23258c2ecf20Sopenharmony_ci * tasklist_lock with adding child to the process tree 23268c2ecf20Sopenharmony_ci * for propagate_has_child_subreaper optimization. 
23278c2ecf20Sopenharmony_ci */ 23288c2ecf20Sopenharmony_ci p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper || 23298c2ecf20Sopenharmony_ci p->real_parent->signal->is_child_subreaper; 23308c2ecf20Sopenharmony_ci list_add_tail(&p->sibling, &p->real_parent->children); 23318c2ecf20Sopenharmony_ci list_add_tail_rcu(&p->tasks, &init_task.tasks); 23328c2ecf20Sopenharmony_ci attach_pid(p, PIDTYPE_TGID); 23338c2ecf20Sopenharmony_ci attach_pid(p, PIDTYPE_PGID); 23348c2ecf20Sopenharmony_ci attach_pid(p, PIDTYPE_SID); 23358c2ecf20Sopenharmony_ci __this_cpu_inc(process_counts); 23368c2ecf20Sopenharmony_ci } else { 23378c2ecf20Sopenharmony_ci current->signal->nr_threads++; 23388c2ecf20Sopenharmony_ci atomic_inc(¤t->signal->live); 23398c2ecf20Sopenharmony_ci refcount_inc(¤t->signal->sigcnt); 23408c2ecf20Sopenharmony_ci task_join_group_stop(p); 23418c2ecf20Sopenharmony_ci list_add_tail_rcu(&p->thread_group, 23428c2ecf20Sopenharmony_ci &p->group_leader->thread_group); 23438c2ecf20Sopenharmony_ci list_add_tail_rcu(&p->thread_node, 23448c2ecf20Sopenharmony_ci &p->signal->thread_head); 23458c2ecf20Sopenharmony_ci } 23468c2ecf20Sopenharmony_ci attach_pid(p, PIDTYPE_PID); 23478c2ecf20Sopenharmony_ci nr_threads++; 23488c2ecf20Sopenharmony_ci } 23498c2ecf20Sopenharmony_ci total_forks++; 23508c2ecf20Sopenharmony_ci hlist_del_init(&delayed.node); 23518c2ecf20Sopenharmony_ci spin_unlock(¤t->sighand->siglock); 23528c2ecf20Sopenharmony_ci syscall_tracepoint_update(p); 23538c2ecf20Sopenharmony_ci write_unlock_irq(&tasklist_lock); 23548c2ecf20Sopenharmony_ci 23558c2ecf20Sopenharmony_ci if (pidfile) 23568c2ecf20Sopenharmony_ci fd_install(pidfd, pidfile); 23578c2ecf20Sopenharmony_ci 23588c2ecf20Sopenharmony_ci proc_fork_connector(p); 23598c2ecf20Sopenharmony_ci sched_post_fork(p, args); 23608c2ecf20Sopenharmony_ci cgroup_post_fork(p, args); 23618c2ecf20Sopenharmony_ci perf_event_fork(p); 23628c2ecf20Sopenharmony_ci 23638c2ecf20Sopenharmony_ci trace_task_newtask(p, 
clone_flags); 23648c2ecf20Sopenharmony_ci uprobe_copy_process(p, clone_flags); 23658c2ecf20Sopenharmony_ci 23668c2ecf20Sopenharmony_ci copy_oom_score_adj(clone_flags, p); 23678c2ecf20Sopenharmony_ci 23688c2ecf20Sopenharmony_ci return p; 23698c2ecf20Sopenharmony_ci 23708c2ecf20Sopenharmony_cibad_fork_cancel_cgroup: 23718c2ecf20Sopenharmony_ci spin_unlock(¤t->sighand->siglock); 23728c2ecf20Sopenharmony_ci write_unlock_irq(&tasklist_lock); 23738c2ecf20Sopenharmony_ci cgroup_cancel_fork(p, args); 23748c2ecf20Sopenharmony_cibad_fork_put_pidfd: 23758c2ecf20Sopenharmony_ci if (clone_flags & CLONE_PIDFD) { 23768c2ecf20Sopenharmony_ci fput(pidfile); 23778c2ecf20Sopenharmony_ci put_unused_fd(pidfd); 23788c2ecf20Sopenharmony_ci } 23798c2ecf20Sopenharmony_cibad_fork_free_pid: 23808c2ecf20Sopenharmony_ci if (pid != &init_struct_pid) 23818c2ecf20Sopenharmony_ci free_pid(pid); 23828c2ecf20Sopenharmony_cibad_fork_cleanup_thread: 23838c2ecf20Sopenharmony_ci exit_thread(p); 23848c2ecf20Sopenharmony_cibad_fork_cleanup_io: 23858c2ecf20Sopenharmony_ci if (p->io_context) 23868c2ecf20Sopenharmony_ci exit_io_context(p); 23878c2ecf20Sopenharmony_cibad_fork_cleanup_namespaces: 23888c2ecf20Sopenharmony_ci exit_task_namespaces(p); 23898c2ecf20Sopenharmony_cibad_fork_cleanup_mm: 23908c2ecf20Sopenharmony_ci if (p->mm) { 23918c2ecf20Sopenharmony_ci mm_clear_owner(p->mm, p); 23928c2ecf20Sopenharmony_ci mmput(p->mm); 23938c2ecf20Sopenharmony_ci } 23948c2ecf20Sopenharmony_cibad_fork_cleanup_signal: 23958c2ecf20Sopenharmony_ci if (!(clone_flags & CLONE_THREAD)) 23968c2ecf20Sopenharmony_ci free_signal_struct(p->signal); 23978c2ecf20Sopenharmony_cibad_fork_cleanup_sighand: 23988c2ecf20Sopenharmony_ci __cleanup_sighand(p->sighand); 23998c2ecf20Sopenharmony_cibad_fork_cleanup_fs: 24008c2ecf20Sopenharmony_ci exit_fs(p); /* blocking */ 24018c2ecf20Sopenharmony_cibad_fork_cleanup_files: 24028c2ecf20Sopenharmony_ci exit_files(p); /* blocking */ 24038c2ecf20Sopenharmony_cibad_fork_cleanup_semundo: 
24048c2ecf20Sopenharmony_ci exit_sem(p); 24058c2ecf20Sopenharmony_cibad_fork_cleanup_security: 24068c2ecf20Sopenharmony_ci security_task_free(p); 24078c2ecf20Sopenharmony_cibad_fork_cleanup_audit: 24088c2ecf20Sopenharmony_ci audit_free(p); 24098c2ecf20Sopenharmony_cibad_fork_cleanup_perf: 24108c2ecf20Sopenharmony_ci perf_event_free_task(p); 24118c2ecf20Sopenharmony_cibad_fork_cleanup_policy: 24128c2ecf20Sopenharmony_ci lockdep_free_task(p); 24138c2ecf20Sopenharmony_ci free_task_load_ptrs(p); 24148c2ecf20Sopenharmony_ci#ifdef CONFIG_NUMA 24158c2ecf20Sopenharmony_ci mpol_put(p->mempolicy); 24168c2ecf20Sopenharmony_cibad_fork_cleanup_threadgroup_lock: 24178c2ecf20Sopenharmony_ci#endif 24188c2ecf20Sopenharmony_ci delayacct_tsk_free(p); 24198c2ecf20Sopenharmony_cibad_fork_cleanup_count: 24208c2ecf20Sopenharmony_ci atomic_dec(&p->cred->user->processes); 24218c2ecf20Sopenharmony_ci exit_creds(p); 24228c2ecf20Sopenharmony_cibad_fork_free: 24238c2ecf20Sopenharmony_ci p->state = TASK_DEAD; 24248c2ecf20Sopenharmony_ci put_task_stack(p); 24258c2ecf20Sopenharmony_ci delayed_free_task(p); 24268c2ecf20Sopenharmony_cifork_out: 24278c2ecf20Sopenharmony_ci spin_lock_irq(¤t->sighand->siglock); 24288c2ecf20Sopenharmony_ci hlist_del_init(&delayed.node); 24298c2ecf20Sopenharmony_ci spin_unlock_irq(¤t->sighand->siglock); 24308c2ecf20Sopenharmony_ci return ERR_PTR(retval); 24318c2ecf20Sopenharmony_ci} 24328c2ecf20Sopenharmony_ci 24338c2ecf20Sopenharmony_cistatic inline void init_idle_pids(struct task_struct *idle) 24348c2ecf20Sopenharmony_ci{ 24358c2ecf20Sopenharmony_ci enum pid_type type; 24368c2ecf20Sopenharmony_ci 24378c2ecf20Sopenharmony_ci for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { 24388c2ecf20Sopenharmony_ci INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ 24398c2ecf20Sopenharmony_ci init_task_pid(idle, type, &init_struct_pid); 24408c2ecf20Sopenharmony_ci } 24418c2ecf20Sopenharmony_ci} 24428c2ecf20Sopenharmony_ci 24438c2ecf20Sopenharmony_cistruct 
task_struct * __init fork_idle(int cpu) 24448c2ecf20Sopenharmony_ci{ 24458c2ecf20Sopenharmony_ci struct task_struct *task; 24468c2ecf20Sopenharmony_ci struct kernel_clone_args args = { 24478c2ecf20Sopenharmony_ci .flags = CLONE_VM, 24488c2ecf20Sopenharmony_ci }; 24498c2ecf20Sopenharmony_ci 24508c2ecf20Sopenharmony_ci task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args); 24518c2ecf20Sopenharmony_ci if (!IS_ERR(task)) { 24528c2ecf20Sopenharmony_ci init_idle_pids(task); 24538c2ecf20Sopenharmony_ci init_idle(task, cpu); 24548c2ecf20Sopenharmony_ci } 24558c2ecf20Sopenharmony_ci 24568c2ecf20Sopenharmony_ci return task; 24578c2ecf20Sopenharmony_ci} 24588c2ecf20Sopenharmony_ci 24598c2ecf20Sopenharmony_ci/* 24608c2ecf20Sopenharmony_ci * This is like kernel_clone(), but shaved down and tailored to just 24618c2ecf20Sopenharmony_ci * creating io_uring workers. It returns a created task, or an error pointer. 24628c2ecf20Sopenharmony_ci * The returned task is inactive, and the caller must fire it up through 24638c2ecf20Sopenharmony_ci * wake_up_new_task(p). All signals are blocked in the created task. 
24648c2ecf20Sopenharmony_ci */ 24658c2ecf20Sopenharmony_cistruct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node) 24668c2ecf20Sopenharmony_ci{ 24678c2ecf20Sopenharmony_ci unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD| 24688c2ecf20Sopenharmony_ci CLONE_IO; 24698c2ecf20Sopenharmony_ci struct kernel_clone_args args = { 24708c2ecf20Sopenharmony_ci .flags = ((lower_32_bits(flags) | CLONE_VM | 24718c2ecf20Sopenharmony_ci CLONE_UNTRACED) & ~CSIGNAL), 24728c2ecf20Sopenharmony_ci .exit_signal = (lower_32_bits(flags) & CSIGNAL), 24738c2ecf20Sopenharmony_ci .stack = (unsigned long)fn, 24748c2ecf20Sopenharmony_ci .stack_size = (unsigned long)arg, 24758c2ecf20Sopenharmony_ci .io_thread = 1, 24768c2ecf20Sopenharmony_ci }; 24778c2ecf20Sopenharmony_ci 24788c2ecf20Sopenharmony_ci return copy_process(NULL, 0, node, &args); 24798c2ecf20Sopenharmony_ci} 24808c2ecf20Sopenharmony_ci 24818c2ecf20Sopenharmony_ci/* 24828c2ecf20Sopenharmony_ci * Ok, this is the main fork-routine. 24838c2ecf20Sopenharmony_ci * 24848c2ecf20Sopenharmony_ci * It copies the process, and if successful kick-starts 24858c2ecf20Sopenharmony_ci * it and waits for it to finish using the VM if required. 24868c2ecf20Sopenharmony_ci * 24878c2ecf20Sopenharmony_ci * args->exit_signal is expected to be checked for sanity by the caller. 24888c2ecf20Sopenharmony_ci */ 24898c2ecf20Sopenharmony_cipid_t kernel_clone(struct kernel_clone_args *args) 24908c2ecf20Sopenharmony_ci{ 24918c2ecf20Sopenharmony_ci u64 clone_flags = args->flags; 24928c2ecf20Sopenharmony_ci struct completion vfork; 24938c2ecf20Sopenharmony_ci struct pid *pid; 24948c2ecf20Sopenharmony_ci struct task_struct *p; 24958c2ecf20Sopenharmony_ci int trace = 0; 24968c2ecf20Sopenharmony_ci pid_t nr; 24978c2ecf20Sopenharmony_ci 24988c2ecf20Sopenharmony_ci /* 24998c2ecf20Sopenharmony_ci * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument 25008c2ecf20Sopenharmony_ci * to return the pidfd. 
Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are 25018c2ecf20Sopenharmony_ci * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate 25028c2ecf20Sopenharmony_ci * field in struct clone_args and it still doesn't make sense to have 25038c2ecf20Sopenharmony_ci * them both point at the same memory location. Performing this check 25048c2ecf20Sopenharmony_ci * here has the advantage that we don't need to have a separate helper 25058c2ecf20Sopenharmony_ci * to check for legacy clone(). 25068c2ecf20Sopenharmony_ci */ 25078c2ecf20Sopenharmony_ci if ((args->flags & CLONE_PIDFD) && 25088c2ecf20Sopenharmony_ci (args->flags & CLONE_PARENT_SETTID) && 25098c2ecf20Sopenharmony_ci (args->pidfd == args->parent_tid)) 25108c2ecf20Sopenharmony_ci return -EINVAL; 25118c2ecf20Sopenharmony_ci 25128c2ecf20Sopenharmony_ci /* 25138c2ecf20Sopenharmony_ci * Determine whether and which event to report to ptracer. When 25148c2ecf20Sopenharmony_ci * called from kernel_thread or CLONE_UNTRACED is explicitly 25158c2ecf20Sopenharmony_ci * requested, no event is reported; otherwise, report if the event 25168c2ecf20Sopenharmony_ci * for the type of forking is enabled. 
25178c2ecf20Sopenharmony_ci */ 25188c2ecf20Sopenharmony_ci if (!(clone_flags & CLONE_UNTRACED)) { 25198c2ecf20Sopenharmony_ci if (clone_flags & CLONE_VFORK) 25208c2ecf20Sopenharmony_ci trace = PTRACE_EVENT_VFORK; 25218c2ecf20Sopenharmony_ci else if (args->exit_signal != SIGCHLD) 25228c2ecf20Sopenharmony_ci trace = PTRACE_EVENT_CLONE; 25238c2ecf20Sopenharmony_ci else 25248c2ecf20Sopenharmony_ci trace = PTRACE_EVENT_FORK; 25258c2ecf20Sopenharmony_ci 25268c2ecf20Sopenharmony_ci if (likely(!ptrace_event_enabled(current, trace))) 25278c2ecf20Sopenharmony_ci trace = 0; 25288c2ecf20Sopenharmony_ci } 25298c2ecf20Sopenharmony_ci 25308c2ecf20Sopenharmony_ci p = copy_process(NULL, trace, NUMA_NO_NODE, args); 25318c2ecf20Sopenharmony_ci add_latent_entropy(); 25328c2ecf20Sopenharmony_ci 25338c2ecf20Sopenharmony_ci if (IS_ERR(p)) 25348c2ecf20Sopenharmony_ci return PTR_ERR(p); 25358c2ecf20Sopenharmony_ci 25368c2ecf20Sopenharmony_ci /* 25378c2ecf20Sopenharmony_ci * Do this prior waking up the new thread - the thread pointer 25388c2ecf20Sopenharmony_ci * might get invalid after that point, if the thread exits quickly. 
25398c2ecf20Sopenharmony_ci */ 25408c2ecf20Sopenharmony_ci trace_sched_process_fork(current, p); 25418c2ecf20Sopenharmony_ci 25428c2ecf20Sopenharmony_ci pid = get_task_pid(p, PIDTYPE_PID); 25438c2ecf20Sopenharmony_ci nr = pid_vnr(pid); 25448c2ecf20Sopenharmony_ci 25458c2ecf20Sopenharmony_ci if (clone_flags & CLONE_PARENT_SETTID) 25468c2ecf20Sopenharmony_ci put_user(nr, args->parent_tid); 25478c2ecf20Sopenharmony_ci 25488c2ecf20Sopenharmony_ci if (clone_flags & CLONE_VFORK) { 25498c2ecf20Sopenharmony_ci p->vfork_done = &vfork; 25508c2ecf20Sopenharmony_ci init_completion(&vfork); 25518c2ecf20Sopenharmony_ci get_task_struct(p); 25528c2ecf20Sopenharmony_ci } 25538c2ecf20Sopenharmony_ci 25548c2ecf20Sopenharmony_ci CALL_HCK_LITE_HOOK(ced_kernel_clone_lhck, p); 25558c2ecf20Sopenharmony_ci wake_up_new_task(p); 25568c2ecf20Sopenharmony_ci 25578c2ecf20Sopenharmony_ci /* forking complete and child started to run, tell ptracer */ 25588c2ecf20Sopenharmony_ci if (unlikely(trace)) 25598c2ecf20Sopenharmony_ci ptrace_event_pid(trace, pid); 25608c2ecf20Sopenharmony_ci 25618c2ecf20Sopenharmony_ci if (clone_flags & CLONE_VFORK) { 25628c2ecf20Sopenharmony_ci if (!wait_for_vfork_done(p, &vfork)) 25638c2ecf20Sopenharmony_ci ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); 25648c2ecf20Sopenharmony_ci } 25658c2ecf20Sopenharmony_ci 25668c2ecf20Sopenharmony_ci put_pid(pid); 25678c2ecf20Sopenharmony_ci return nr; 25688c2ecf20Sopenharmony_ci} 25698c2ecf20Sopenharmony_ci 25708c2ecf20Sopenharmony_ci/* 25718c2ecf20Sopenharmony_ci * Create a kernel thread. 
25728c2ecf20Sopenharmony_ci */ 25738c2ecf20Sopenharmony_cipid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) 25748c2ecf20Sopenharmony_ci{ 25758c2ecf20Sopenharmony_ci struct kernel_clone_args args = { 25768c2ecf20Sopenharmony_ci .flags = ((lower_32_bits(flags) | CLONE_VM | 25778c2ecf20Sopenharmony_ci CLONE_UNTRACED) & ~CSIGNAL), 25788c2ecf20Sopenharmony_ci .exit_signal = (lower_32_bits(flags) & CSIGNAL), 25798c2ecf20Sopenharmony_ci .stack = (unsigned long)fn, 25808c2ecf20Sopenharmony_ci .stack_size = (unsigned long)arg, 25818c2ecf20Sopenharmony_ci }; 25828c2ecf20Sopenharmony_ci 25838c2ecf20Sopenharmony_ci return kernel_clone(&args); 25848c2ecf20Sopenharmony_ci} 25858c2ecf20Sopenharmony_ci 25868c2ecf20Sopenharmony_ci#ifdef __ARCH_WANT_SYS_FORK 25878c2ecf20Sopenharmony_ciSYSCALL_DEFINE0(fork) 25888c2ecf20Sopenharmony_ci{ 25898c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU 25908c2ecf20Sopenharmony_ci struct kernel_clone_args args = { 25918c2ecf20Sopenharmony_ci .exit_signal = SIGCHLD, 25928c2ecf20Sopenharmony_ci }; 25938c2ecf20Sopenharmony_ci 25948c2ecf20Sopenharmony_ci return kernel_clone(&args); 25958c2ecf20Sopenharmony_ci#else 25968c2ecf20Sopenharmony_ci /* can not support in nommu mode */ 25978c2ecf20Sopenharmony_ci return -EINVAL; 25988c2ecf20Sopenharmony_ci#endif 25998c2ecf20Sopenharmony_ci} 26008c2ecf20Sopenharmony_ci#endif 26018c2ecf20Sopenharmony_ci 26028c2ecf20Sopenharmony_ci#ifdef __ARCH_WANT_SYS_VFORK 26038c2ecf20Sopenharmony_ciSYSCALL_DEFINE0(vfork) 26048c2ecf20Sopenharmony_ci{ 26058c2ecf20Sopenharmony_ci struct kernel_clone_args args = { 26068c2ecf20Sopenharmony_ci .flags = CLONE_VFORK | CLONE_VM, 26078c2ecf20Sopenharmony_ci .exit_signal = SIGCHLD, 26088c2ecf20Sopenharmony_ci }; 26098c2ecf20Sopenharmony_ci 26108c2ecf20Sopenharmony_ci return kernel_clone(&args); 26118c2ecf20Sopenharmony_ci} 26128c2ecf20Sopenharmony_ci#endif 26138c2ecf20Sopenharmony_ci 26148c2ecf20Sopenharmony_ci#ifdef __ARCH_WANT_SYS_CLONE 26158c2ecf20Sopenharmony_ci#ifdef 
CONFIG_CLONE_BACKWARDS 26168c2ecf20Sopenharmony_ciSYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, 26178c2ecf20Sopenharmony_ci int __user *, parent_tidptr, 26188c2ecf20Sopenharmony_ci unsigned long, tls, 26198c2ecf20Sopenharmony_ci int __user *, child_tidptr) 26208c2ecf20Sopenharmony_ci#elif defined(CONFIG_CLONE_BACKWARDS2) 26218c2ecf20Sopenharmony_ciSYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, 26228c2ecf20Sopenharmony_ci int __user *, parent_tidptr, 26238c2ecf20Sopenharmony_ci int __user *, child_tidptr, 26248c2ecf20Sopenharmony_ci unsigned long, tls) 26258c2ecf20Sopenharmony_ci#elif defined(CONFIG_CLONE_BACKWARDS3) 26268c2ecf20Sopenharmony_ciSYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, 26278c2ecf20Sopenharmony_ci int, stack_size, 26288c2ecf20Sopenharmony_ci int __user *, parent_tidptr, 26298c2ecf20Sopenharmony_ci int __user *, child_tidptr, 26308c2ecf20Sopenharmony_ci unsigned long, tls) 26318c2ecf20Sopenharmony_ci#else 26328c2ecf20Sopenharmony_ciSYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, 26338c2ecf20Sopenharmony_ci int __user *, parent_tidptr, 26348c2ecf20Sopenharmony_ci int __user *, child_tidptr, 26358c2ecf20Sopenharmony_ci unsigned long, tls) 26368c2ecf20Sopenharmony_ci#endif 26378c2ecf20Sopenharmony_ci{ 26388c2ecf20Sopenharmony_ci struct kernel_clone_args args = { 26398c2ecf20Sopenharmony_ci .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), 26408c2ecf20Sopenharmony_ci .pidfd = parent_tidptr, 26418c2ecf20Sopenharmony_ci .child_tid = child_tidptr, 26428c2ecf20Sopenharmony_ci .parent_tid = parent_tidptr, 26438c2ecf20Sopenharmony_ci .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), 26448c2ecf20Sopenharmony_ci .stack = newsp, 26458c2ecf20Sopenharmony_ci .tls = tls, 26468c2ecf20Sopenharmony_ci }; 26478c2ecf20Sopenharmony_ci 26488c2ecf20Sopenharmony_ci return kernel_clone(&args); 26498c2ecf20Sopenharmony_ci} 26508c2ecf20Sopenharmony_ci#endif 
26518c2ecf20Sopenharmony_ci 26528c2ecf20Sopenharmony_ci#ifdef __ARCH_WANT_SYS_CLONE3 26538c2ecf20Sopenharmony_ci 26548c2ecf20Sopenharmony_cinoinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, 26558c2ecf20Sopenharmony_ci struct clone_args __user *uargs, 26568c2ecf20Sopenharmony_ci size_t usize) 26578c2ecf20Sopenharmony_ci{ 26588c2ecf20Sopenharmony_ci int err; 26598c2ecf20Sopenharmony_ci struct clone_args args; 26608c2ecf20Sopenharmony_ci pid_t *kset_tid = kargs->set_tid; 26618c2ecf20Sopenharmony_ci 26628c2ecf20Sopenharmony_ci BUILD_BUG_ON(offsetofend(struct clone_args, tls) != 26638c2ecf20Sopenharmony_ci CLONE_ARGS_SIZE_VER0); 26648c2ecf20Sopenharmony_ci BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) != 26658c2ecf20Sopenharmony_ci CLONE_ARGS_SIZE_VER1); 26668c2ecf20Sopenharmony_ci BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) != 26678c2ecf20Sopenharmony_ci CLONE_ARGS_SIZE_VER2); 26688c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2); 26698c2ecf20Sopenharmony_ci 26708c2ecf20Sopenharmony_ci if (unlikely(usize > PAGE_SIZE)) 26718c2ecf20Sopenharmony_ci return -E2BIG; 26728c2ecf20Sopenharmony_ci if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) 26738c2ecf20Sopenharmony_ci return -EINVAL; 26748c2ecf20Sopenharmony_ci 26758c2ecf20Sopenharmony_ci err = copy_struct_from_user(&args, sizeof(args), uargs, usize); 26768c2ecf20Sopenharmony_ci if (err) 26778c2ecf20Sopenharmony_ci return err; 26788c2ecf20Sopenharmony_ci 26798c2ecf20Sopenharmony_ci if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL)) 26808c2ecf20Sopenharmony_ci return -EINVAL; 26818c2ecf20Sopenharmony_ci 26828c2ecf20Sopenharmony_ci if (unlikely(!args.set_tid && args.set_tid_size > 0)) 26838c2ecf20Sopenharmony_ci return -EINVAL; 26848c2ecf20Sopenharmony_ci 26858c2ecf20Sopenharmony_ci if (unlikely(args.set_tid && args.set_tid_size == 0)) 26868c2ecf20Sopenharmony_ci return -EINVAL; 26878c2ecf20Sopenharmony_ci 26888c2ecf20Sopenharmony_ci /* 
26898c2ecf20Sopenharmony_ci * Verify that higher 32bits of exit_signal are unset and that 26908c2ecf20Sopenharmony_ci * it is a valid signal 26918c2ecf20Sopenharmony_ci */ 26928c2ecf20Sopenharmony_ci if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) || 26938c2ecf20Sopenharmony_ci !valid_signal(args.exit_signal))) 26948c2ecf20Sopenharmony_ci return -EINVAL; 26958c2ecf20Sopenharmony_ci 26968c2ecf20Sopenharmony_ci if ((args.flags & CLONE_INTO_CGROUP) && 26978c2ecf20Sopenharmony_ci (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2)) 26988c2ecf20Sopenharmony_ci return -EINVAL; 26998c2ecf20Sopenharmony_ci 27008c2ecf20Sopenharmony_ci *kargs = (struct kernel_clone_args){ 27018c2ecf20Sopenharmony_ci .flags = args.flags, 27028c2ecf20Sopenharmony_ci .pidfd = u64_to_user_ptr(args.pidfd), 27038c2ecf20Sopenharmony_ci .child_tid = u64_to_user_ptr(args.child_tid), 27048c2ecf20Sopenharmony_ci .parent_tid = u64_to_user_ptr(args.parent_tid), 27058c2ecf20Sopenharmony_ci .exit_signal = args.exit_signal, 27068c2ecf20Sopenharmony_ci .stack = args.stack, 27078c2ecf20Sopenharmony_ci .stack_size = args.stack_size, 27088c2ecf20Sopenharmony_ci .tls = args.tls, 27098c2ecf20Sopenharmony_ci .set_tid_size = args.set_tid_size, 27108c2ecf20Sopenharmony_ci .cgroup = args.cgroup, 27118c2ecf20Sopenharmony_ci }; 27128c2ecf20Sopenharmony_ci 27138c2ecf20Sopenharmony_ci if (args.set_tid && 27148c2ecf20Sopenharmony_ci copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid), 27158c2ecf20Sopenharmony_ci (kargs->set_tid_size * sizeof(pid_t)))) 27168c2ecf20Sopenharmony_ci return -EFAULT; 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci kargs->set_tid = kset_tid; 27198c2ecf20Sopenharmony_ci 27208c2ecf20Sopenharmony_ci return 0; 27218c2ecf20Sopenharmony_ci} 27228c2ecf20Sopenharmony_ci 27238c2ecf20Sopenharmony_ci/** 27248c2ecf20Sopenharmony_ci * clone3_stack_valid - check and prepare stack 27258c2ecf20Sopenharmony_ci * @kargs: kernel clone args 27268c2ecf20Sopenharmony_ci * 27278c2ecf20Sopenharmony_ci 
* Verify that the stack arguments userspace gave us are sane. 27288c2ecf20Sopenharmony_ci * In addition, set the stack direction for userspace since it's easy for us to 27298c2ecf20Sopenharmony_ci * determine. 27308c2ecf20Sopenharmony_ci */ 27318c2ecf20Sopenharmony_cistatic inline bool clone3_stack_valid(struct kernel_clone_args *kargs) 27328c2ecf20Sopenharmony_ci{ 27338c2ecf20Sopenharmony_ci if (kargs->stack == 0) { 27348c2ecf20Sopenharmony_ci if (kargs->stack_size > 0) 27358c2ecf20Sopenharmony_ci return false; 27368c2ecf20Sopenharmony_ci } else { 27378c2ecf20Sopenharmony_ci if (kargs->stack_size == 0) 27388c2ecf20Sopenharmony_ci return false; 27398c2ecf20Sopenharmony_ci 27408c2ecf20Sopenharmony_ci if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) 27418c2ecf20Sopenharmony_ci return false; 27428c2ecf20Sopenharmony_ci 27438c2ecf20Sopenharmony_ci#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64) 27448c2ecf20Sopenharmony_ci kargs->stack += kargs->stack_size; 27458c2ecf20Sopenharmony_ci#endif 27468c2ecf20Sopenharmony_ci } 27478c2ecf20Sopenharmony_ci 27488c2ecf20Sopenharmony_ci return true; 27498c2ecf20Sopenharmony_ci} 27508c2ecf20Sopenharmony_ci 27518c2ecf20Sopenharmony_cistatic bool clone3_args_valid(struct kernel_clone_args *kargs) 27528c2ecf20Sopenharmony_ci{ 27538c2ecf20Sopenharmony_ci /* Verify that no unknown flags are passed along. 
*/ 27548c2ecf20Sopenharmony_ci if (kargs->flags & 27558c2ecf20Sopenharmony_ci ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP)) 27568c2ecf20Sopenharmony_ci return false; 27578c2ecf20Sopenharmony_ci 27588c2ecf20Sopenharmony_ci /* 27598c2ecf20Sopenharmony_ci * - make the CLONE_DETACHED bit reuseable for clone3 27608c2ecf20Sopenharmony_ci * - make the CSIGNAL bits reuseable for clone3 27618c2ecf20Sopenharmony_ci */ 27628c2ecf20Sopenharmony_ci if (kargs->flags & (CLONE_DETACHED | (CSIGNAL & (~CLONE_NEWTIME)))) 27638c2ecf20Sopenharmony_ci return false; 27648c2ecf20Sopenharmony_ci 27658c2ecf20Sopenharmony_ci if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) == 27668c2ecf20Sopenharmony_ci (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) 27678c2ecf20Sopenharmony_ci return false; 27688c2ecf20Sopenharmony_ci 27698c2ecf20Sopenharmony_ci if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) && 27708c2ecf20Sopenharmony_ci kargs->exit_signal) 27718c2ecf20Sopenharmony_ci return false; 27728c2ecf20Sopenharmony_ci 27738c2ecf20Sopenharmony_ci if (!clone3_stack_valid(kargs)) 27748c2ecf20Sopenharmony_ci return false; 27758c2ecf20Sopenharmony_ci 27768c2ecf20Sopenharmony_ci return true; 27778c2ecf20Sopenharmony_ci} 27788c2ecf20Sopenharmony_ci 27798c2ecf20Sopenharmony_ci/** 27808c2ecf20Sopenharmony_ci * clone3 - create a new process with specific properties 27818c2ecf20Sopenharmony_ci * @uargs: argument structure 27828c2ecf20Sopenharmony_ci * @size: size of @uargs 27838c2ecf20Sopenharmony_ci * 27848c2ecf20Sopenharmony_ci * clone3() is the extensible successor to clone()/clone2(). 27858c2ecf20Sopenharmony_ci * It takes a struct as argument that is versioned by its size. 27868c2ecf20Sopenharmony_ci * 27878c2ecf20Sopenharmony_ci * Return: On success, a positive PID for the child process. 27888c2ecf20Sopenharmony_ci * On error, a negative errno number. 
27898c2ecf20Sopenharmony_ci */ 27908c2ecf20Sopenharmony_ciSYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) 27918c2ecf20Sopenharmony_ci{ 27928c2ecf20Sopenharmony_ci int err; 27938c2ecf20Sopenharmony_ci 27948c2ecf20Sopenharmony_ci struct kernel_clone_args kargs; 27958c2ecf20Sopenharmony_ci pid_t set_tid[MAX_PID_NS_LEVEL]; 27968c2ecf20Sopenharmony_ci 27978c2ecf20Sopenharmony_ci kargs.set_tid = set_tid; 27988c2ecf20Sopenharmony_ci 27998c2ecf20Sopenharmony_ci err = copy_clone_args_from_user(&kargs, uargs, size); 28008c2ecf20Sopenharmony_ci if (err) 28018c2ecf20Sopenharmony_ci return err; 28028c2ecf20Sopenharmony_ci 28038c2ecf20Sopenharmony_ci if (!clone3_args_valid(&kargs)) 28048c2ecf20Sopenharmony_ci return -EINVAL; 28058c2ecf20Sopenharmony_ci 28068c2ecf20Sopenharmony_ci return kernel_clone(&kargs); 28078c2ecf20Sopenharmony_ci} 28088c2ecf20Sopenharmony_ci#endif 28098c2ecf20Sopenharmony_ci 28108c2ecf20Sopenharmony_civoid walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data) 28118c2ecf20Sopenharmony_ci{ 28128c2ecf20Sopenharmony_ci struct task_struct *leader, *parent, *child; 28138c2ecf20Sopenharmony_ci int res; 28148c2ecf20Sopenharmony_ci 28158c2ecf20Sopenharmony_ci read_lock(&tasklist_lock); 28168c2ecf20Sopenharmony_ci leader = top = top->group_leader; 28178c2ecf20Sopenharmony_cidown: 28188c2ecf20Sopenharmony_ci for_each_thread(leader, parent) { 28198c2ecf20Sopenharmony_ci list_for_each_entry(child, &parent->children, sibling) { 28208c2ecf20Sopenharmony_ci res = visitor(child, data); 28218c2ecf20Sopenharmony_ci if (res) { 28228c2ecf20Sopenharmony_ci if (res < 0) 28238c2ecf20Sopenharmony_ci goto out; 28248c2ecf20Sopenharmony_ci leader = child; 28258c2ecf20Sopenharmony_ci goto down; 28268c2ecf20Sopenharmony_ci } 28278c2ecf20Sopenharmony_ciup: 28288c2ecf20Sopenharmony_ci ; 28298c2ecf20Sopenharmony_ci } 28308c2ecf20Sopenharmony_ci } 28318c2ecf20Sopenharmony_ci 28328c2ecf20Sopenharmony_ci if (leader != top) { 
28338c2ecf20Sopenharmony_ci child = leader; 28348c2ecf20Sopenharmony_ci parent = child->real_parent; 28358c2ecf20Sopenharmony_ci leader = parent->group_leader; 28368c2ecf20Sopenharmony_ci goto up; 28378c2ecf20Sopenharmony_ci } 28388c2ecf20Sopenharmony_ciout: 28398c2ecf20Sopenharmony_ci read_unlock(&tasklist_lock); 28408c2ecf20Sopenharmony_ci} 28418c2ecf20Sopenharmony_ci 28428c2ecf20Sopenharmony_ci#ifndef ARCH_MIN_MMSTRUCT_ALIGN 28438c2ecf20Sopenharmony_ci#define ARCH_MIN_MMSTRUCT_ALIGN 0 28448c2ecf20Sopenharmony_ci#endif 28458c2ecf20Sopenharmony_ci 28468c2ecf20Sopenharmony_cistatic void sighand_ctor(void *data) 28478c2ecf20Sopenharmony_ci{ 28488c2ecf20Sopenharmony_ci struct sighand_struct *sighand = data; 28498c2ecf20Sopenharmony_ci 28508c2ecf20Sopenharmony_ci spin_lock_init(&sighand->siglock); 28518c2ecf20Sopenharmony_ci init_waitqueue_head(&sighand->signalfd_wqh); 28528c2ecf20Sopenharmony_ci} 28538c2ecf20Sopenharmony_ci 28548c2ecf20Sopenharmony_civoid __init mm_cache_init(void) 28558c2ecf20Sopenharmony_ci{ 28568c2ecf20Sopenharmony_ci unsigned int mm_size; 28578c2ecf20Sopenharmony_ci 28588c2ecf20Sopenharmony_ci /* 28598c2ecf20Sopenharmony_ci * The mm_cpumask is located at the end of mm_struct, and is 28608c2ecf20Sopenharmony_ci * dynamically sized based on the maximum CPU number this system 28618c2ecf20Sopenharmony_ci * can have, taking hotplug into account (nr_cpu_ids). 
28628c2ecf20Sopenharmony_ci */ 28638c2ecf20Sopenharmony_ci mm_size = sizeof(struct mm_struct) + cpumask_size(); 28648c2ecf20Sopenharmony_ci 28658c2ecf20Sopenharmony_ci mm_cachep = kmem_cache_create_usercopy("mm_struct", 28668c2ecf20Sopenharmony_ci mm_size, ARCH_MIN_MMSTRUCT_ALIGN, 28678c2ecf20Sopenharmony_ci SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 28688c2ecf20Sopenharmony_ci offsetof(struct mm_struct, saved_auxv), 28698c2ecf20Sopenharmony_ci sizeof_field(struct mm_struct, saved_auxv), 28708c2ecf20Sopenharmony_ci NULL); 28718c2ecf20Sopenharmony_ci} 28728c2ecf20Sopenharmony_ci 28738c2ecf20Sopenharmony_civoid __init proc_caches_init(void) 28748c2ecf20Sopenharmony_ci{ 28758c2ecf20Sopenharmony_ci sighand_cachep = kmem_cache_create("sighand_cache", 28768c2ecf20Sopenharmony_ci sizeof(struct sighand_struct), 0, 28778c2ecf20Sopenharmony_ci SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| 28788c2ecf20Sopenharmony_ci SLAB_ACCOUNT, sighand_ctor); 28798c2ecf20Sopenharmony_ci signal_cachep = kmem_cache_create("signal_cache", 28808c2ecf20Sopenharmony_ci sizeof(struct signal_struct), 0, 28818c2ecf20Sopenharmony_ci SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 28828c2ecf20Sopenharmony_ci NULL); 28838c2ecf20Sopenharmony_ci files_cachep = kmem_cache_create("files_cache", 28848c2ecf20Sopenharmony_ci sizeof(struct files_struct), 0, 28858c2ecf20Sopenharmony_ci SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 28868c2ecf20Sopenharmony_ci NULL); 28878c2ecf20Sopenharmony_ci fs_cachep = kmem_cache_create("fs_cache", 28888c2ecf20Sopenharmony_ci sizeof(struct fs_struct), 0, 28898c2ecf20Sopenharmony_ci SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, 28908c2ecf20Sopenharmony_ci NULL); 28918c2ecf20Sopenharmony_ci 28928c2ecf20Sopenharmony_ci vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); 28938c2ecf20Sopenharmony_ci mmap_init(); 28948c2ecf20Sopenharmony_ci nsproxy_cache_init(); 28958c2ecf20Sopenharmony_ci} 28968c2ecf20Sopenharmony_ci 28978c2ecf20Sopenharmony_ci/* 
28988c2ecf20Sopenharmony_ci * Check constraints on flags passed to the unshare system call. 28998c2ecf20Sopenharmony_ci */ 29008c2ecf20Sopenharmony_cistatic int check_unshare_flags(unsigned long unshare_flags) 29018c2ecf20Sopenharmony_ci{ 29028c2ecf20Sopenharmony_ci if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| 29038c2ecf20Sopenharmony_ci CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| 29048c2ecf20Sopenharmony_ci CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET| 29058c2ecf20Sopenharmony_ci CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP| 29068c2ecf20Sopenharmony_ci CLONE_NEWTIME)) 29078c2ecf20Sopenharmony_ci return -EINVAL; 29088c2ecf20Sopenharmony_ci /* 29098c2ecf20Sopenharmony_ci * Not implemented, but pretend it works if there is nothing 29108c2ecf20Sopenharmony_ci * to unshare. Note that unsharing the address space or the 29118c2ecf20Sopenharmony_ci * signal handlers also need to unshare the signal queues (aka 29128c2ecf20Sopenharmony_ci * CLONE_THREAD). 29138c2ecf20Sopenharmony_ci */ 29148c2ecf20Sopenharmony_ci if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { 29158c2ecf20Sopenharmony_ci if (!thread_group_empty(current)) 29168c2ecf20Sopenharmony_ci return -EINVAL; 29178c2ecf20Sopenharmony_ci } 29188c2ecf20Sopenharmony_ci if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { 29198c2ecf20Sopenharmony_ci if (refcount_read(¤t->sighand->count) > 1) 29208c2ecf20Sopenharmony_ci return -EINVAL; 29218c2ecf20Sopenharmony_ci } 29228c2ecf20Sopenharmony_ci if (unshare_flags & CLONE_VM) { 29238c2ecf20Sopenharmony_ci if (!current_is_single_threaded()) 29248c2ecf20Sopenharmony_ci return -EINVAL; 29258c2ecf20Sopenharmony_ci } 29268c2ecf20Sopenharmony_ci 29278c2ecf20Sopenharmony_ci return 0; 29288c2ecf20Sopenharmony_ci} 29298c2ecf20Sopenharmony_ci 29308c2ecf20Sopenharmony_ci/* 29318c2ecf20Sopenharmony_ci * Unshare the filesystem structure if it is being shared 29328c2ecf20Sopenharmony_ci */ 29338c2ecf20Sopenharmony_cistatic int unshare_fs(unsigned long unshare_flags, 
struct fs_struct **new_fsp) 29348c2ecf20Sopenharmony_ci{ 29358c2ecf20Sopenharmony_ci struct fs_struct *fs = current->fs; 29368c2ecf20Sopenharmony_ci 29378c2ecf20Sopenharmony_ci if (!(unshare_flags & CLONE_FS) || !fs) 29388c2ecf20Sopenharmony_ci return 0; 29398c2ecf20Sopenharmony_ci 29408c2ecf20Sopenharmony_ci /* don't need lock here; in the worst case we'll do useless copy */ 29418c2ecf20Sopenharmony_ci if (fs->users == 1) 29428c2ecf20Sopenharmony_ci return 0; 29438c2ecf20Sopenharmony_ci 29448c2ecf20Sopenharmony_ci *new_fsp = copy_fs_struct(fs); 29458c2ecf20Sopenharmony_ci if (!*new_fsp) 29468c2ecf20Sopenharmony_ci return -ENOMEM; 29478c2ecf20Sopenharmony_ci 29488c2ecf20Sopenharmony_ci return 0; 29498c2ecf20Sopenharmony_ci} 29508c2ecf20Sopenharmony_ci 29518c2ecf20Sopenharmony_ci/* 29528c2ecf20Sopenharmony_ci * Unshare file descriptor table if it is being shared 29538c2ecf20Sopenharmony_ci */ 29548c2ecf20Sopenharmony_ciint unshare_fd(unsigned long unshare_flags, unsigned int max_fds, 29558c2ecf20Sopenharmony_ci struct files_struct **new_fdp) 29568c2ecf20Sopenharmony_ci{ 29578c2ecf20Sopenharmony_ci struct files_struct *fd = current->files; 29588c2ecf20Sopenharmony_ci int error = 0; 29598c2ecf20Sopenharmony_ci 29608c2ecf20Sopenharmony_ci if ((unshare_flags & CLONE_FILES) && 29618c2ecf20Sopenharmony_ci (fd && atomic_read(&fd->count) > 1)) { 29628c2ecf20Sopenharmony_ci *new_fdp = dup_fd(fd, max_fds, &error); 29638c2ecf20Sopenharmony_ci if (!*new_fdp) 29648c2ecf20Sopenharmony_ci return error; 29658c2ecf20Sopenharmony_ci } 29668c2ecf20Sopenharmony_ci 29678c2ecf20Sopenharmony_ci return 0; 29688c2ecf20Sopenharmony_ci} 29698c2ecf20Sopenharmony_ci 29708c2ecf20Sopenharmony_ci/* 29718c2ecf20Sopenharmony_ci * unshare allows a process to 'unshare' part of the process 29728c2ecf20Sopenharmony_ci * context which was originally shared using clone. 
copy_* 29738c2ecf20Sopenharmony_ci * functions used by kernel_clone() cannot be used here directly 29748c2ecf20Sopenharmony_ci * because they modify an inactive task_struct that is being 29758c2ecf20Sopenharmony_ci * constructed. Here we are modifying the current, active, 29768c2ecf20Sopenharmony_ci * task_struct. 29778c2ecf20Sopenharmony_ci */ 29788c2ecf20Sopenharmony_ciint ksys_unshare(unsigned long unshare_flags) 29798c2ecf20Sopenharmony_ci{ 29808c2ecf20Sopenharmony_ci struct fs_struct *fs, *new_fs = NULL; 29818c2ecf20Sopenharmony_ci struct files_struct *fd, *new_fd = NULL; 29828c2ecf20Sopenharmony_ci struct cred *new_cred = NULL; 29838c2ecf20Sopenharmony_ci struct nsproxy *new_nsproxy = NULL; 29848c2ecf20Sopenharmony_ci int do_sysvsem = 0; 29858c2ecf20Sopenharmony_ci int err; 29868c2ecf20Sopenharmony_ci 29878c2ecf20Sopenharmony_ci /* 29888c2ecf20Sopenharmony_ci * If unsharing a user namespace must also unshare the thread group 29898c2ecf20Sopenharmony_ci * and unshare the filesystem root and working directories. 29908c2ecf20Sopenharmony_ci */ 29918c2ecf20Sopenharmony_ci if (unshare_flags & CLONE_NEWUSER) 29928c2ecf20Sopenharmony_ci unshare_flags |= CLONE_THREAD | CLONE_FS; 29938c2ecf20Sopenharmony_ci /* 29948c2ecf20Sopenharmony_ci * If unsharing vm, must also unshare signal handlers. 29958c2ecf20Sopenharmony_ci */ 29968c2ecf20Sopenharmony_ci if (unshare_flags & CLONE_VM) 29978c2ecf20Sopenharmony_ci unshare_flags |= CLONE_SIGHAND; 29988c2ecf20Sopenharmony_ci /* 29998c2ecf20Sopenharmony_ci * If unsharing a signal handlers, must also unshare the signal queues. 30008c2ecf20Sopenharmony_ci */ 30018c2ecf20Sopenharmony_ci if (unshare_flags & CLONE_SIGHAND) 30028c2ecf20Sopenharmony_ci unshare_flags |= CLONE_THREAD; 30038c2ecf20Sopenharmony_ci /* 30048c2ecf20Sopenharmony_ci * If unsharing namespace, must also unshare filesystem information. 
30058c2ecf20Sopenharmony_ci */ 30068c2ecf20Sopenharmony_ci if (unshare_flags & CLONE_NEWNS) 30078c2ecf20Sopenharmony_ci unshare_flags |= CLONE_FS; 30088c2ecf20Sopenharmony_ci 30098c2ecf20Sopenharmony_ci err = check_unshare_flags(unshare_flags); 30108c2ecf20Sopenharmony_ci if (err) 30118c2ecf20Sopenharmony_ci goto bad_unshare_out; 30128c2ecf20Sopenharmony_ci /* 30138c2ecf20Sopenharmony_ci * CLONE_NEWIPC must also detach from the undolist: after switching 30148c2ecf20Sopenharmony_ci * to a new ipc namespace, the semaphore arrays from the old 30158c2ecf20Sopenharmony_ci * namespace are unreachable. 30168c2ecf20Sopenharmony_ci */ 30178c2ecf20Sopenharmony_ci if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) 30188c2ecf20Sopenharmony_ci do_sysvsem = 1; 30198c2ecf20Sopenharmony_ci err = unshare_fs(unshare_flags, &new_fs); 30208c2ecf20Sopenharmony_ci if (err) 30218c2ecf20Sopenharmony_ci goto bad_unshare_out; 30228c2ecf20Sopenharmony_ci err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd); 30238c2ecf20Sopenharmony_ci if (err) 30248c2ecf20Sopenharmony_ci goto bad_unshare_cleanup_fs; 30258c2ecf20Sopenharmony_ci err = unshare_userns(unshare_flags, &new_cred); 30268c2ecf20Sopenharmony_ci if (err) 30278c2ecf20Sopenharmony_ci goto bad_unshare_cleanup_fd; 30288c2ecf20Sopenharmony_ci err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, 30298c2ecf20Sopenharmony_ci new_cred, new_fs); 30308c2ecf20Sopenharmony_ci if (err) 30318c2ecf20Sopenharmony_ci goto bad_unshare_cleanup_cred; 30328c2ecf20Sopenharmony_ci 30338c2ecf20Sopenharmony_ci if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { 30348c2ecf20Sopenharmony_ci if (do_sysvsem) { 30358c2ecf20Sopenharmony_ci /* 30368c2ecf20Sopenharmony_ci * CLONE_SYSVSEM is equivalent to sys_exit(). 
30378c2ecf20Sopenharmony_ci */ 30388c2ecf20Sopenharmony_ci exit_sem(current); 30398c2ecf20Sopenharmony_ci } 30408c2ecf20Sopenharmony_ci if (unshare_flags & CLONE_NEWIPC) { 30418c2ecf20Sopenharmony_ci /* Orphan segments in old ns (see sem above). */ 30428c2ecf20Sopenharmony_ci exit_shm(current); 30438c2ecf20Sopenharmony_ci shm_init_task(current); 30448c2ecf20Sopenharmony_ci } 30458c2ecf20Sopenharmony_ci 30468c2ecf20Sopenharmony_ci if (new_nsproxy) 30478c2ecf20Sopenharmony_ci switch_task_namespaces(current, new_nsproxy); 30488c2ecf20Sopenharmony_ci 30498c2ecf20Sopenharmony_ci task_lock(current); 30508c2ecf20Sopenharmony_ci 30518c2ecf20Sopenharmony_ci if (new_fs) { 30528c2ecf20Sopenharmony_ci fs = current->fs; 30538c2ecf20Sopenharmony_ci spin_lock(&fs->lock); 30548c2ecf20Sopenharmony_ci current->fs = new_fs; 30558c2ecf20Sopenharmony_ci if (--fs->users) 30568c2ecf20Sopenharmony_ci new_fs = NULL; 30578c2ecf20Sopenharmony_ci else 30588c2ecf20Sopenharmony_ci new_fs = fs; 30598c2ecf20Sopenharmony_ci spin_unlock(&fs->lock); 30608c2ecf20Sopenharmony_ci } 30618c2ecf20Sopenharmony_ci 30628c2ecf20Sopenharmony_ci if (new_fd) { 30638c2ecf20Sopenharmony_ci fd = current->files; 30648c2ecf20Sopenharmony_ci current->files = new_fd; 30658c2ecf20Sopenharmony_ci new_fd = fd; 30668c2ecf20Sopenharmony_ci } 30678c2ecf20Sopenharmony_ci 30688c2ecf20Sopenharmony_ci task_unlock(current); 30698c2ecf20Sopenharmony_ci 30708c2ecf20Sopenharmony_ci if (new_cred) { 30718c2ecf20Sopenharmony_ci /* Install the new user namespace */ 30728c2ecf20Sopenharmony_ci commit_creds(new_cred); 30738c2ecf20Sopenharmony_ci new_cred = NULL; 30748c2ecf20Sopenharmony_ci } 30758c2ecf20Sopenharmony_ci } 30768c2ecf20Sopenharmony_ci 30778c2ecf20Sopenharmony_ci perf_event_namespaces(current); 30788c2ecf20Sopenharmony_ci 30798c2ecf20Sopenharmony_cibad_unshare_cleanup_cred: 30808c2ecf20Sopenharmony_ci if (new_cred) 30818c2ecf20Sopenharmony_ci put_cred(new_cred); 30828c2ecf20Sopenharmony_cibad_unshare_cleanup_fd: 
30838c2ecf20Sopenharmony_ci if (new_fd) 30848c2ecf20Sopenharmony_ci put_files_struct(new_fd); 30858c2ecf20Sopenharmony_ci 30868c2ecf20Sopenharmony_cibad_unshare_cleanup_fs: 30878c2ecf20Sopenharmony_ci if (new_fs) 30888c2ecf20Sopenharmony_ci free_fs_struct(new_fs); 30898c2ecf20Sopenharmony_ci 30908c2ecf20Sopenharmony_cibad_unshare_out: 30918c2ecf20Sopenharmony_ci return err; 30928c2ecf20Sopenharmony_ci} 30938c2ecf20Sopenharmony_ci 30948c2ecf20Sopenharmony_ciSYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) 30958c2ecf20Sopenharmony_ci{ 30968c2ecf20Sopenharmony_ci return ksys_unshare(unshare_flags); 30978c2ecf20Sopenharmony_ci} 30988c2ecf20Sopenharmony_ci 30998c2ecf20Sopenharmony_ci/* 31008c2ecf20Sopenharmony_ci * Helper to unshare the files of the current task. 31018c2ecf20Sopenharmony_ci * We don't want to expose copy_files internals to 31028c2ecf20Sopenharmony_ci * the exec layer of the kernel. 31038c2ecf20Sopenharmony_ci */ 31048c2ecf20Sopenharmony_ci 31058c2ecf20Sopenharmony_ciint unshare_files(struct files_struct **displaced) 31068c2ecf20Sopenharmony_ci{ 31078c2ecf20Sopenharmony_ci struct task_struct *task = current; 31088c2ecf20Sopenharmony_ci struct files_struct *copy = NULL; 31098c2ecf20Sopenharmony_ci int error; 31108c2ecf20Sopenharmony_ci 31118c2ecf20Sopenharmony_ci error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, ©); 31128c2ecf20Sopenharmony_ci if (error || !copy) { 31138c2ecf20Sopenharmony_ci *displaced = NULL; 31148c2ecf20Sopenharmony_ci return error; 31158c2ecf20Sopenharmony_ci } 31168c2ecf20Sopenharmony_ci *displaced = task->files; 31178c2ecf20Sopenharmony_ci task_lock(task); 31188c2ecf20Sopenharmony_ci task->files = copy; 31198c2ecf20Sopenharmony_ci task_unlock(task); 31208c2ecf20Sopenharmony_ci return 0; 31218c2ecf20Sopenharmony_ci} 31228c2ecf20Sopenharmony_ci 31238c2ecf20Sopenharmony_ciint sysctl_max_threads(struct ctl_table *table, int write, 31248c2ecf20Sopenharmony_ci void *buffer, size_t *lenp, loff_t *ppos) 31258c2ecf20Sopenharmony_ci{ 
31268c2ecf20Sopenharmony_ci struct ctl_table t; 31278c2ecf20Sopenharmony_ci int ret; 31288c2ecf20Sopenharmony_ci int threads = max_threads; 31298c2ecf20Sopenharmony_ci int min = 1; 31308c2ecf20Sopenharmony_ci int max = MAX_THREADS; 31318c2ecf20Sopenharmony_ci 31328c2ecf20Sopenharmony_ci t = *table; 31338c2ecf20Sopenharmony_ci t.data = &threads; 31348c2ecf20Sopenharmony_ci t.extra1 = &min; 31358c2ecf20Sopenharmony_ci t.extra2 = &max; 31368c2ecf20Sopenharmony_ci 31378c2ecf20Sopenharmony_ci ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); 31388c2ecf20Sopenharmony_ci if (ret || !write) 31398c2ecf20Sopenharmony_ci return ret; 31408c2ecf20Sopenharmony_ci 31418c2ecf20Sopenharmony_ci max_threads = threads; 31428c2ecf20Sopenharmony_ci 31438c2ecf20Sopenharmony_ci return 0; 31448c2ecf20Sopenharmony_ci} 3145