18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  linux/kernel/fork.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *  Copyright (C) 1991, 1992  Linus Torvalds
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci/*
98c2ecf20Sopenharmony_ci *  'fork.c' contains the help-routines for the 'fork' system call
108c2ecf20Sopenharmony_ci * (see also entry.S and others).
118c2ecf20Sopenharmony_ci * Fork is rather simple, once you get the hang of it, but the memory
128c2ecf20Sopenharmony_ci * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
138c2ecf20Sopenharmony_ci */
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci#include <linux/anon_inodes.h>
168c2ecf20Sopenharmony_ci#include <linux/slab.h>
178c2ecf20Sopenharmony_ci#include <linux/sched/autogroup.h>
188c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
198c2ecf20Sopenharmony_ci#include <linux/sched/coredump.h>
208c2ecf20Sopenharmony_ci#include <linux/sched/user.h>
218c2ecf20Sopenharmony_ci#include <linux/sched/numa_balancing.h>
228c2ecf20Sopenharmony_ci#include <linux/sched/stat.h>
238c2ecf20Sopenharmony_ci#include <linux/sched/task.h>
248c2ecf20Sopenharmony_ci#include <linux/sched/task_stack.h>
258c2ecf20Sopenharmony_ci#include <linux/sched/cputime.h>
268c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
278c2ecf20Sopenharmony_ci#include <linux/rtmutex.h>
288c2ecf20Sopenharmony_ci#include <linux/init.h>
298c2ecf20Sopenharmony_ci#include <linux/unistd.h>
308c2ecf20Sopenharmony_ci#include <linux/module.h>
318c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
328c2ecf20Sopenharmony_ci#include <linux/completion.h>
338c2ecf20Sopenharmony_ci#include <linux/personality.h>
348c2ecf20Sopenharmony_ci#include <linux/mempolicy.h>
358c2ecf20Sopenharmony_ci#include <linux/sem.h>
368c2ecf20Sopenharmony_ci#include <linux/file.h>
378c2ecf20Sopenharmony_ci#include <linux/fdtable.h>
388c2ecf20Sopenharmony_ci#include <linux/iocontext.h>
398c2ecf20Sopenharmony_ci#include <linux/key.h>
408c2ecf20Sopenharmony_ci#include <linux/binfmts.h>
418c2ecf20Sopenharmony_ci#include <linux/mman.h>
428c2ecf20Sopenharmony_ci#include <linux/mmu_notifier.h>
438c2ecf20Sopenharmony_ci#include <linux/fs.h>
448c2ecf20Sopenharmony_ci#include <linux/mm.h>
458c2ecf20Sopenharmony_ci#include <linux/mm_inline.h>
468c2ecf20Sopenharmony_ci#include <linux/vmacache.h>
478c2ecf20Sopenharmony_ci#include <linux/nsproxy.h>
488c2ecf20Sopenharmony_ci#include <linux/capability.h>
498c2ecf20Sopenharmony_ci#include <linux/cpu.h>
508c2ecf20Sopenharmony_ci#include <linux/cgroup.h>
518c2ecf20Sopenharmony_ci#include <linux/security.h>
528c2ecf20Sopenharmony_ci#include <linux/hugetlb.h>
538c2ecf20Sopenharmony_ci#include <linux/seccomp.h>
548c2ecf20Sopenharmony_ci#include <linux/swap.h>
558c2ecf20Sopenharmony_ci#include <linux/syscalls.h>
568c2ecf20Sopenharmony_ci#include <linux/jiffies.h>
578c2ecf20Sopenharmony_ci#include <linux/futex.h>
588c2ecf20Sopenharmony_ci#include <linux/compat.h>
598c2ecf20Sopenharmony_ci#include <linux/kthread.h>
608c2ecf20Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
618c2ecf20Sopenharmony_ci#include <linux/rcupdate.h>
628c2ecf20Sopenharmony_ci#include <linux/ptrace.h>
638c2ecf20Sopenharmony_ci#include <linux/mount.h>
648c2ecf20Sopenharmony_ci#include <linux/audit.h>
658c2ecf20Sopenharmony_ci#include <linux/memcontrol.h>
668c2ecf20Sopenharmony_ci#include <linux/ftrace.h>
678c2ecf20Sopenharmony_ci#include <linux/proc_fs.h>
688c2ecf20Sopenharmony_ci#include <linux/profile.h>
698c2ecf20Sopenharmony_ci#include <linux/rmap.h>
708c2ecf20Sopenharmony_ci#include <linux/ksm.h>
718c2ecf20Sopenharmony_ci#include <linux/acct.h>
728c2ecf20Sopenharmony_ci#include <linux/userfaultfd_k.h>
738c2ecf20Sopenharmony_ci#include <linux/tsacct_kern.h>
748c2ecf20Sopenharmony_ci#include <linux/cn_proc.h>
758c2ecf20Sopenharmony_ci#include <linux/freezer.h>
768c2ecf20Sopenharmony_ci#include <linux/delayacct.h>
778c2ecf20Sopenharmony_ci#include <linux/taskstats_kern.h>
788c2ecf20Sopenharmony_ci#include <linux/random.h>
798c2ecf20Sopenharmony_ci#include <linux/tty.h>
808c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
818c2ecf20Sopenharmony_ci#include <linux/fs_struct.h>
828c2ecf20Sopenharmony_ci#include <linux/magic.h>
838c2ecf20Sopenharmony_ci#include <linux/perf_event.h>
848c2ecf20Sopenharmony_ci#include <linux/posix-timers.h>
858c2ecf20Sopenharmony_ci#include <linux/user-return-notifier.h>
868c2ecf20Sopenharmony_ci#include <linux/oom.h>
878c2ecf20Sopenharmony_ci#include <linux/khugepaged.h>
888c2ecf20Sopenharmony_ci#include <linux/signalfd.h>
898c2ecf20Sopenharmony_ci#include <linux/uprobes.h>
908c2ecf20Sopenharmony_ci#include <linux/aio.h>
918c2ecf20Sopenharmony_ci#include <linux/compiler.h>
928c2ecf20Sopenharmony_ci#include <linux/sysctl.h>
938c2ecf20Sopenharmony_ci#include <linux/kcov.h>
948c2ecf20Sopenharmony_ci#include <linux/livepatch.h>
958c2ecf20Sopenharmony_ci#include <linux/thread_info.h>
968c2ecf20Sopenharmony_ci#include <linux/stackleak.h>
978c2ecf20Sopenharmony_ci#include <linux/kasan.h>
988c2ecf20Sopenharmony_ci#include <linux/scs.h>
998c2ecf20Sopenharmony_ci#include <linux/io_uring.h>
1008c2ecf20Sopenharmony_ci#ifdef CONFIG_RECLAIM_ACCT
1018c2ecf20Sopenharmony_ci#include <linux/reclaim_acct.h>
1028c2ecf20Sopenharmony_ci#endif
1038c2ecf20Sopenharmony_ci#include <linux/mm_purgeable.h>
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci#include <asm/pgalloc.h>
1068c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
1078c2ecf20Sopenharmony_ci#include <asm/mmu_context.h>
1088c2ecf20Sopenharmony_ci#include <asm/cacheflush.h>
1098c2ecf20Sopenharmony_ci#include <asm/tlbflush.h>
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci#include <trace/events/sched.h>
1128c2ecf20Sopenharmony_ci#include <linux/hck/lite_hck_ced.h>
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS
1158c2ecf20Sopenharmony_ci#include <trace/events/task.h>
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci/*
1188c2ecf20Sopenharmony_ci * Minimum number of threads to boot the kernel
1198c2ecf20Sopenharmony_ci */
1208c2ecf20Sopenharmony_ci#define MIN_THREADS 20
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci/*
1238c2ecf20Sopenharmony_ci * Maximum number of threads
1248c2ecf20Sopenharmony_ci */
1258c2ecf20Sopenharmony_ci#define MAX_THREADS FUTEX_TID_MASK
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci/*
1288c2ecf20Sopenharmony_ci * Protected counters by write_lock_irq(&tasklist_lock)
1298c2ecf20Sopenharmony_ci */
1308c2ecf20Sopenharmony_ciunsigned long total_forks;	/* Handle normal Linux uptimes. */
1318c2ecf20Sopenharmony_ciint nr_threads;			/* The idle threads do not count.. */
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_cistatic int max_threads;		/* tunable limit on nr_threads */
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci#define NAMED_ARRAY_INDEX(x)	[x] = __stringify(x)
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cistatic const char * const resident_page_types[] = {
1388c2ecf20Sopenharmony_ci	NAMED_ARRAY_INDEX(MM_FILEPAGES),
1398c2ecf20Sopenharmony_ci	NAMED_ARRAY_INDEX(MM_ANONPAGES),
1408c2ecf20Sopenharmony_ci	NAMED_ARRAY_INDEX(MM_SWAPENTS),
1418c2ecf20Sopenharmony_ci	NAMED_ARRAY_INDEX(MM_SHMEMPAGES),
1428c2ecf20Sopenharmony_ci};
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ciDEFINE_PER_CPU(unsigned long, process_counts) = 0;
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci__cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
1478c2ecf20Sopenharmony_ci
#ifdef CONFIG_PROVE_RCU
/*
 * Report whether lockdep considers tasklist_lock to be held.
 * Returns the result of lockdep_is_held() unchanged.
 */
int lockdep_tasklist_lock_is_held(void)
{
	int held = lockdep_is_held(&tasklist_lock);

	return held;
}
EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
#endif /* #ifdef CONFIG_PROVE_RCU */
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ciint nr_processes(void)
1578c2ecf20Sopenharmony_ci{
1588c2ecf20Sopenharmony_ci	int cpu;
1598c2ecf20Sopenharmony_ci	int total = 0;
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	for_each_possible_cpu(cpu)
1628c2ecf20Sopenharmony_ci		total += per_cpu(process_counts, cpu);
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	return total;
1658c2ecf20Sopenharmony_ci}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_civoid __weak arch_release_task_struct(struct task_struct *tsk)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
1728c2ecf20Sopenharmony_cistatic struct kmem_cache *task_struct_cachep;
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_cistatic inline struct task_struct *alloc_task_struct_node(int node)
1758c2ecf20Sopenharmony_ci{
1768c2ecf20Sopenharmony_ci	return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_cistatic inline void free_task_struct(struct task_struct *tsk)
1808c2ecf20Sopenharmony_ci{
1818c2ecf20Sopenharmony_ci	kmem_cache_free(task_struct_cachep, tsk);
1828c2ecf20Sopenharmony_ci}
1838c2ecf20Sopenharmony_ci#endif
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci/*
1888c2ecf20Sopenharmony_ci * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
1898c2ecf20Sopenharmony_ci * kmemcache based allocator.
1908c2ecf20Sopenharmony_ci */
1918c2ecf20Sopenharmony_ci# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
1928c2ecf20Sopenharmony_ci
#ifdef CONFIG_VMAP_STACK
/*
 * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
 * flush.  Keep a few stacks per CPU around so that most allocations and
 * frees can skip both calls.
 */
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);

/*
 * Release every stack held in the cache slots of @cpu and clear the slots.
 * Always returns 0.
 */
static int free_vm_stack_cache(unsigned int cpu)
{
	struct vm_struct **slots = per_cpu_ptr(cached_stacks, cpu);
	int idx;

	for (idx = 0; idx < NR_CACHED_STACKS; idx++) {
		struct vm_struct *cached = slots[idx];

		if (cached) {
			vfree(cached->addr);
			slots[idx] = NULL;
		}
	}

	return 0;
}
#endif
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_cistatic unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
2218c2ecf20Sopenharmony_ci{
2228c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK
2238c2ecf20Sopenharmony_ci	void *stack;
2248c2ecf20Sopenharmony_ci	int i;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	for (i = 0; i < NR_CACHED_STACKS; i++) {
2278c2ecf20Sopenharmony_ci		struct vm_struct *s;
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci		s = this_cpu_xchg(cached_stacks[i], NULL);
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci		if (!s)
2328c2ecf20Sopenharmony_ci			continue;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci		/* Clear the KASAN shadow of the stack. */
2358c2ecf20Sopenharmony_ci		kasan_unpoison_shadow(s->addr, THREAD_SIZE);
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci		/* Clear stale pointers from reused stack. */
2388c2ecf20Sopenharmony_ci		memset(s->addr, 0, THREAD_SIZE);
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci		tsk->stack_vm_area = s;
2418c2ecf20Sopenharmony_ci		tsk->stack = s->addr;
2428c2ecf20Sopenharmony_ci		return s->addr;
2438c2ecf20Sopenharmony_ci	}
2448c2ecf20Sopenharmony_ci
2458c2ecf20Sopenharmony_ci	/*
2468c2ecf20Sopenharmony_ci	 * Allocated stacks are cached and later reused by new threads,
2478c2ecf20Sopenharmony_ci	 * so memcg accounting is performed manually on assigning/releasing
2488c2ecf20Sopenharmony_ci	 * stacks to tasks. Drop __GFP_ACCOUNT.
2498c2ecf20Sopenharmony_ci	 */
2508c2ecf20Sopenharmony_ci	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
2518c2ecf20Sopenharmony_ci				     VMALLOC_START, VMALLOC_END,
2528c2ecf20Sopenharmony_ci				     THREADINFO_GFP & ~__GFP_ACCOUNT,
2538c2ecf20Sopenharmony_ci				     PAGE_KERNEL,
2548c2ecf20Sopenharmony_ci				     0, node, __builtin_return_address(0));
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	/*
2578c2ecf20Sopenharmony_ci	 * We can't call find_vm_area() in interrupt context, and
2588c2ecf20Sopenharmony_ci	 * free_thread_stack() can be called in interrupt context,
2598c2ecf20Sopenharmony_ci	 * so cache the vm_struct.
2608c2ecf20Sopenharmony_ci	 */
2618c2ecf20Sopenharmony_ci	if (stack) {
2628c2ecf20Sopenharmony_ci		tsk->stack_vm_area = find_vm_area(stack);
2638c2ecf20Sopenharmony_ci		tsk->stack = stack;
2648c2ecf20Sopenharmony_ci	}
2658c2ecf20Sopenharmony_ci	return stack;
2668c2ecf20Sopenharmony_ci#else
2678c2ecf20Sopenharmony_ci	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
2688c2ecf20Sopenharmony_ci					     THREAD_SIZE_ORDER);
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ci	if (likely(page)) {
2718c2ecf20Sopenharmony_ci		tsk->stack = kasan_reset_tag(page_address(page));
2728c2ecf20Sopenharmony_ci		return tsk->stack;
2738c2ecf20Sopenharmony_ci	}
2748c2ecf20Sopenharmony_ci	return NULL;
2758c2ecf20Sopenharmony_ci#endif
2768c2ecf20Sopenharmony_ci}
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_cistatic inline void free_thread_stack(struct task_struct *tsk)
2798c2ecf20Sopenharmony_ci{
2808c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK
2818c2ecf20Sopenharmony_ci	struct vm_struct *vm = task_stack_vm_area(tsk);
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	if (vm) {
2848c2ecf20Sopenharmony_ci		int i;
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
2878c2ecf20Sopenharmony_ci			memcg_kmem_uncharge_page(vm->pages[i], 0);
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci		for (i = 0; i < NR_CACHED_STACKS; i++) {
2908c2ecf20Sopenharmony_ci			if (this_cpu_cmpxchg(cached_stacks[i],
2918c2ecf20Sopenharmony_ci					NULL, tsk->stack_vm_area) != NULL)
2928c2ecf20Sopenharmony_ci				continue;
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_ci			return;
2958c2ecf20Sopenharmony_ci		}
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci		vfree_atomic(tsk->stack);
2988c2ecf20Sopenharmony_ci		return;
2998c2ecf20Sopenharmony_ci	}
3008c2ecf20Sopenharmony_ci#endif
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
3038c2ecf20Sopenharmony_ci}
3048c2ecf20Sopenharmony_ci# else
3058c2ecf20Sopenharmony_cistatic struct kmem_cache *thread_stack_cache;
3068c2ecf20Sopenharmony_ci
3078c2ecf20Sopenharmony_cistatic unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
3088c2ecf20Sopenharmony_ci						  int node)
3098c2ecf20Sopenharmony_ci{
3108c2ecf20Sopenharmony_ci	unsigned long *stack;
3118c2ecf20Sopenharmony_ci	stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
3128c2ecf20Sopenharmony_ci	stack = kasan_reset_tag(stack);
3138c2ecf20Sopenharmony_ci	tsk->stack = stack;
3148c2ecf20Sopenharmony_ci	return stack;
3158c2ecf20Sopenharmony_ci}
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_cistatic void free_thread_stack(struct task_struct *tsk)
3188c2ecf20Sopenharmony_ci{
3198c2ecf20Sopenharmony_ci	kmem_cache_free(thread_stack_cache, tsk->stack);
3208c2ecf20Sopenharmony_ci}
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_civoid thread_stack_cache_init(void)
3238c2ecf20Sopenharmony_ci{
3248c2ecf20Sopenharmony_ci	thread_stack_cache = kmem_cache_create_usercopy("thread_stack",
3258c2ecf20Sopenharmony_ci					THREAD_SIZE, THREAD_SIZE, 0, 0,
3268c2ecf20Sopenharmony_ci					THREAD_SIZE, NULL);
3278c2ecf20Sopenharmony_ci	BUG_ON(thread_stack_cache == NULL);
3288c2ecf20Sopenharmony_ci}
3298c2ecf20Sopenharmony_ci# endif
3308c2ecf20Sopenharmony_ci#endif
3318c2ecf20Sopenharmony_ci
/* Slab cache backing signal_struct objects (tsk->signal). */
static struct kmem_cache *signal_cachep;

/* Slab cache backing sighand_struct objects (tsk->sighand). */
struct kmem_cache *sighand_cachep;

/* Slab cache backing files_struct objects (tsk->files). */
struct kmem_cache *files_cachep;

/* Slab cache backing fs_struct objects (tsk->fs). */
struct kmem_cache *fs_cachep;

/* Slab cache backing vm_area_struct objects. */
static struct kmem_cache *vm_area_cachep;

/* Slab cache backing mm_struct objects (tsk->mm). */
static struct kmem_cache *mm_cachep;
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_cistruct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
3518c2ecf20Sopenharmony_ci{
3528c2ecf20Sopenharmony_ci	struct vm_area_struct *vma;
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
3558c2ecf20Sopenharmony_ci	if (vma)
3568c2ecf20Sopenharmony_ci		vma_init(vma, mm);
3578c2ecf20Sopenharmony_ci	return vma;
3588c2ecf20Sopenharmony_ci}
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_cistruct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
3618c2ecf20Sopenharmony_ci{
3628c2ecf20Sopenharmony_ci	struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_ci	if (new) {
3658c2ecf20Sopenharmony_ci		ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
3668c2ecf20Sopenharmony_ci		ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
3678c2ecf20Sopenharmony_ci		/*
3688c2ecf20Sopenharmony_ci		 * orig->shared.rb may be modified concurrently, but the clone
3698c2ecf20Sopenharmony_ci		 * will be reinitialized.
3708c2ecf20Sopenharmony_ci		 */
3718c2ecf20Sopenharmony_ci		*new = data_race(*orig);
3728c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&new->anon_vma_chain);
3738c2ecf20Sopenharmony_ci		new->vm_next = new->vm_prev = NULL;
3748c2ecf20Sopenharmony_ci		dup_anon_vma_name(orig, new);
3758c2ecf20Sopenharmony_ci	}
3768c2ecf20Sopenharmony_ci	return new;
3778c2ecf20Sopenharmony_ci}
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_civoid vm_area_free(struct vm_area_struct *vma)
3808c2ecf20Sopenharmony_ci{
3818c2ecf20Sopenharmony_ci	free_anon_vma_name(vma);
3828c2ecf20Sopenharmony_ci	kmem_cache_free(vm_area_cachep, vma);
3838c2ecf20Sopenharmony_ci}
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_cistatic void account_kernel_stack(struct task_struct *tsk, int account)
3868c2ecf20Sopenharmony_ci{
3878c2ecf20Sopenharmony_ci	void *stack = task_stack_page(tsk);
3888c2ecf20Sopenharmony_ci	struct vm_struct *vm = task_stack_vm_area(tsk);
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci	/* All stack pages are in the same node. */
3928c2ecf20Sopenharmony_ci	if (vm)
3938c2ecf20Sopenharmony_ci		mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
3948c2ecf20Sopenharmony_ci				      account * (THREAD_SIZE / 1024));
3958c2ecf20Sopenharmony_ci	else
3968c2ecf20Sopenharmony_ci		mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
3978c2ecf20Sopenharmony_ci				      account * (THREAD_SIZE / 1024));
3988c2ecf20Sopenharmony_ci}
3998c2ecf20Sopenharmony_ci
/*
 * Charge every page of a vmap'ed stack of @tsk to the memory cgroup.
 *
 * Returns 0 on success (also when the stack has no vm area or
 * CONFIG_VMAP_STACK is off), otherwise the first error reported by
 * memcg_kmem_charge_page().
 */
static int memcg_charge_kernel_stack(struct task_struct *tsk)
{
#ifdef CONFIG_VMAP_STACK
	struct vm_struct *area = task_stack_vm_area(tsk);
	int idx;

	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);

	if (!area)
		return 0;

	BUG_ON(area->nr_pages != THREAD_SIZE / PAGE_SIZE);

	for (idx = 0; idx < THREAD_SIZE / PAGE_SIZE; idx++) {
		int err;

		/*
		 * If memcg_kmem_charge_page() fails, page->mem_cgroup
		 * pointer is NULL, and memcg_kmem_uncharge_page() in
		 * free_thread_stack() will ignore this page.
		 */
		err = memcg_kmem_charge_page(area->pages[idx], GFP_KERNEL, 0);
		if (err)
			return err;
	}
#endif
	return 0;
}
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_cistatic void release_task_stack(struct task_struct *tsk)
4308c2ecf20Sopenharmony_ci{
4318c2ecf20Sopenharmony_ci	if (WARN_ON(tsk->state != TASK_DEAD))
4328c2ecf20Sopenharmony_ci		return;  /* Better to leak the stack than to free prematurely */
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci	account_kernel_stack(tsk, -1);
4358c2ecf20Sopenharmony_ci	free_thread_stack(tsk);
4368c2ecf20Sopenharmony_ci	tsk->stack = NULL;
4378c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK
4388c2ecf20Sopenharmony_ci	tsk->stack_vm_area = NULL;
4398c2ecf20Sopenharmony_ci#endif
4408c2ecf20Sopenharmony_ci}
4418c2ecf20Sopenharmony_ci
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
 * Drop one reference on the stack of @tsk; the stack is released when the
 * reference count reaches zero.
 */
void put_task_stack(struct task_struct *tsk)
{
	if (!refcount_dec_and_test(&tsk->stack_refcount))
		return;

	release_task_stack(tsk);
}
#endif
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_civoid free_task(struct task_struct *tsk)
4518c2ecf20Sopenharmony_ci{
4528c2ecf20Sopenharmony_ci#ifdef CONFIG_SECCOMP
4538c2ecf20Sopenharmony_ci	WARN_ON_ONCE(tsk->seccomp.filter);
4548c2ecf20Sopenharmony_ci#endif
4558c2ecf20Sopenharmony_ci	scs_release(tsk);
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci#ifndef CONFIG_THREAD_INFO_IN_TASK
4588c2ecf20Sopenharmony_ci	/*
4598c2ecf20Sopenharmony_ci	 * The task is finally done with both the stack and thread_info,
4608c2ecf20Sopenharmony_ci	 * so free both.
4618c2ecf20Sopenharmony_ci	 */
4628c2ecf20Sopenharmony_ci	release_task_stack(tsk);
4638c2ecf20Sopenharmony_ci#else
4648c2ecf20Sopenharmony_ci	/*
4658c2ecf20Sopenharmony_ci	 * If the task had a separate stack allocation, it should be gone
4668c2ecf20Sopenharmony_ci	 * by now.
4678c2ecf20Sopenharmony_ci	 */
4688c2ecf20Sopenharmony_ci	WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
4698c2ecf20Sopenharmony_ci#endif
4708c2ecf20Sopenharmony_ci	rt_mutex_debug_task_free(tsk);
4718c2ecf20Sopenharmony_ci	ftrace_graph_exit_task(tsk);
4728c2ecf20Sopenharmony_ci	arch_release_task_struct(tsk);
4738c2ecf20Sopenharmony_ci	if (tsk->flags & PF_KTHREAD)
4748c2ecf20Sopenharmony_ci		free_kthread_struct(tsk);
4758c2ecf20Sopenharmony_ci	free_task_struct(tsk);
4768c2ecf20Sopenharmony_ci}
4778c2ecf20Sopenharmony_ciEXPORT_SYMBOL(free_task);
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU
4808c2ecf20Sopenharmony_cistatic __latent_entropy int dup_mmap(struct mm_struct *mm,
4818c2ecf20Sopenharmony_ci					struct mm_struct *oldmm)
4828c2ecf20Sopenharmony_ci{
4838c2ecf20Sopenharmony_ci	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
4848c2ecf20Sopenharmony_ci	struct rb_node **rb_link, *rb_parent;
4858c2ecf20Sopenharmony_ci	int retval;
4868c2ecf20Sopenharmony_ci	unsigned long charge;
4878c2ecf20Sopenharmony_ci	LIST_HEAD(uf);
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci	uprobe_start_dup_mmap();
4908c2ecf20Sopenharmony_ci	if (mmap_write_lock_killable(oldmm)) {
4918c2ecf20Sopenharmony_ci		retval = -EINTR;
4928c2ecf20Sopenharmony_ci		goto fail_uprobe_end;
4938c2ecf20Sopenharmony_ci	}
4948c2ecf20Sopenharmony_ci	flush_cache_dup_mm(oldmm);
4958c2ecf20Sopenharmony_ci	uprobe_dup_mmap(oldmm, mm);
4968c2ecf20Sopenharmony_ci	/*
4978c2ecf20Sopenharmony_ci	 * Not linked in yet - no deadlock potential:
4988c2ecf20Sopenharmony_ci	 */
4998c2ecf20Sopenharmony_ci	mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci	/* No ordering required: file already has been exposed. */
5028c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci	mm->total_vm = oldmm->total_vm;
5058c2ecf20Sopenharmony_ci	mm->data_vm = oldmm->data_vm;
5068c2ecf20Sopenharmony_ci	mm->exec_vm = oldmm->exec_vm;
5078c2ecf20Sopenharmony_ci	mm->stack_vm = oldmm->stack_vm;
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci	rb_link = &mm->mm_rb.rb_node;
5108c2ecf20Sopenharmony_ci	rb_parent = NULL;
5118c2ecf20Sopenharmony_ci	pprev = &mm->mmap;
5128c2ecf20Sopenharmony_ci	retval = ksm_fork(mm, oldmm);
5138c2ecf20Sopenharmony_ci	if (retval)
5148c2ecf20Sopenharmony_ci		goto out;
5158c2ecf20Sopenharmony_ci	retval = khugepaged_fork(mm, oldmm);
5168c2ecf20Sopenharmony_ci	if (retval)
5178c2ecf20Sopenharmony_ci		goto out;
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci	prev = NULL;
5208c2ecf20Sopenharmony_ci	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
5218c2ecf20Sopenharmony_ci		struct file *file;
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci		if (mpnt->vm_flags & VM_DONTCOPY) {
5248c2ecf20Sopenharmony_ci			vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
5258c2ecf20Sopenharmony_ci			continue;
5268c2ecf20Sopenharmony_ci		}
5278c2ecf20Sopenharmony_ci		charge = 0;
5288c2ecf20Sopenharmony_ci		/*
5298c2ecf20Sopenharmony_ci		 * Don't duplicate many vmas if we've been oom-killed (for
5308c2ecf20Sopenharmony_ci		 * example)
5318c2ecf20Sopenharmony_ci		 */
5328c2ecf20Sopenharmony_ci		if (fatal_signal_pending(current)) {
5338c2ecf20Sopenharmony_ci			retval = -EINTR;
5348c2ecf20Sopenharmony_ci			goto out;
5358c2ecf20Sopenharmony_ci		}
5368c2ecf20Sopenharmony_ci		if (mpnt->vm_flags & VM_ACCOUNT) {
5378c2ecf20Sopenharmony_ci			unsigned long len = vma_pages(mpnt);
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
5408c2ecf20Sopenharmony_ci				goto fail_nomem;
5418c2ecf20Sopenharmony_ci			charge = len;
5428c2ecf20Sopenharmony_ci		}
5438c2ecf20Sopenharmony_ci		tmp = vm_area_dup(mpnt);
5448c2ecf20Sopenharmony_ci		if (!tmp)
5458c2ecf20Sopenharmony_ci			goto fail_nomem;
5468c2ecf20Sopenharmony_ci		retval = vma_dup_policy(mpnt, tmp);
5478c2ecf20Sopenharmony_ci		if (retval)
5488c2ecf20Sopenharmony_ci			goto fail_nomem_policy;
5498c2ecf20Sopenharmony_ci		tmp->vm_mm = mm;
5508c2ecf20Sopenharmony_ci		retval = dup_userfaultfd(tmp, &uf);
5518c2ecf20Sopenharmony_ci		if (retval)
5528c2ecf20Sopenharmony_ci			goto fail_nomem_anon_vma_fork;
5538c2ecf20Sopenharmony_ci		if (tmp->vm_flags & VM_WIPEONFORK) {
5548c2ecf20Sopenharmony_ci			/*
5558c2ecf20Sopenharmony_ci			 * VM_WIPEONFORK gets a clean slate in the child.
5568c2ecf20Sopenharmony_ci			 * Don't prepare anon_vma until fault since we don't
5578c2ecf20Sopenharmony_ci			 * copy page for current vma.
5588c2ecf20Sopenharmony_ci			 */
5598c2ecf20Sopenharmony_ci			tmp->anon_vma = NULL;
5608c2ecf20Sopenharmony_ci		} else if (anon_vma_fork(tmp, mpnt))
5618c2ecf20Sopenharmony_ci			goto fail_nomem_anon_vma_fork;
5628c2ecf20Sopenharmony_ci		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
5638c2ecf20Sopenharmony_ci		file = tmp->vm_file;
5648c2ecf20Sopenharmony_ci		if (file) {
5658c2ecf20Sopenharmony_ci			struct inode *inode = file_inode(file);
5668c2ecf20Sopenharmony_ci			struct address_space *mapping = file->f_mapping;
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci			get_file(file);
5698c2ecf20Sopenharmony_ci			if (tmp->vm_flags & VM_DENYWRITE)
5708c2ecf20Sopenharmony_ci				put_write_access(inode);
5718c2ecf20Sopenharmony_ci			i_mmap_lock_write(mapping);
5728c2ecf20Sopenharmony_ci			if (tmp->vm_flags & VM_SHARED)
5738c2ecf20Sopenharmony_ci				mapping_allow_writable(mapping);
5748c2ecf20Sopenharmony_ci			flush_dcache_mmap_lock(mapping);
5758c2ecf20Sopenharmony_ci			/* insert tmp into the share list, just after mpnt */
5768c2ecf20Sopenharmony_ci			vma_interval_tree_insert_after(tmp, mpnt,
5778c2ecf20Sopenharmony_ci					&mapping->i_mmap);
5788c2ecf20Sopenharmony_ci			flush_dcache_mmap_unlock(mapping);
5798c2ecf20Sopenharmony_ci			i_mmap_unlock_write(mapping);
5808c2ecf20Sopenharmony_ci		}
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci		/*
5838c2ecf20Sopenharmony_ci		 * Clear hugetlb-related page reserves for children. This only
5848c2ecf20Sopenharmony_ci		 * affects MAP_PRIVATE mappings. Faults generated by the child
5858c2ecf20Sopenharmony_ci		 * are not guaranteed to succeed, even if read-only
5868c2ecf20Sopenharmony_ci		 */
5878c2ecf20Sopenharmony_ci		if (is_vm_hugetlb_page(tmp))
5888c2ecf20Sopenharmony_ci			reset_vma_resv_huge_pages(tmp);
5898c2ecf20Sopenharmony_ci
5908c2ecf20Sopenharmony_ci		/*
5918c2ecf20Sopenharmony_ci		 * Link in the new vma and copy the page table entries.
5928c2ecf20Sopenharmony_ci		 */
5938c2ecf20Sopenharmony_ci		*pprev = tmp;
5948c2ecf20Sopenharmony_ci		pprev = &tmp->vm_next;
5958c2ecf20Sopenharmony_ci		tmp->vm_prev = prev;
5968c2ecf20Sopenharmony_ci		prev = tmp;
5978c2ecf20Sopenharmony_ci
5988c2ecf20Sopenharmony_ci		__vma_link_rb(mm, tmp, rb_link, rb_parent);
5998c2ecf20Sopenharmony_ci		rb_link = &tmp->vm_rb.rb_right;
6008c2ecf20Sopenharmony_ci		rb_parent = &tmp->vm_rb;
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci		mm->map_count++;
6038c2ecf20Sopenharmony_ci		if (!(tmp->vm_flags & VM_WIPEONFORK))
6048c2ecf20Sopenharmony_ci			retval = copy_page_range(tmp, mpnt);
6058c2ecf20Sopenharmony_ci
6068c2ecf20Sopenharmony_ci		if (tmp->vm_ops && tmp->vm_ops->open)
6078c2ecf20Sopenharmony_ci			tmp->vm_ops->open(tmp);
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci		if (retval)
6108c2ecf20Sopenharmony_ci			goto out;
6118c2ecf20Sopenharmony_ci	}
6128c2ecf20Sopenharmony_ci	/* a new mm has just been created */
6138c2ecf20Sopenharmony_ci	retval = arch_dup_mmap(oldmm, mm);
6148c2ecf20Sopenharmony_ciout:
6158c2ecf20Sopenharmony_ci	mmap_write_unlock(mm);
6168c2ecf20Sopenharmony_ci	flush_tlb_mm(oldmm);
6178c2ecf20Sopenharmony_ci	mmap_write_unlock(oldmm);
6188c2ecf20Sopenharmony_ci	dup_userfaultfd_complete(&uf);
6198c2ecf20Sopenharmony_cifail_uprobe_end:
6208c2ecf20Sopenharmony_ci	uprobe_end_dup_mmap();
6218c2ecf20Sopenharmony_ci	return retval;
6228c2ecf20Sopenharmony_cifail_nomem_anon_vma_fork:
6238c2ecf20Sopenharmony_ci	mpol_put(vma_policy(tmp));
6248c2ecf20Sopenharmony_cifail_nomem_policy:
6258c2ecf20Sopenharmony_ci	vm_area_free(tmp);
6268c2ecf20Sopenharmony_cifail_nomem:
6278c2ecf20Sopenharmony_ci	retval = -ENOMEM;
6288c2ecf20Sopenharmony_ci	vm_unacct_memory(charge);
6298c2ecf20Sopenharmony_ci	goto out;
6308c2ecf20Sopenharmony_ci}
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_cistatic inline int mm_alloc_pgd(struct mm_struct *mm)
6338c2ecf20Sopenharmony_ci{
6348c2ecf20Sopenharmony_ci	mm_init_uxpgd(mm);
6358c2ecf20Sopenharmony_ci	mm->pgd = pgd_alloc(mm);
6368c2ecf20Sopenharmony_ci	if (unlikely(!mm->pgd))
6378c2ecf20Sopenharmony_ci		return -ENOMEM;
6388c2ecf20Sopenharmony_ci	return 0;
6398c2ecf20Sopenharmony_ci}
6408c2ecf20Sopenharmony_ci
6418c2ecf20Sopenharmony_cistatic inline void mm_free_pgd(struct mm_struct *mm)
6428c2ecf20Sopenharmony_ci{
6438c2ecf20Sopenharmony_ci	pgd_free(mm, mm->pgd);
6448c2ecf20Sopenharmony_ci	mm_clear_uxpgd(mm);
6458c2ecf20Sopenharmony_ci}
6468c2ecf20Sopenharmony_ci#else
6478c2ecf20Sopenharmony_cistatic int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
6488c2ecf20Sopenharmony_ci{
6498c2ecf20Sopenharmony_ci	mmap_write_lock(oldmm);
6508c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
6518c2ecf20Sopenharmony_ci	mmap_write_unlock(oldmm);
6528c2ecf20Sopenharmony_ci	return 0;
6538c2ecf20Sopenharmony_ci}
/*
 * Stubs for builds without CONFIG_MMU: "allocating" a pgd always evaluates
 * to 0 (success) and freeing one expands to nothing.
 */
#define mm_alloc_pgd(mm)	(0)
#define mm_free_pgd(mm)
6568c2ecf20Sopenharmony_ci#endif /* CONFIG_MMU */
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_cistatic void check_mm(struct mm_struct *mm)
6598c2ecf20Sopenharmony_ci{
6608c2ecf20Sopenharmony_ci	int i;
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS,
6638c2ecf20Sopenharmony_ci			 "Please make sure 'struct resident_page_types[]' is updated as well");
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_ci	for (i = 0; i < NR_MM_COUNTERS; i++) {
6668c2ecf20Sopenharmony_ci		long x = atomic_long_read(&mm->rss_stat.count[i]);
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_ci		if (unlikely(x))
6698c2ecf20Sopenharmony_ci			pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
6708c2ecf20Sopenharmony_ci				 mm, resident_page_types[i], x);
6718c2ecf20Sopenharmony_ci	}
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci	if (mm_pgtables_bytes(mm))
6748c2ecf20Sopenharmony_ci		pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
6758c2ecf20Sopenharmony_ci				mm_pgtables_bytes(mm));
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
6788c2ecf20Sopenharmony_ci	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
6798c2ecf20Sopenharmony_ci#endif
6808c2ecf20Sopenharmony_ci}
6818c2ecf20Sopenharmony_ci
/*
 * mm_struct objects come from the mm_cachep slab cache.  allocate_mm()
 * allocates with GFP_KERNEL; the returned memory is not zeroed here
 * (mm_alloc() does its own memset()).  free_mm() returns an object to the
 * cache.
 */
#define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_ci/*
6868c2ecf20Sopenharmony_ci * Called when the last reference to the mm
6878c2ecf20Sopenharmony_ci * is dropped: either by a lazy thread or by
6888c2ecf20Sopenharmony_ci * mmput. Free the page directory and the mm.
6898c2ecf20Sopenharmony_ci */
6908c2ecf20Sopenharmony_civoid __mmdrop(struct mm_struct *mm)
6918c2ecf20Sopenharmony_ci{
6928c2ecf20Sopenharmony_ci	BUG_ON(mm == &init_mm);
6938c2ecf20Sopenharmony_ci	WARN_ON_ONCE(mm == current->mm);
6948c2ecf20Sopenharmony_ci	WARN_ON_ONCE(mm == current->active_mm);
6958c2ecf20Sopenharmony_ci	mm_free_pgd(mm);
6968c2ecf20Sopenharmony_ci	destroy_context(mm);
6978c2ecf20Sopenharmony_ci	mmu_notifier_subscriptions_destroy(mm);
6988c2ecf20Sopenharmony_ci	check_mm(mm);
6998c2ecf20Sopenharmony_ci	put_user_ns(mm->user_ns);
7008c2ecf20Sopenharmony_ci	free_mm(mm);
7018c2ecf20Sopenharmony_ci}
7028c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__mmdrop);
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_cistatic void mmdrop_async_fn(struct work_struct *work)
7058c2ecf20Sopenharmony_ci{
7068c2ecf20Sopenharmony_ci	struct mm_struct *mm;
7078c2ecf20Sopenharmony_ci
7088c2ecf20Sopenharmony_ci	mm = container_of(work, struct mm_struct, async_put_work);
7098c2ecf20Sopenharmony_ci	__mmdrop(mm);
7108c2ecf20Sopenharmony_ci}
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_cistatic void mmdrop_async(struct mm_struct *mm)
7138c2ecf20Sopenharmony_ci{
7148c2ecf20Sopenharmony_ci	if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
7158c2ecf20Sopenharmony_ci		INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
7168c2ecf20Sopenharmony_ci		schedule_work(&mm->async_put_work);
7178c2ecf20Sopenharmony_ci	}
7188c2ecf20Sopenharmony_ci}
7198c2ecf20Sopenharmony_ci
7208c2ecf20Sopenharmony_cistatic inline void free_signal_struct(struct signal_struct *sig)
7218c2ecf20Sopenharmony_ci{
7228c2ecf20Sopenharmony_ci	taskstats_tgid_free(sig);
7238c2ecf20Sopenharmony_ci	sched_autogroup_exit(sig);
7248c2ecf20Sopenharmony_ci	/*
7258c2ecf20Sopenharmony_ci	 * __mmdrop is not safe to call from softirq context on x86 due to
7268c2ecf20Sopenharmony_ci	 * pgd_dtor so postpone it to the async context
7278c2ecf20Sopenharmony_ci	 */
7288c2ecf20Sopenharmony_ci	if (sig->oom_mm)
7298c2ecf20Sopenharmony_ci		mmdrop_async(sig->oom_mm);
7308c2ecf20Sopenharmony_ci	kmem_cache_free(signal_cachep, sig);
7318c2ecf20Sopenharmony_ci}
7328c2ecf20Sopenharmony_ci
7338c2ecf20Sopenharmony_cistatic inline void put_signal_struct(struct signal_struct *sig)
7348c2ecf20Sopenharmony_ci{
7358c2ecf20Sopenharmony_ci	if (refcount_dec_and_test(&sig->sigcnt))
7368c2ecf20Sopenharmony_ci		free_signal_struct(sig);
7378c2ecf20Sopenharmony_ci}
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_civoid __put_task_struct(struct task_struct *tsk)
7408c2ecf20Sopenharmony_ci{
7418c2ecf20Sopenharmony_ci	WARN_ON(!tsk->exit_state);
7428c2ecf20Sopenharmony_ci	WARN_ON(refcount_read(&tsk->usage));
7438c2ecf20Sopenharmony_ci	WARN_ON(tsk == current);
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci	io_uring_free(tsk);
7468c2ecf20Sopenharmony_ci	cgroup_free(tsk);
7478c2ecf20Sopenharmony_ci	task_numa_free(tsk, true);
7488c2ecf20Sopenharmony_ci	security_task_free(tsk);
7498c2ecf20Sopenharmony_ci	exit_creds(tsk);
7508c2ecf20Sopenharmony_ci	delayacct_tsk_free(tsk);
7518c2ecf20Sopenharmony_ci	put_signal_struct(tsk->signal);
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ci	if (!profile_handoff_task(tsk))
7548c2ecf20Sopenharmony_ci		free_task(tsk);
7558c2ecf20Sopenharmony_ci}
7568c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__put_task_struct);
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_civoid __put_task_struct_rcu_cb(struct rcu_head *rhp)
7598c2ecf20Sopenharmony_ci{
7608c2ecf20Sopenharmony_ci	struct task_struct *task = container_of(rhp, struct task_struct, rcu);
7618c2ecf20Sopenharmony_ci
7628c2ecf20Sopenharmony_ci	__put_task_struct(task);
7638c2ecf20Sopenharmony_ci}
7648c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb);
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_civoid __init __weak arch_task_cache_init(void) { }
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_ci/*
7698c2ecf20Sopenharmony_ci * set_max_threads
7708c2ecf20Sopenharmony_ci */
7718c2ecf20Sopenharmony_cistatic void set_max_threads(unsigned int max_threads_suggested)
7728c2ecf20Sopenharmony_ci{
7738c2ecf20Sopenharmony_ci	u64 threads;
7748c2ecf20Sopenharmony_ci	unsigned long nr_pages = totalram_pages();
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_ci	/*
7778c2ecf20Sopenharmony_ci	 * The number of threads shall be limited such that the thread
7788c2ecf20Sopenharmony_ci	 * structures may only consume a small part of the available memory.
7798c2ecf20Sopenharmony_ci	 */
7808c2ecf20Sopenharmony_ci	if (fls64(nr_pages) + fls64(PAGE_SIZE) > 64)
7818c2ecf20Sopenharmony_ci		threads = MAX_THREADS;
7828c2ecf20Sopenharmony_ci	else
7838c2ecf20Sopenharmony_ci		threads = div64_u64((u64) nr_pages * (u64) PAGE_SIZE,
7848c2ecf20Sopenharmony_ci				    (u64) THREAD_SIZE * 8UL);
7858c2ecf20Sopenharmony_ci
7868c2ecf20Sopenharmony_ci	if (threads > max_threads_suggested)
7878c2ecf20Sopenharmony_ci		threads = max_threads_suggested;
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
7908c2ecf20Sopenharmony_ci}
7918c2ecf20Sopenharmony_ci
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
/*
 * Object size passed to the "task_struct" slab cache in fork_init().
 * Initialized by the architecture:
 */
int arch_task_struct_size __read_mostly;
#endif
7968c2ecf20Sopenharmony_ci
7978c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
7988c2ecf20Sopenharmony_cistatic void task_struct_whitelist(unsigned long *offset, unsigned long *size)
7998c2ecf20Sopenharmony_ci{
8008c2ecf20Sopenharmony_ci	/* Fetch thread_struct whitelist for the architecture. */
8018c2ecf20Sopenharmony_ci	arch_thread_struct_whitelist(offset, size);
8028c2ecf20Sopenharmony_ci
8038c2ecf20Sopenharmony_ci	/*
8048c2ecf20Sopenharmony_ci	 * Handle zero-sized whitelist or empty thread_struct, otherwise
8058c2ecf20Sopenharmony_ci	 * adjust offset to position of thread_struct in task_struct.
8068c2ecf20Sopenharmony_ci	 */
8078c2ecf20Sopenharmony_ci	if (unlikely(*size == 0))
8088c2ecf20Sopenharmony_ci		*offset = 0;
8098c2ecf20Sopenharmony_ci	else
8108c2ecf20Sopenharmony_ci		*offset += offsetof(struct task_struct, thread);
8118c2ecf20Sopenharmony_ci}
8128c2ecf20Sopenharmony_ci#endif /* CONFIG_ARCH_TASK_STRUCT_ALLOCATOR */
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_civoid __init fork_init(void)
8158c2ecf20Sopenharmony_ci{
8168c2ecf20Sopenharmony_ci	int i;
8178c2ecf20Sopenharmony_ci#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
8188c2ecf20Sopenharmony_ci#ifndef ARCH_MIN_TASKALIGN
8198c2ecf20Sopenharmony_ci#define ARCH_MIN_TASKALIGN	0
8208c2ecf20Sopenharmony_ci#endif
8218c2ecf20Sopenharmony_ci	int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
8228c2ecf20Sopenharmony_ci	unsigned long useroffset, usersize;
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci	/* create a slab on which task_structs can be allocated */
8258c2ecf20Sopenharmony_ci	task_struct_whitelist(&useroffset, &usersize);
8268c2ecf20Sopenharmony_ci	task_struct_cachep = kmem_cache_create_usercopy("task_struct",
8278c2ecf20Sopenharmony_ci			arch_task_struct_size, align,
8288c2ecf20Sopenharmony_ci			SLAB_PANIC|SLAB_ACCOUNT,
8298c2ecf20Sopenharmony_ci			useroffset, usersize, NULL);
8308c2ecf20Sopenharmony_ci#endif
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_ci	/* do the arch specific task caches init */
8338c2ecf20Sopenharmony_ci	arch_task_cache_init();
8348c2ecf20Sopenharmony_ci
8358c2ecf20Sopenharmony_ci	set_max_threads(MAX_THREADS);
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_ci	init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
8388c2ecf20Sopenharmony_ci	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
8398c2ecf20Sopenharmony_ci	init_task.signal->rlim[RLIMIT_SIGPENDING] =
8408c2ecf20Sopenharmony_ci		init_task.signal->rlim[RLIMIT_NPROC];
8418c2ecf20Sopenharmony_ci
8428c2ecf20Sopenharmony_ci	for (i = 0; i < UCOUNT_COUNTS; i++) {
8438c2ecf20Sopenharmony_ci		init_user_ns.ucount_max[i] = max_threads/2;
8448c2ecf20Sopenharmony_ci	}
8458c2ecf20Sopenharmony_ci
8468c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK
8478c2ecf20Sopenharmony_ci	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
8488c2ecf20Sopenharmony_ci			  NULL, free_vm_stack_cache);
8498c2ecf20Sopenharmony_ci#endif
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_ci	scs_init();
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci	lockdep_init_task(&init_task);
8548c2ecf20Sopenharmony_ci	uprobes_init();
8558c2ecf20Sopenharmony_ci}
8568c2ecf20Sopenharmony_ci
8578c2ecf20Sopenharmony_ciint __weak arch_dup_task_struct(struct task_struct *dst,
8588c2ecf20Sopenharmony_ci					       struct task_struct *src)
8598c2ecf20Sopenharmony_ci{
8608c2ecf20Sopenharmony_ci	*dst = *src;
8618c2ecf20Sopenharmony_ci	return 0;
8628c2ecf20Sopenharmony_ci}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_civoid set_task_stack_end_magic(struct task_struct *tsk)
8658c2ecf20Sopenharmony_ci{
8668c2ecf20Sopenharmony_ci	unsigned long *stackend;
8678c2ecf20Sopenharmony_ci
8688c2ecf20Sopenharmony_ci	stackend = end_of_stack(tsk);
8698c2ecf20Sopenharmony_ci	*stackend = STACK_END_MAGIC;	/* for overflow detection */
8708c2ecf20Sopenharmony_ci}
8718c2ecf20Sopenharmony_ci
8728c2ecf20Sopenharmony_cistatic struct task_struct *dup_task_struct(struct task_struct *orig, int node)
8738c2ecf20Sopenharmony_ci{
8748c2ecf20Sopenharmony_ci	struct task_struct *tsk;
8758c2ecf20Sopenharmony_ci	unsigned long *stack;
8768c2ecf20Sopenharmony_ci	struct vm_struct *stack_vm_area __maybe_unused;
8778c2ecf20Sopenharmony_ci	int err;
8788c2ecf20Sopenharmony_ci
8798c2ecf20Sopenharmony_ci	if (node == NUMA_NO_NODE)
8808c2ecf20Sopenharmony_ci		node = tsk_fork_get_node(orig);
8818c2ecf20Sopenharmony_ci	tsk = alloc_task_struct_node(node);
8828c2ecf20Sopenharmony_ci	if (!tsk)
8838c2ecf20Sopenharmony_ci		return NULL;
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci	stack = alloc_thread_stack_node(tsk, node);
8868c2ecf20Sopenharmony_ci	if (!stack)
8878c2ecf20Sopenharmony_ci		goto free_tsk;
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_ci	if (memcg_charge_kernel_stack(tsk))
8908c2ecf20Sopenharmony_ci		goto free_stack;
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_ci	stack_vm_area = task_stack_vm_area(tsk);
8938c2ecf20Sopenharmony_ci
8948c2ecf20Sopenharmony_ci	err = arch_dup_task_struct(tsk, orig);
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci#ifdef CONFIG_ACCESS_TOKENID
8978c2ecf20Sopenharmony_ci	tsk->token = orig->token;
8988c2ecf20Sopenharmony_ci	tsk->ftoken = 0;
8998c2ecf20Sopenharmony_ci#endif
9008c2ecf20Sopenharmony_ci	/*
9018c2ecf20Sopenharmony_ci	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
9028c2ecf20Sopenharmony_ci	 * sure they're properly initialized before using any stack-related
9038c2ecf20Sopenharmony_ci	 * functions again.
9048c2ecf20Sopenharmony_ci	 */
9058c2ecf20Sopenharmony_ci	tsk->stack = stack;
9068c2ecf20Sopenharmony_ci#ifdef CONFIG_VMAP_STACK
9078c2ecf20Sopenharmony_ci	tsk->stack_vm_area = stack_vm_area;
9088c2ecf20Sopenharmony_ci#endif
9098c2ecf20Sopenharmony_ci#ifdef CONFIG_THREAD_INFO_IN_TASK
9108c2ecf20Sopenharmony_ci	refcount_set(&tsk->stack_refcount, 1);
9118c2ecf20Sopenharmony_ci#endif
9128c2ecf20Sopenharmony_ci
9138c2ecf20Sopenharmony_ci	if (err)
9148c2ecf20Sopenharmony_ci		goto free_stack;
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_ci	err = scs_prepare(tsk, node);
9178c2ecf20Sopenharmony_ci	if (err)
9188c2ecf20Sopenharmony_ci		goto free_stack;
9198c2ecf20Sopenharmony_ci
9208c2ecf20Sopenharmony_ci#ifdef CONFIG_SECCOMP
9218c2ecf20Sopenharmony_ci	/*
9228c2ecf20Sopenharmony_ci	 * We must handle setting up seccomp filters once we're under
9238c2ecf20Sopenharmony_ci	 * the sighand lock in case orig has changed between now and
9248c2ecf20Sopenharmony_ci	 * then. Until then, filter must be NULL to avoid messing up
9258c2ecf20Sopenharmony_ci	 * the usage counts on the error path calling free_task.
9268c2ecf20Sopenharmony_ci	 */
9278c2ecf20Sopenharmony_ci	tsk->seccomp.filter = NULL;
9288c2ecf20Sopenharmony_ci#endif
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_ci	setup_thread_stack(tsk, orig);
9318c2ecf20Sopenharmony_ci	clear_user_return_notifier(tsk);
9328c2ecf20Sopenharmony_ci	clear_tsk_need_resched(tsk);
9338c2ecf20Sopenharmony_ci	set_task_stack_end_magic(tsk);
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ci#ifdef CONFIG_STACKPROTECTOR
9368c2ecf20Sopenharmony_ci	tsk->stack_canary = get_random_canary();
9378c2ecf20Sopenharmony_ci#endif
9388c2ecf20Sopenharmony_ci	if (orig->cpus_ptr == &orig->cpus_mask)
9398c2ecf20Sopenharmony_ci		tsk->cpus_ptr = &tsk->cpus_mask;
9408c2ecf20Sopenharmony_ci
9418c2ecf20Sopenharmony_ci	/*
9428c2ecf20Sopenharmony_ci	 * One for the user space visible state that goes away when reaped.
9438c2ecf20Sopenharmony_ci	 * One for the scheduler.
9448c2ecf20Sopenharmony_ci	 */
9458c2ecf20Sopenharmony_ci	refcount_set(&tsk->rcu_users, 2);
9468c2ecf20Sopenharmony_ci	/* One for the rcu users */
9478c2ecf20Sopenharmony_ci	refcount_set(&tsk->usage, 1);
9488c2ecf20Sopenharmony_ci#ifdef CONFIG_BLK_DEV_IO_TRACE
9498c2ecf20Sopenharmony_ci	tsk->btrace_seq = 0;
9508c2ecf20Sopenharmony_ci#endif
9518c2ecf20Sopenharmony_ci	tsk->splice_pipe = NULL;
9528c2ecf20Sopenharmony_ci	tsk->task_frag.page = NULL;
9538c2ecf20Sopenharmony_ci	tsk->wake_q.next = NULL;
9548c2ecf20Sopenharmony_ci	tsk->pf_io_worker = NULL;
9558c2ecf20Sopenharmony_ci
9568c2ecf20Sopenharmony_ci	account_kernel_stack(tsk, 1);
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_ci	kcov_task_init(tsk);
9598c2ecf20Sopenharmony_ci
9608c2ecf20Sopenharmony_ci#ifdef CONFIG_FAULT_INJECTION
9618c2ecf20Sopenharmony_ci	tsk->fail_nth = 0;
9628c2ecf20Sopenharmony_ci#endif
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci#ifdef CONFIG_BLK_CGROUP
9658c2ecf20Sopenharmony_ci	tsk->throttle_queue = NULL;
9668c2ecf20Sopenharmony_ci	tsk->use_memdelay = 0;
9678c2ecf20Sopenharmony_ci#endif
9688c2ecf20Sopenharmony_ci
9698c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMCG
9708c2ecf20Sopenharmony_ci	tsk->active_memcg = NULL;
9718c2ecf20Sopenharmony_ci#endif
9728c2ecf20Sopenharmony_ci	return tsk;
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_cifree_stack:
9758c2ecf20Sopenharmony_ci	free_thread_stack(tsk);
9768c2ecf20Sopenharmony_cifree_tsk:
9778c2ecf20Sopenharmony_ci	free_task_struct(tsk);
9788c2ecf20Sopenharmony_ci	return NULL;
9798c2ecf20Sopenharmony_ci}
9808c2ecf20Sopenharmony_ci
/* Spinlock held by __mmput() while unlinking an mm from its mmlist. */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);

/*
 * mm->flags value given by mm_init() to an mm created while current has no
 * mm; can be overridden with the "coredump_filter=" boot parameter.
 */
static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
9848c2ecf20Sopenharmony_ci
9858c2ecf20Sopenharmony_cistatic int __init coredump_filter_setup(char *s)
9868c2ecf20Sopenharmony_ci{
9878c2ecf20Sopenharmony_ci	default_dump_filter =
9888c2ecf20Sopenharmony_ci		(simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
9898c2ecf20Sopenharmony_ci		MMF_DUMP_FILTER_MASK;
9908c2ecf20Sopenharmony_ci	return 1;
9918c2ecf20Sopenharmony_ci}
9928c2ecf20Sopenharmony_ci
9938c2ecf20Sopenharmony_ci__setup("coredump_filter=", coredump_filter_setup);
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci#include <linux/init_task.h>
9968c2ecf20Sopenharmony_ci
/*
 * Reset the AIO state of a new mm: initialise ioctx_lock and start with no
 * ioctx table.  Compiles to nothing without CONFIG_AIO.
 */
static void mm_init_aio(struct mm_struct *mm)
{
#ifdef CONFIG_AIO
	spin_lock_init(&mm->ioctx_lock);
	mm->ioctx_table = NULL;
#endif
}
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_cistatic __always_inline void mm_clear_owner(struct mm_struct *mm,
10068c2ecf20Sopenharmony_ci					   struct task_struct *p)
10078c2ecf20Sopenharmony_ci{
10088c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMCG
10098c2ecf20Sopenharmony_ci	if (mm->owner == p)
10108c2ecf20Sopenharmony_ci		WRITE_ONCE(mm->owner, NULL);
10118c2ecf20Sopenharmony_ci#endif
10128c2ecf20Sopenharmony_ci}
10138c2ecf20Sopenharmony_ci
/* Record @p as the owner of @mm.  Compiles to nothing without CONFIG_MEMCG. */
static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
{
#ifdef CONFIG_MEMCG
	mm->owner = p;
#endif
}
10208c2ecf20Sopenharmony_ci
/*
 * Start a new mm with pasid set to INIT_PASID.  Compiles to nothing without
 * CONFIG_IOMMU_SUPPORT.
 */
static void mm_init_pasid(struct mm_struct *mm)
{
#ifdef CONFIG_IOMMU_SUPPORT
	mm->pasid = INIT_PASID;
#endif
}
10278c2ecf20Sopenharmony_ci
/*
 * Start a new mm without a uprobes xol_area.  Compiles to nothing without
 * CONFIG_UPROBES.
 */
static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES
	mm->uprobes_state.xol_area = NULL;
#endif
}
10348c2ecf20Sopenharmony_ci
10358c2ecf20Sopenharmony_cistatic struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
10368c2ecf20Sopenharmony_ci	struct user_namespace *user_ns)
10378c2ecf20Sopenharmony_ci{
10388c2ecf20Sopenharmony_ci	mm->mmap = NULL;
10398c2ecf20Sopenharmony_ci	mm->mm_rb = RB_ROOT;
10408c2ecf20Sopenharmony_ci	mm->vmacache_seqnum = 0;
10418c2ecf20Sopenharmony_ci#ifdef CONFIG_RSS_THRESHOLD
10428c2ecf20Sopenharmony_ci	mm->rss_threshold = 0;
10438c2ecf20Sopenharmony_ci#endif
10448c2ecf20Sopenharmony_ci	atomic_set(&mm->mm_users, 1);
10458c2ecf20Sopenharmony_ci	atomic_set(&mm->mm_count, 1);
10468c2ecf20Sopenharmony_ci	seqcount_init(&mm->write_protect_seq);
10478c2ecf20Sopenharmony_ci	mmap_init_lock(mm);
10488c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&mm->mmlist);
10498c2ecf20Sopenharmony_ci	mm->core_state = NULL;
10508c2ecf20Sopenharmony_ci	mm_pgtables_bytes_init(mm);
10518c2ecf20Sopenharmony_ci	mm->map_count = 0;
10528c2ecf20Sopenharmony_ci	mm->locked_vm = 0;
10538c2ecf20Sopenharmony_ci	atomic_set(&mm->has_pinned, 0);
10548c2ecf20Sopenharmony_ci	atomic64_set(&mm->pinned_vm, 0);
10558c2ecf20Sopenharmony_ci	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
10568c2ecf20Sopenharmony_ci	spin_lock_init(&mm->page_table_lock);
10578c2ecf20Sopenharmony_ci	spin_lock_init(&mm->arg_lock);
10588c2ecf20Sopenharmony_ci	mm_init_cpumask(mm);
10598c2ecf20Sopenharmony_ci	mm_init_aio(mm);
10608c2ecf20Sopenharmony_ci	mm_init_owner(mm, p);
10618c2ecf20Sopenharmony_ci	mm_init_pasid(mm);
10628c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(mm->exe_file, NULL);
10638c2ecf20Sopenharmony_ci	mmu_notifier_subscriptions_init(mm);
10648c2ecf20Sopenharmony_ci	init_tlb_flush_pending(mm);
10658c2ecf20Sopenharmony_ci#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
10668c2ecf20Sopenharmony_ci	mm->pmd_huge_pte = NULL;
10678c2ecf20Sopenharmony_ci#endif
10688c2ecf20Sopenharmony_ci	mm_init_uprobes_state(mm);
10698c2ecf20Sopenharmony_ci	hugetlb_count_init(mm);
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	if (current->mm) {
10728c2ecf20Sopenharmony_ci		mm->flags = current->mm->flags & MMF_INIT_MASK;
10738c2ecf20Sopenharmony_ci		mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;
10748c2ecf20Sopenharmony_ci	} else {
10758c2ecf20Sopenharmony_ci		mm->flags = default_dump_filter;
10768c2ecf20Sopenharmony_ci		mm->def_flags = 0;
10778c2ecf20Sopenharmony_ci	}
10788c2ecf20Sopenharmony_ci
10798c2ecf20Sopenharmony_ci	if (mm_alloc_pgd(mm))
10808c2ecf20Sopenharmony_ci		goto fail_nopgd;
10818c2ecf20Sopenharmony_ci
10828c2ecf20Sopenharmony_ci	if (init_new_context(p, mm))
10838c2ecf20Sopenharmony_ci		goto fail_nocontext;
10848c2ecf20Sopenharmony_ci
10858c2ecf20Sopenharmony_ci	mm->user_ns = get_user_ns(user_ns);
10868c2ecf20Sopenharmony_ci	return mm;
10878c2ecf20Sopenharmony_ci
10888c2ecf20Sopenharmony_cifail_nocontext:
10898c2ecf20Sopenharmony_ci	mm_free_pgd(mm);
10908c2ecf20Sopenharmony_cifail_nopgd:
10918c2ecf20Sopenharmony_ci	free_mm(mm);
10928c2ecf20Sopenharmony_ci	return NULL;
10938c2ecf20Sopenharmony_ci}
10948c2ecf20Sopenharmony_ci
10958c2ecf20Sopenharmony_ci/*
10968c2ecf20Sopenharmony_ci * Allocate and initialize an mm_struct.
10978c2ecf20Sopenharmony_ci */
10988c2ecf20Sopenharmony_cistruct mm_struct *mm_alloc(void)
10998c2ecf20Sopenharmony_ci{
11008c2ecf20Sopenharmony_ci	struct mm_struct *mm;
11018c2ecf20Sopenharmony_ci
11028c2ecf20Sopenharmony_ci	mm = allocate_mm();
11038c2ecf20Sopenharmony_ci	if (!mm)
11048c2ecf20Sopenharmony_ci		return NULL;
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci	memset(mm, 0, sizeof(*mm));
11078c2ecf20Sopenharmony_ci	return mm_init(mm, current, current_user_ns());
11088c2ecf20Sopenharmony_ci}
11098c2ecf20Sopenharmony_ci
/*
 * Release everything attached to an mm whose mm_users count has reached
 * zero (asserted below), then drop a reference with mmdrop().  Called from
 * mmput() and, when built, from the mmput_async() work handler.
 */
static inline void __mmput(struct mm_struct *mm)
{
	VM_BUG_ON(atomic_read(&mm->mm_users));

	uprobe_clear_state(mm);
	exit_aio(mm);
	ksm_exit(mm);
	khugepaged_exit(mm); /* must run before exit_mmap */
	exit_mmap(mm);
	mm_put_huge_zero_page(mm);
	/* Drops the reference on the executable file, if any. */
	set_mm_exe_file(mm, NULL);
	/* Only take mmlist_lock when the mm is actually linked on a list. */
	if (!list_empty(&mm->mmlist)) {
		spin_lock(&mmlist_lock);
		list_del(&mm->mmlist);
		spin_unlock(&mmlist_lock);
	}
	/* Release the module reference of the binary format, if one is set. */
	if (mm->binfmt)
		module_put(mm->binfmt->module);
	mmdrop(mm);
}
11308c2ecf20Sopenharmony_ci
11318c2ecf20Sopenharmony_ci/*
11328c2ecf20Sopenharmony_ci * Decrement the use count and release all resources for an mm.
11338c2ecf20Sopenharmony_ci */
11348c2ecf20Sopenharmony_civoid mmput(struct mm_struct *mm)
11358c2ecf20Sopenharmony_ci{
11368c2ecf20Sopenharmony_ci	might_sleep();
11378c2ecf20Sopenharmony_ci
11388c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&mm->mm_users))
11398c2ecf20Sopenharmony_ci		__mmput(mm);
11408c2ecf20Sopenharmony_ci}
11418c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(mmput);
11428c2ecf20Sopenharmony_ci
11438c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU
11448c2ecf20Sopenharmony_cistatic void mmput_async_fn(struct work_struct *work)
11458c2ecf20Sopenharmony_ci{
11468c2ecf20Sopenharmony_ci	struct mm_struct *mm = container_of(work, struct mm_struct,
11478c2ecf20Sopenharmony_ci					    async_put_work);
11488c2ecf20Sopenharmony_ci
11498c2ecf20Sopenharmony_ci	__mmput(mm);
11508c2ecf20Sopenharmony_ci}
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_civoid mmput_async(struct mm_struct *mm)
11538c2ecf20Sopenharmony_ci{
11548c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&mm->mm_users)) {
11558c2ecf20Sopenharmony_ci		INIT_WORK(&mm->async_put_work, mmput_async_fn);
11568c2ecf20Sopenharmony_ci		schedule_work(&mm->async_put_work);
11578c2ecf20Sopenharmony_ci	}
11588c2ecf20Sopenharmony_ci}
11598c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(mmput_async);
11608c2ecf20Sopenharmony_ci#endif
11618c2ecf20Sopenharmony_ci
11628c2ecf20Sopenharmony_ci/**
11638c2ecf20Sopenharmony_ci * set_mm_exe_file - change a reference to the mm's executable file
11648c2ecf20Sopenharmony_ci *
11658c2ecf20Sopenharmony_ci * This changes mm's executable file (shown as symlink /proc/[pid]/exe).
11668c2ecf20Sopenharmony_ci *
11678c2ecf20Sopenharmony_ci * Main users are mmput() and sys_execve(). Callers prevent concurrent
11688c2ecf20Sopenharmony_ci * invocations: in mmput() nobody alive left, in execve task is single
11698c2ecf20Sopenharmony_ci * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
11708c2ecf20Sopenharmony_ci * mm->exe_file, but does so without using set_mm_exe_file() in order
11718c2ecf20Sopenharmony_ci * to do avoid the need for any locks.
11728c2ecf20Sopenharmony_ci */
11738c2ecf20Sopenharmony_civoid set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
11748c2ecf20Sopenharmony_ci{
11758c2ecf20Sopenharmony_ci	struct file *old_exe_file;
11768c2ecf20Sopenharmony_ci
11778c2ecf20Sopenharmony_ci	/*
11788c2ecf20Sopenharmony_ci	 * It is safe to dereference the exe_file without RCU as
11798c2ecf20Sopenharmony_ci	 * this function is only called if nobody else can access
11808c2ecf20Sopenharmony_ci	 * this mm -- see comment above for justification.
11818c2ecf20Sopenharmony_ci	 */
11828c2ecf20Sopenharmony_ci	old_exe_file = rcu_dereference_raw(mm->exe_file);
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_ci	if (new_exe_file)
11858c2ecf20Sopenharmony_ci		get_file(new_exe_file);
11868c2ecf20Sopenharmony_ci	rcu_assign_pointer(mm->exe_file, new_exe_file);
11878c2ecf20Sopenharmony_ci	if (old_exe_file)
11888c2ecf20Sopenharmony_ci		fput(old_exe_file);
11898c2ecf20Sopenharmony_ci}
11908c2ecf20Sopenharmony_ci
11918c2ecf20Sopenharmony_ci/**
11928c2ecf20Sopenharmony_ci * get_mm_exe_file - acquire a reference to the mm's executable file
11938c2ecf20Sopenharmony_ci *
11948c2ecf20Sopenharmony_ci * Returns %NULL if mm has no associated executable file.
11958c2ecf20Sopenharmony_ci * User must release file via fput().
11968c2ecf20Sopenharmony_ci */
11978c2ecf20Sopenharmony_cistruct file *get_mm_exe_file(struct mm_struct *mm)
11988c2ecf20Sopenharmony_ci{
11998c2ecf20Sopenharmony_ci	struct file *exe_file;
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci	rcu_read_lock();
12028c2ecf20Sopenharmony_ci	exe_file = rcu_dereference(mm->exe_file);
12038c2ecf20Sopenharmony_ci	if (exe_file && !get_file_rcu(exe_file))
12048c2ecf20Sopenharmony_ci		exe_file = NULL;
12058c2ecf20Sopenharmony_ci	rcu_read_unlock();
12068c2ecf20Sopenharmony_ci	return exe_file;
12078c2ecf20Sopenharmony_ci}
12088c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_mm_exe_file);
12098c2ecf20Sopenharmony_ci
12108c2ecf20Sopenharmony_ci/**
12118c2ecf20Sopenharmony_ci * get_task_exe_file - acquire a reference to the task's executable file
12128c2ecf20Sopenharmony_ci *
12138c2ecf20Sopenharmony_ci * Returns %NULL if task's mm (if any) has no associated executable file or
12148c2ecf20Sopenharmony_ci * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
12158c2ecf20Sopenharmony_ci * User must release file via fput().
12168c2ecf20Sopenharmony_ci */
12178c2ecf20Sopenharmony_cistruct file *get_task_exe_file(struct task_struct *task)
12188c2ecf20Sopenharmony_ci{
12198c2ecf20Sopenharmony_ci	struct file *exe_file = NULL;
12208c2ecf20Sopenharmony_ci	struct mm_struct *mm;
12218c2ecf20Sopenharmony_ci
12228c2ecf20Sopenharmony_ci	task_lock(task);
12238c2ecf20Sopenharmony_ci	mm = task->mm;
12248c2ecf20Sopenharmony_ci	if (mm) {
12258c2ecf20Sopenharmony_ci		if (!(task->flags & PF_KTHREAD))
12268c2ecf20Sopenharmony_ci			exe_file = get_mm_exe_file(mm);
12278c2ecf20Sopenharmony_ci	}
12288c2ecf20Sopenharmony_ci	task_unlock(task);
12298c2ecf20Sopenharmony_ci	return exe_file;
12308c2ecf20Sopenharmony_ci}
12318c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_task_exe_file);
12328c2ecf20Sopenharmony_ci
12338c2ecf20Sopenharmony_ci/**
12348c2ecf20Sopenharmony_ci * get_task_mm - acquire a reference to the task's mm
12358c2ecf20Sopenharmony_ci *
12368c2ecf20Sopenharmony_ci * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
12378c2ecf20Sopenharmony_ci * this kernel workthread has transiently adopted a user mm with use_mm,
12388c2ecf20Sopenharmony_ci * to do its AIO) is not set and if so returns a reference to it, after
12398c2ecf20Sopenharmony_ci * bumping up the use count.  User must release the mm via mmput()
12408c2ecf20Sopenharmony_ci * after use.  Typically used by /proc and ptrace.
12418c2ecf20Sopenharmony_ci */
12428c2ecf20Sopenharmony_cistruct mm_struct *get_task_mm(struct task_struct *task)
12438c2ecf20Sopenharmony_ci{
12448c2ecf20Sopenharmony_ci	struct mm_struct *mm;
12458c2ecf20Sopenharmony_ci
12468c2ecf20Sopenharmony_ci	task_lock(task);
12478c2ecf20Sopenharmony_ci	mm = task->mm;
12488c2ecf20Sopenharmony_ci	if (mm) {
12498c2ecf20Sopenharmony_ci		if (task->flags & PF_KTHREAD)
12508c2ecf20Sopenharmony_ci			mm = NULL;
12518c2ecf20Sopenharmony_ci		else
12528c2ecf20Sopenharmony_ci			mmget(mm);
12538c2ecf20Sopenharmony_ci	}
12548c2ecf20Sopenharmony_ci	task_unlock(task);
12558c2ecf20Sopenharmony_ci	return mm;
12568c2ecf20Sopenharmony_ci}
12578c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(get_task_mm);
12588c2ecf20Sopenharmony_ci
12598c2ecf20Sopenharmony_cistruct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
12608c2ecf20Sopenharmony_ci{
12618c2ecf20Sopenharmony_ci	struct mm_struct *mm;
12628c2ecf20Sopenharmony_ci	int err;
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_ci	err =  down_read_killable(&task->signal->exec_update_lock);
12658c2ecf20Sopenharmony_ci	if (err)
12668c2ecf20Sopenharmony_ci		return ERR_PTR(err);
12678c2ecf20Sopenharmony_ci
12688c2ecf20Sopenharmony_ci	mm = get_task_mm(task);
12698c2ecf20Sopenharmony_ci	if (mm && mm != current->mm &&
12708c2ecf20Sopenharmony_ci			!ptrace_may_access(task, mode)) {
12718c2ecf20Sopenharmony_ci		mmput(mm);
12728c2ecf20Sopenharmony_ci		mm = ERR_PTR(-EACCES);
12738c2ecf20Sopenharmony_ci	}
12748c2ecf20Sopenharmony_ci	up_read(&task->signal->exec_update_lock);
12758c2ecf20Sopenharmony_ci
12768c2ecf20Sopenharmony_ci	return mm;
12778c2ecf20Sopenharmony_ci}
12788c2ecf20Sopenharmony_ci
12798c2ecf20Sopenharmony_cistatic void complete_vfork_done(struct task_struct *tsk)
12808c2ecf20Sopenharmony_ci{
12818c2ecf20Sopenharmony_ci	struct completion *vfork;
12828c2ecf20Sopenharmony_ci
12838c2ecf20Sopenharmony_ci	task_lock(tsk);
12848c2ecf20Sopenharmony_ci	vfork = tsk->vfork_done;
12858c2ecf20Sopenharmony_ci	if (likely(vfork)) {
12868c2ecf20Sopenharmony_ci		tsk->vfork_done = NULL;
12878c2ecf20Sopenharmony_ci		complete(vfork);
12888c2ecf20Sopenharmony_ci	}
12898c2ecf20Sopenharmony_ci	task_unlock(tsk);
12908c2ecf20Sopenharmony_ci}
12918c2ecf20Sopenharmony_ci
12928c2ecf20Sopenharmony_cistatic int wait_for_vfork_done(struct task_struct *child,
12938c2ecf20Sopenharmony_ci				struct completion *vfork)
12948c2ecf20Sopenharmony_ci{
12958c2ecf20Sopenharmony_ci	int killed;
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci	freezer_do_not_count();
12988c2ecf20Sopenharmony_ci	cgroup_enter_frozen();
12998c2ecf20Sopenharmony_ci	killed = wait_for_completion_killable(vfork);
13008c2ecf20Sopenharmony_ci	cgroup_leave_frozen(false);
13018c2ecf20Sopenharmony_ci	freezer_count();
13028c2ecf20Sopenharmony_ci
13038c2ecf20Sopenharmony_ci	if (killed) {
13048c2ecf20Sopenharmony_ci		task_lock(child);
13058c2ecf20Sopenharmony_ci		child->vfork_done = NULL;
13068c2ecf20Sopenharmony_ci		task_unlock(child);
13078c2ecf20Sopenharmony_ci	}
13088c2ecf20Sopenharmony_ci
13098c2ecf20Sopenharmony_ci	put_task_struct(child);
13108c2ecf20Sopenharmony_ci	return killed;
13118c2ecf20Sopenharmony_ci}
13128c2ecf20Sopenharmony_ci
13138c2ecf20Sopenharmony_ci/* Please note the differences between mmput and mm_release.
13148c2ecf20Sopenharmony_ci * mmput is called whenever we stop holding onto a mm_struct,
13158c2ecf20Sopenharmony_ci * error success whatever.
13168c2ecf20Sopenharmony_ci *
13178c2ecf20Sopenharmony_ci * mm_release is called after a mm_struct has been removed
13188c2ecf20Sopenharmony_ci * from the current process.
13198c2ecf20Sopenharmony_ci *
13208c2ecf20Sopenharmony_ci * This difference is important for error handling, when we
13218c2ecf20Sopenharmony_ci * only half set up a mm_struct for a new process and need to restore
13228c2ecf20Sopenharmony_ci * the old one.  Because we mmput the new mm_struct before
13238c2ecf20Sopenharmony_ci * restoring the old one. . .
13248c2ecf20Sopenharmony_ci * Eric Biederman 10 January 1998
13258c2ecf20Sopenharmony_ci */
13268c2ecf20Sopenharmony_cistatic void mm_release(struct task_struct *tsk, struct mm_struct *mm)
13278c2ecf20Sopenharmony_ci{
13288c2ecf20Sopenharmony_ci	uprobe_free_utask(tsk);
13298c2ecf20Sopenharmony_ci
13308c2ecf20Sopenharmony_ci	/* Get rid of any cached register state */
13318c2ecf20Sopenharmony_ci	deactivate_mm(tsk, mm);
13328c2ecf20Sopenharmony_ci
13338c2ecf20Sopenharmony_ci	/*
13348c2ecf20Sopenharmony_ci	 * Signal userspace if we're not exiting with a core dump
13358c2ecf20Sopenharmony_ci	 * because we want to leave the value intact for debugging
13368c2ecf20Sopenharmony_ci	 * purposes.
13378c2ecf20Sopenharmony_ci	 */
13388c2ecf20Sopenharmony_ci	if (tsk->clear_child_tid) {
13398c2ecf20Sopenharmony_ci		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
13408c2ecf20Sopenharmony_ci		    atomic_read(&mm->mm_users) > 1) {
13418c2ecf20Sopenharmony_ci			/*
13428c2ecf20Sopenharmony_ci			 * We don't check the error code - if userspace has
13438c2ecf20Sopenharmony_ci			 * not set up a proper pointer then tough luck.
13448c2ecf20Sopenharmony_ci			 */
13458c2ecf20Sopenharmony_ci			put_user(0, tsk->clear_child_tid);
13468c2ecf20Sopenharmony_ci			do_futex(tsk->clear_child_tid, FUTEX_WAKE,
13478c2ecf20Sopenharmony_ci					1, NULL, NULL, 0, 0);
13488c2ecf20Sopenharmony_ci		}
13498c2ecf20Sopenharmony_ci		tsk->clear_child_tid = NULL;
13508c2ecf20Sopenharmony_ci	}
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	/*
13538c2ecf20Sopenharmony_ci	 * All done, finally we can wake up parent and return this mm to him.
13548c2ecf20Sopenharmony_ci	 * Also kthread_stop() uses this completion for synchronization.
13558c2ecf20Sopenharmony_ci	 */
13568c2ecf20Sopenharmony_ci	if (tsk->vfork_done)
13578c2ecf20Sopenharmony_ci		complete_vfork_done(tsk);
13588c2ecf20Sopenharmony_ci}
13598c2ecf20Sopenharmony_ci
/*
 * Exit-time variant of mm_release(): performs the futex exit release for
 * @tsk first, then the common mm_release() work.
 */
void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	futex_exit_release(tsk);
	mm_release(tsk, mm);
}
13658c2ecf20Sopenharmony_ci
/*
 * Exec-time variant of mm_release(): performs the futex exec release for
 * @tsk first, then the common mm_release() work.
 */
void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
	futex_exec_release(tsk);
	mm_release(tsk, mm);
}
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_ci/**
13738c2ecf20Sopenharmony_ci * dup_mm() - duplicates an existing mm structure
13748c2ecf20Sopenharmony_ci * @tsk: the task_struct with which the new mm will be associated.
13758c2ecf20Sopenharmony_ci * @oldmm: the mm to duplicate.
13768c2ecf20Sopenharmony_ci *
13778c2ecf20Sopenharmony_ci * Allocates a new mm structure and duplicates the provided @oldmm structure
13788c2ecf20Sopenharmony_ci * content into it.
13798c2ecf20Sopenharmony_ci *
13808c2ecf20Sopenharmony_ci * Return: the duplicated mm or NULL on failure.
13818c2ecf20Sopenharmony_ci */
13828c2ecf20Sopenharmony_cistatic struct mm_struct *dup_mm(struct task_struct *tsk,
13838c2ecf20Sopenharmony_ci				struct mm_struct *oldmm)
13848c2ecf20Sopenharmony_ci{
13858c2ecf20Sopenharmony_ci	struct mm_struct *mm;
13868c2ecf20Sopenharmony_ci	int err;
13878c2ecf20Sopenharmony_ci
13888c2ecf20Sopenharmony_ci	mm = allocate_mm();
13898c2ecf20Sopenharmony_ci	if (!mm)
13908c2ecf20Sopenharmony_ci		goto fail_nomem;
13918c2ecf20Sopenharmony_ci
13928c2ecf20Sopenharmony_ci	memcpy(mm, oldmm, sizeof(*mm));
13938c2ecf20Sopenharmony_ci
13948c2ecf20Sopenharmony_ci	if (!mm_init(mm, tsk, mm->user_ns))
13958c2ecf20Sopenharmony_ci		goto fail_nomem;
13968c2ecf20Sopenharmony_ci
13978c2ecf20Sopenharmony_ci	err = dup_mmap(mm, oldmm);
13988c2ecf20Sopenharmony_ci	if (err)
13998c2ecf20Sopenharmony_ci		goto free_pt;
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_ci	mm->hiwater_rss = get_mm_rss(mm);
14028c2ecf20Sopenharmony_ci	mm->hiwater_vm = mm->total_vm;
14038c2ecf20Sopenharmony_ci
14048c2ecf20Sopenharmony_ci	if (mm->binfmt && !try_module_get(mm->binfmt->module))
14058c2ecf20Sopenharmony_ci		goto free_pt;
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_ci	return mm;
14088c2ecf20Sopenharmony_ci
14098c2ecf20Sopenharmony_cifree_pt:
14108c2ecf20Sopenharmony_ci	/* don't put binfmt in mmput, we haven't got module yet */
14118c2ecf20Sopenharmony_ci	mm->binfmt = NULL;
14128c2ecf20Sopenharmony_ci	mm_init_owner(mm, NULL);
14138c2ecf20Sopenharmony_ci	mmput(mm);
14148c2ecf20Sopenharmony_ci
14158c2ecf20Sopenharmony_cifail_nomem:
14168c2ecf20Sopenharmony_ci	return NULL;
14178c2ecf20Sopenharmony_ci}
14188c2ecf20Sopenharmony_ci
14198c2ecf20Sopenharmony_cistatic int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
14208c2ecf20Sopenharmony_ci{
14218c2ecf20Sopenharmony_ci	struct mm_struct *mm, *oldmm;
14228c2ecf20Sopenharmony_ci	int retval;
14238c2ecf20Sopenharmony_ci
14248c2ecf20Sopenharmony_ci	tsk->min_flt = tsk->maj_flt = 0;
14258c2ecf20Sopenharmony_ci	tsk->nvcsw = tsk->nivcsw = 0;
14268c2ecf20Sopenharmony_ci#ifdef CONFIG_DETECT_HUNG_TASK
14278c2ecf20Sopenharmony_ci	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
14288c2ecf20Sopenharmony_ci	tsk->last_switch_time = 0;
14298c2ecf20Sopenharmony_ci#endif
14308c2ecf20Sopenharmony_ci
14318c2ecf20Sopenharmony_ci	tsk->mm = NULL;
14328c2ecf20Sopenharmony_ci	tsk->active_mm = NULL;
14338c2ecf20Sopenharmony_ci
14348c2ecf20Sopenharmony_ci	/*
14358c2ecf20Sopenharmony_ci	 * Are we cloning a kernel thread?
14368c2ecf20Sopenharmony_ci	 *
14378c2ecf20Sopenharmony_ci	 * We need to steal a active VM for that..
14388c2ecf20Sopenharmony_ci	 */
14398c2ecf20Sopenharmony_ci	oldmm = current->mm;
14408c2ecf20Sopenharmony_ci	if (!oldmm)
14418c2ecf20Sopenharmony_ci		return 0;
14428c2ecf20Sopenharmony_ci
14438c2ecf20Sopenharmony_ci	/* initialize the new vmacache entries */
14448c2ecf20Sopenharmony_ci	vmacache_flush(tsk);
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_VM) {
14478c2ecf20Sopenharmony_ci		mmget(oldmm);
14488c2ecf20Sopenharmony_ci		mm = oldmm;
14498c2ecf20Sopenharmony_ci		goto good_mm;
14508c2ecf20Sopenharmony_ci	}
14518c2ecf20Sopenharmony_ci
14528c2ecf20Sopenharmony_ci	retval = -ENOMEM;
14538c2ecf20Sopenharmony_ci	mm = dup_mm(tsk, current->mm);
14548c2ecf20Sopenharmony_ci	if (!mm)
14558c2ecf20Sopenharmony_ci		goto fail_nomem;
14568c2ecf20Sopenharmony_ci
14578c2ecf20Sopenharmony_cigood_mm:
14588c2ecf20Sopenharmony_ci	tsk->mm = mm;
14598c2ecf20Sopenharmony_ci	tsk->active_mm = mm;
14608c2ecf20Sopenharmony_ci	return 0;
14618c2ecf20Sopenharmony_ci
14628c2ecf20Sopenharmony_cifail_nomem:
14638c2ecf20Sopenharmony_ci	return retval;
14648c2ecf20Sopenharmony_ci}
14658c2ecf20Sopenharmony_ci
14668c2ecf20Sopenharmony_cistatic int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
14678c2ecf20Sopenharmony_ci{
14688c2ecf20Sopenharmony_ci	struct fs_struct *fs = current->fs;
14698c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_FS) {
14708c2ecf20Sopenharmony_ci		/* tsk->fs is already what we want */
14718c2ecf20Sopenharmony_ci		spin_lock(&fs->lock);
14728c2ecf20Sopenharmony_ci		if (fs->in_exec) {
14738c2ecf20Sopenharmony_ci			spin_unlock(&fs->lock);
14748c2ecf20Sopenharmony_ci			return -EAGAIN;
14758c2ecf20Sopenharmony_ci		}
14768c2ecf20Sopenharmony_ci		fs->users++;
14778c2ecf20Sopenharmony_ci		spin_unlock(&fs->lock);
14788c2ecf20Sopenharmony_ci		return 0;
14798c2ecf20Sopenharmony_ci	}
14808c2ecf20Sopenharmony_ci	tsk->fs = copy_fs_struct(fs);
14818c2ecf20Sopenharmony_ci	if (!tsk->fs)
14828c2ecf20Sopenharmony_ci		return -ENOMEM;
14838c2ecf20Sopenharmony_ci	return 0;
14848c2ecf20Sopenharmony_ci}
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_cistatic int copy_files(unsigned long clone_flags, struct task_struct *tsk)
14878c2ecf20Sopenharmony_ci{
14888c2ecf20Sopenharmony_ci	struct files_struct *oldf, *newf;
14898c2ecf20Sopenharmony_ci	int error = 0;
14908c2ecf20Sopenharmony_ci
14918c2ecf20Sopenharmony_ci	/*
14928c2ecf20Sopenharmony_ci	 * A background process may not have any files ...
14938c2ecf20Sopenharmony_ci	 */
14948c2ecf20Sopenharmony_ci	oldf = current->files;
14958c2ecf20Sopenharmony_ci	if (!oldf)
14968c2ecf20Sopenharmony_ci		goto out;
14978c2ecf20Sopenharmony_ci
14988c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_FILES) {
14998c2ecf20Sopenharmony_ci		atomic_inc(&oldf->count);
15008c2ecf20Sopenharmony_ci		goto out;
15018c2ecf20Sopenharmony_ci	}
15028c2ecf20Sopenharmony_ci
15038c2ecf20Sopenharmony_ci	newf = dup_fd(oldf, NR_OPEN_MAX, &error);
15048c2ecf20Sopenharmony_ci	if (!newf)
15058c2ecf20Sopenharmony_ci		goto out;
15068c2ecf20Sopenharmony_ci
15078c2ecf20Sopenharmony_ci	tsk->files = newf;
15088c2ecf20Sopenharmony_ci	error = 0;
15098c2ecf20Sopenharmony_ciout:
15108c2ecf20Sopenharmony_ci	return error;
15118c2ecf20Sopenharmony_ci}
15128c2ecf20Sopenharmony_ci
/*
 * Set up the io_context of @tsk (CONFIG_BLOCK only).
 *
 * With CLONE_IO the caller's io_context is shared. Otherwise, if the
 * caller's ioprio is valid, an io_context is obtained for @tsk and the
 * ioprio is copied into it.
 *
 * Returns 0 on success or -ENOMEM if no io_context could be obtained.
 */
static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
#ifdef CONFIG_BLOCK
	struct io_context *ioc = current->io_context;
	struct io_context *new_ioc;

	if (!ioc)
		return 0;

	/*
	 * Share io context with parent, if CLONE_IO is set
	 */
	if (clone_flags & CLONE_IO) {
		ioc_task_link(ioc);
		tsk->io_context = ioc;
		return 0;
	}

	/* Without a valid ioprio there is nothing to inherit. */
	if (!ioprio_valid(ioc->ioprio))
		return 0;

	new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
	if (unlikely(!new_ioc))
		return -ENOMEM;

	new_ioc->ioprio = ioc->ioprio;
	put_io_context(new_ioc);
#endif
	return 0;
}
15388c2ecf20Sopenharmony_ci
15398c2ecf20Sopenharmony_cistatic int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
15408c2ecf20Sopenharmony_ci{
15418c2ecf20Sopenharmony_ci	struct sighand_struct *sig;
15428c2ecf20Sopenharmony_ci
15438c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_SIGHAND) {
15448c2ecf20Sopenharmony_ci		refcount_inc(&current->sighand->count);
15458c2ecf20Sopenharmony_ci		return 0;
15468c2ecf20Sopenharmony_ci	}
15478c2ecf20Sopenharmony_ci	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
15488c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(tsk->sighand, sig);
15498c2ecf20Sopenharmony_ci	if (!sig)
15508c2ecf20Sopenharmony_ci		return -ENOMEM;
15518c2ecf20Sopenharmony_ci
15528c2ecf20Sopenharmony_ci	refcount_set(&sig->count, 1);
15538c2ecf20Sopenharmony_ci	spin_lock_irq(&current->sighand->siglock);
15548c2ecf20Sopenharmony_ci	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
15558c2ecf20Sopenharmony_ci	spin_unlock_irq(&current->sighand->siglock);
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci	/* Reset all signal handler not set to SIG_IGN to SIG_DFL. */
15588c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_CLEAR_SIGHAND)
15598c2ecf20Sopenharmony_ci		flush_signal_handlers(tsk, 0);
15608c2ecf20Sopenharmony_ci
15618c2ecf20Sopenharmony_ci	return 0;
15628c2ecf20Sopenharmony_ci}
15638c2ecf20Sopenharmony_ci
15648c2ecf20Sopenharmony_civoid __cleanup_sighand(struct sighand_struct *sighand)
15658c2ecf20Sopenharmony_ci{
15668c2ecf20Sopenharmony_ci	if (refcount_dec_and_test(&sighand->count)) {
15678c2ecf20Sopenharmony_ci		signalfd_cleanup(sighand);
15688c2ecf20Sopenharmony_ci		/*
15698c2ecf20Sopenharmony_ci		 * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
15708c2ecf20Sopenharmony_ci		 * without an RCU grace period, see __lock_task_sighand().
15718c2ecf20Sopenharmony_ci		 */
15728c2ecf20Sopenharmony_ci		kmem_cache_free(sighand_cachep, sighand);
15738c2ecf20Sopenharmony_ci	}
15748c2ecf20Sopenharmony_ci}
15758c2ecf20Sopenharmony_ci
15768c2ecf20Sopenharmony_ci/*
15778c2ecf20Sopenharmony_ci * Initialize POSIX timer handling for a thread group.
15788c2ecf20Sopenharmony_ci */
15798c2ecf20Sopenharmony_cistatic void posix_cpu_timers_init_group(struct signal_struct *sig)
15808c2ecf20Sopenharmony_ci{
15818c2ecf20Sopenharmony_ci	struct posix_cputimers *pct = &sig->posix_cputimers;
15828c2ecf20Sopenharmony_ci	unsigned long cpu_limit;
15838c2ecf20Sopenharmony_ci
15848c2ecf20Sopenharmony_ci	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
15858c2ecf20Sopenharmony_ci	posix_cputimers_group_init(pct, cpu_limit);
15868c2ecf20Sopenharmony_ci}
15878c2ecf20Sopenharmony_ci
15888c2ecf20Sopenharmony_cistatic int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
15898c2ecf20Sopenharmony_ci{
15908c2ecf20Sopenharmony_ci	struct signal_struct *sig;
15918c2ecf20Sopenharmony_ci
15928c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_THREAD)
15938c2ecf20Sopenharmony_ci		return 0;
15948c2ecf20Sopenharmony_ci
15958c2ecf20Sopenharmony_ci	sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
15968c2ecf20Sopenharmony_ci	tsk->signal = sig;
15978c2ecf20Sopenharmony_ci	if (!sig)
15988c2ecf20Sopenharmony_ci		return -ENOMEM;
15998c2ecf20Sopenharmony_ci
16008c2ecf20Sopenharmony_ci	sig->nr_threads = 1;
16018c2ecf20Sopenharmony_ci	atomic_set(&sig->live, 1);
16028c2ecf20Sopenharmony_ci	refcount_set(&sig->sigcnt, 1);
16038c2ecf20Sopenharmony_ci
16048c2ecf20Sopenharmony_ci	/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
16058c2ecf20Sopenharmony_ci	sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
16068c2ecf20Sopenharmony_ci	tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head);
16078c2ecf20Sopenharmony_ci
16088c2ecf20Sopenharmony_ci	init_waitqueue_head(&sig->wait_chldexit);
16098c2ecf20Sopenharmony_ci	sig->curr_target = tsk;
16108c2ecf20Sopenharmony_ci	init_sigpending(&sig->shared_pending);
16118c2ecf20Sopenharmony_ci	INIT_HLIST_HEAD(&sig->multiprocess);
16128c2ecf20Sopenharmony_ci	seqlock_init(&sig->stats_lock);
16138c2ecf20Sopenharmony_ci	prev_cputime_init(&sig->prev_cputime);
16148c2ecf20Sopenharmony_ci
16158c2ecf20Sopenharmony_ci#ifdef CONFIG_POSIX_TIMERS
16168c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&sig->posix_timers);
16178c2ecf20Sopenharmony_ci	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
16188c2ecf20Sopenharmony_ci	sig->real_timer.function = it_real_fn;
16198c2ecf20Sopenharmony_ci#endif
16208c2ecf20Sopenharmony_ci
16218c2ecf20Sopenharmony_ci	task_lock(current->group_leader);
16228c2ecf20Sopenharmony_ci	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
16238c2ecf20Sopenharmony_ci	task_unlock(current->group_leader);
16248c2ecf20Sopenharmony_ci
16258c2ecf20Sopenharmony_ci	posix_cpu_timers_init_group(sig);
16268c2ecf20Sopenharmony_ci
16278c2ecf20Sopenharmony_ci	tty_audit_fork(sig);
16288c2ecf20Sopenharmony_ci	sched_autogroup_fork(sig);
16298c2ecf20Sopenharmony_ci
16308c2ecf20Sopenharmony_ci	sig->oom_score_adj = current->signal->oom_score_adj;
16318c2ecf20Sopenharmony_ci	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
16328c2ecf20Sopenharmony_ci
16338c2ecf20Sopenharmony_ci	mutex_init(&sig->cred_guard_mutex);
16348c2ecf20Sopenharmony_ci	init_rwsem(&sig->exec_update_lock);
16358c2ecf20Sopenharmony_ci
16368c2ecf20Sopenharmony_ci	return 0;
16378c2ecf20Sopenharmony_ci}
16388c2ecf20Sopenharmony_ci
/*
 * Copy the caller's seccomp state to the new task @p (CONFIG_SECCOMP
 * only), keeping no_new_privs and TIF_SECCOMP in sync with it.
 */
static void copy_seccomp(struct task_struct *p)
{
#ifdef CONFIG_SECCOMP
	/*
	 * Must be called with sighand->siglock held, which is common to
	 * all threads in the group. Holding cred_guard_mutex is not
	 * needed because this new task is not yet running and cannot
	 * be racing exec.
	 */
	assert_spin_locked(&current->sighand->siglock);

	/* Take a reference for the new filter user, then assign it. */
	get_seccomp_filter(current);
	p->seccomp = current->seccomp;

	/*
	 * Explicitly enable no_new_privs here in case it got set
	 * between the task_struct being duplicated and the sighand
	 * lock being taken. The seccomp state and nnp must be in sync.
	 */
	if (task_no_new_privs(current))
		task_set_no_new_privs(p);

	/*
	 * If the parent gained a seccomp mode after the thread flags
	 * were copied but before the sighand lock was taken, the
	 * seccomp thread flag has to be enabled manually here.
	 */
	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
		set_tsk_thread_flag(p, TIF_SECCOMP);
#endif
}
16718c2ecf20Sopenharmony_ci
16728c2ecf20Sopenharmony_ciSYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
16738c2ecf20Sopenharmony_ci{
16748c2ecf20Sopenharmony_ci	current->clear_child_tid = tidptr;
16758c2ecf20Sopenharmony_ci
16768c2ecf20Sopenharmony_ci	return task_pid_vnr(current);
16778c2ecf20Sopenharmony_ci}
16788c2ecf20Sopenharmony_ci
16798c2ecf20Sopenharmony_cistatic void rt_mutex_init_task(struct task_struct *p)
16808c2ecf20Sopenharmony_ci{
16818c2ecf20Sopenharmony_ci	raw_spin_lock_init(&p->pi_lock);
16828c2ecf20Sopenharmony_ci#ifdef CONFIG_RT_MUTEXES
16838c2ecf20Sopenharmony_ci	p->pi_waiters = RB_ROOT_CACHED;
16848c2ecf20Sopenharmony_ci	p->pi_top_task = NULL;
16858c2ecf20Sopenharmony_ci	p->pi_blocked_on = NULL;
16868c2ecf20Sopenharmony_ci#endif
16878c2ecf20Sopenharmony_ci}
16888c2ecf20Sopenharmony_ci
16898c2ecf20Sopenharmony_cistatic inline void init_task_pid_links(struct task_struct *task)
16908c2ecf20Sopenharmony_ci{
16918c2ecf20Sopenharmony_ci	enum pid_type type;
16928c2ecf20Sopenharmony_ci
16938c2ecf20Sopenharmony_ci	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
16948c2ecf20Sopenharmony_ci		INIT_HLIST_NODE(&task->pid_links[type]);
16958c2ecf20Sopenharmony_ci	}
16968c2ecf20Sopenharmony_ci}
16978c2ecf20Sopenharmony_ci
16988c2ecf20Sopenharmony_cistatic inline void
16998c2ecf20Sopenharmony_ciinit_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
17008c2ecf20Sopenharmony_ci{
17018c2ecf20Sopenharmony_ci	if (type == PIDTYPE_PID)
17028c2ecf20Sopenharmony_ci		task->thread_pid = pid;
17038c2ecf20Sopenharmony_ci	else
17048c2ecf20Sopenharmony_ci		task->signal->pids[type] = pid;
17058c2ecf20Sopenharmony_ci}
17068c2ecf20Sopenharmony_ci
/*
 * Reset the per-task RCU state of a new task @p. Each group of fields is
 * only present, and only reset, when the matching config option is on.
 */
static inline void rcu_copy_process(struct task_struct *p)
{
#ifdef CONFIG_PREEMPT_RCU
	p->rcu_read_lock_nesting = 0;
	p->rcu_read_unlock_special.s = 0;
	p->rcu_blocked_node = NULL;
	INIT_LIST_HEAD(&p->rcu_node_entry);
#endif /* CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
	p->rcu_tasks_holdout = false;
	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
	p->rcu_tasks_idle_cpu = -1;
#endif /* CONFIG_TASKS_RCU */
#ifdef CONFIG_TASKS_TRACE_RCU
	p->trc_reader_nesting = 0;
	p->trc_reader_special.s = 0;
	INIT_LIST_HEAD(&p->trc_holdout_list);
#endif /* CONFIG_TASKS_TRACE_RCU */
}
17268c2ecf20Sopenharmony_ci
17278c2ecf20Sopenharmony_cistruct pid *pidfd_pid(const struct file *file)
17288c2ecf20Sopenharmony_ci{
17298c2ecf20Sopenharmony_ci	if (file->f_op == &pidfd_fops)
17308c2ecf20Sopenharmony_ci		return file->private_data;
17318c2ecf20Sopenharmony_ci
17328c2ecf20Sopenharmony_ci	return ERR_PTR(-EBADF);
17338c2ecf20Sopenharmony_ci}
17348c2ecf20Sopenharmony_ci
17358c2ecf20Sopenharmony_cistatic int pidfd_release(struct inode *inode, struct file *file)
17368c2ecf20Sopenharmony_ci{
17378c2ecf20Sopenharmony_ci	struct pid *pid = file->private_data;
17388c2ecf20Sopenharmony_ci
17398c2ecf20Sopenharmony_ci	file->private_data = NULL;
17408c2ecf20Sopenharmony_ci	put_pid(pid);
17418c2ecf20Sopenharmony_ci	return 0;
17428c2ecf20Sopenharmony_ci}
17438c2ecf20Sopenharmony_ci
#ifdef CONFIG_PROC_FS
/**
 * pidfd_show_fdinfo - print information about a pidfd
 * @m: proc fdinfo file
 * @f: file referencing a pidfd
 *
 * Pid:
 * This function will print the pid that a given pidfd refers to in the
 * pid namespace of the procfs instance.
 * If the pid namespace of the process is not a descendant of the pid
 * namespace of the procfs instance, 0 will be shown as its pid. This is
 * similar to calling getppid() on a process whose parent is outside of
 * its pid namespace.
 * If the pid has no PIDTYPE_PID task attached, -1 is shown.
 *
 * NSpid:
 * If pid namespaces are supported then this function will also print
 * the pid that a given pidfd refers to for all descendant pid namespaces,
 * starting from the current pid namespace of the instance, i.e. the
 * Pid field and the first entry in the NSpid field will be identical.
 * If the pid namespace of the process is not a descendant of the pid
 * namespace of the procfs instance, 0 will be shown as its first NSpid
 * entry and no others will be shown.
 * Note that this differs from the Pid and NSpid fields in
 * /proc/<pid>/status where Pid and NSpid are always shown relative to
 * the pid namespace of the procfs instance. The difference becomes
 * obvious when sending around a pidfd between pid namespaces from a
 * different branch of the tree, i.e. where no ancestral relation is
 * present between the pid namespaces:
 * - create two new pid namespaces ns1 and ns2 in the initial pid
 *   namespace (also take care to create new mount namespaces in the
 *   new pid namespace and mount procfs)
 * - create a process with a pidfd in ns1
 * - send pidfd from ns1 to ns2
 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid
 *   have exactly one entry, which is 0
 */
static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct pid *pid = f->private_data;
	struct pid_namespace *ns;
	pid_t nr = -1;

	if (likely(pid_has_task(pid, PIDTYPE_PID))) {
		ns = proc_pid_ns(file_inode(m->file)->i_sb);
		nr = pid_nr_ns(pid, ns);
	}

	seq_put_decimal_ll(m, "Pid:\t", nr);

#ifdef CONFIG_PID_NS
	seq_put_decimal_ll(m, "\nNSpid:\t", nr);
	if (nr > 0) {
		int level;

		/*
		 * A positive nr means that 'pid' is valid and that ns,
		 * i.e. the pid namespace associated with the procfs
		 * instance, is in the pid namespace hierarchy of pid.
		 * Start at one below the already printed level.
		 */
		for (level = ns->level + 1; level <= pid->level; level++)
			seq_put_decimal_ll(m, "\t", pid->numbers[level].nr);
	}
#endif
	seq_putc(m, '\n');
}
#endif
18108c2ecf20Sopenharmony_ci
18118c2ecf20Sopenharmony_ci/*
18128c2ecf20Sopenharmony_ci * Poll support for process exit notification.
18138c2ecf20Sopenharmony_ci */
18148c2ecf20Sopenharmony_cistatic __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
18158c2ecf20Sopenharmony_ci{
18168c2ecf20Sopenharmony_ci	struct pid *pid = file->private_data;
18178c2ecf20Sopenharmony_ci	__poll_t poll_flags = 0;
18188c2ecf20Sopenharmony_ci
18198c2ecf20Sopenharmony_ci	poll_wait(file, &pid->wait_pidfd, pts);
18208c2ecf20Sopenharmony_ci
18218c2ecf20Sopenharmony_ci	/*
18228c2ecf20Sopenharmony_ci	 * Inform pollers only when the whole thread group exits.
18238c2ecf20Sopenharmony_ci	 * If the thread group leader exits before all other threads in the
18248c2ecf20Sopenharmony_ci	 * group, then poll(2) should block, similar to the wait(2) family.
18258c2ecf20Sopenharmony_ci	 */
18268c2ecf20Sopenharmony_ci	if (thread_group_exited(pid))
18278c2ecf20Sopenharmony_ci		poll_flags = EPOLLIN | EPOLLRDNORM;
18288c2ecf20Sopenharmony_ci
18298c2ecf20Sopenharmony_ci	return poll_flags;
18308c2ecf20Sopenharmony_ci}
18318c2ecf20Sopenharmony_ci
18328c2ecf20Sopenharmony_ciconst struct file_operations pidfd_fops = {
18338c2ecf20Sopenharmony_ci	.release = pidfd_release,
18348c2ecf20Sopenharmony_ci	.poll = pidfd_poll,
18358c2ecf20Sopenharmony_ci#ifdef CONFIG_PROC_FS
18368c2ecf20Sopenharmony_ci	.show_fdinfo = pidfd_show_fdinfo,
18378c2ecf20Sopenharmony_ci#endif
18388c2ecf20Sopenharmony_ci};
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_cistatic void __delayed_free_task(struct rcu_head *rhp)
18418c2ecf20Sopenharmony_ci{
18428c2ecf20Sopenharmony_ci	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
18438c2ecf20Sopenharmony_ci
18448c2ecf20Sopenharmony_ci	free_task(tsk);
18458c2ecf20Sopenharmony_ci}
18468c2ecf20Sopenharmony_ci
18478c2ecf20Sopenharmony_cistatic __always_inline void delayed_free_task(struct task_struct *tsk)
18488c2ecf20Sopenharmony_ci{
18498c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_MEMCG))
18508c2ecf20Sopenharmony_ci		call_rcu(&tsk->rcu, __delayed_free_task);
18518c2ecf20Sopenharmony_ci	else
18528c2ecf20Sopenharmony_ci		free_task(tsk);
18538c2ecf20Sopenharmony_ci}
18548c2ecf20Sopenharmony_ci
18558c2ecf20Sopenharmony_cistatic void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk)
18568c2ecf20Sopenharmony_ci{
18578c2ecf20Sopenharmony_ci	/* Skip if kernel thread */
18588c2ecf20Sopenharmony_ci	if (!tsk->mm)
18598c2ecf20Sopenharmony_ci		return;
18608c2ecf20Sopenharmony_ci
18618c2ecf20Sopenharmony_ci	/* Skip if spawning a thread or using vfork */
18628c2ecf20Sopenharmony_ci	if ((clone_flags & (CLONE_VM | CLONE_THREAD | CLONE_VFORK)) != CLONE_VM)
18638c2ecf20Sopenharmony_ci		return;
18648c2ecf20Sopenharmony_ci
18658c2ecf20Sopenharmony_ci	/* We need to synchronize with __set_oom_adj */
18668c2ecf20Sopenharmony_ci	mutex_lock(&oom_adj_mutex);
18678c2ecf20Sopenharmony_ci	set_bit(MMF_MULTIPROCESS, &tsk->mm->flags);
18688c2ecf20Sopenharmony_ci	/* Update the values in case they were changed after copy_signal */
18698c2ecf20Sopenharmony_ci	tsk->signal->oom_score_adj = current->signal->oom_score_adj;
18708c2ecf20Sopenharmony_ci	tsk->signal->oom_score_adj_min = current->signal->oom_score_adj_min;
18718c2ecf20Sopenharmony_ci	mutex_unlock(&oom_adj_mutex);
18728c2ecf20Sopenharmony_ci}
18738c2ecf20Sopenharmony_ci
18748c2ecf20Sopenharmony_ci/*
18758c2ecf20Sopenharmony_ci * This creates a new process as a copy of the old one,
18768c2ecf20Sopenharmony_ci * but does not actually start it yet.
18778c2ecf20Sopenharmony_ci *
18788c2ecf20Sopenharmony_ci * It copies the registers, and all the appropriate
18798c2ecf20Sopenharmony_ci * parts of the process environment (as per the clone
18808c2ecf20Sopenharmony_ci * flags). The actual kick-off is left to the caller.
18818c2ecf20Sopenharmony_ci */
18828c2ecf20Sopenharmony_cistatic __latent_entropy struct task_struct *copy_process(
18838c2ecf20Sopenharmony_ci					struct pid *pid,
18848c2ecf20Sopenharmony_ci					int trace,
18858c2ecf20Sopenharmony_ci					int node,
18868c2ecf20Sopenharmony_ci					struct kernel_clone_args *args)
18878c2ecf20Sopenharmony_ci{
18888c2ecf20Sopenharmony_ci	int pidfd = -1, retval;
18898c2ecf20Sopenharmony_ci	struct task_struct *p;
18908c2ecf20Sopenharmony_ci	struct multiprocess_signals delayed;
18918c2ecf20Sopenharmony_ci	struct file *pidfile = NULL;
18928c2ecf20Sopenharmony_ci	u64 clone_flags = args->flags;
18938c2ecf20Sopenharmony_ci	struct nsproxy *nsp = current->nsproxy;
18948c2ecf20Sopenharmony_ci
18958c2ecf20Sopenharmony_ci	/*
18968c2ecf20Sopenharmony_ci	 * Don't allow sharing the root directory with processes in a different
18978c2ecf20Sopenharmony_ci	 * namespace
18988c2ecf20Sopenharmony_ci	 */
18998c2ecf20Sopenharmony_ci	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
19008c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
19018c2ecf20Sopenharmony_ci
19028c2ecf20Sopenharmony_ci	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
19038c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
19048c2ecf20Sopenharmony_ci
19058c2ecf20Sopenharmony_ci	/*
19068c2ecf20Sopenharmony_ci	 * Thread groups must share signals as well, and detached threads
19078c2ecf20Sopenharmony_ci	 * can only be started up within the thread group.
19088c2ecf20Sopenharmony_ci	 */
19098c2ecf20Sopenharmony_ci	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
19108c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
19118c2ecf20Sopenharmony_ci
19128c2ecf20Sopenharmony_ci	/*
19138c2ecf20Sopenharmony_ci	 * Shared signal handlers imply shared VM. By way of the above,
19148c2ecf20Sopenharmony_ci	 * thread groups also imply shared VM. Blocking this case allows
19158c2ecf20Sopenharmony_ci	 * for various simplifications in other code.
19168c2ecf20Sopenharmony_ci	 */
19178c2ecf20Sopenharmony_ci	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
19188c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
19198c2ecf20Sopenharmony_ci
19208c2ecf20Sopenharmony_ci	/*
19218c2ecf20Sopenharmony_ci	 * Siblings of global init remain as zombies on exit since they are
19228c2ecf20Sopenharmony_ci	 * not reaped by their parent (swapper). To solve this and to avoid
19238c2ecf20Sopenharmony_ci	 * multi-rooted process trees, prevent global and container-inits
19248c2ecf20Sopenharmony_ci	 * from creating siblings.
19258c2ecf20Sopenharmony_ci	 */
19268c2ecf20Sopenharmony_ci	if ((clone_flags & CLONE_PARENT) &&
19278c2ecf20Sopenharmony_ci				current->signal->flags & SIGNAL_UNKILLABLE)
19288c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
19298c2ecf20Sopenharmony_ci
19308c2ecf20Sopenharmony_ci	/*
19318c2ecf20Sopenharmony_ci	 * If the new process will be in a different pid or user namespace
19328c2ecf20Sopenharmony_ci	 * do not allow it to share a thread group with the forking task.
19338c2ecf20Sopenharmony_ci	 */
19348c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_THREAD) {
19358c2ecf20Sopenharmony_ci		if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
19368c2ecf20Sopenharmony_ci		    (task_active_pid_ns(current) != nsp->pid_ns_for_children))
19378c2ecf20Sopenharmony_ci			return ERR_PTR(-EINVAL);
19388c2ecf20Sopenharmony_ci	}
19398c2ecf20Sopenharmony_ci
19408c2ecf20Sopenharmony_ci	/*
19418c2ecf20Sopenharmony_ci	 * If the new process will be in a different time namespace
19428c2ecf20Sopenharmony_ci	 * do not allow it to share VM or a thread group with the forking task.
19438c2ecf20Sopenharmony_ci	 */
19448c2ecf20Sopenharmony_ci	if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
19458c2ecf20Sopenharmony_ci		if (nsp->time_ns != nsp->time_ns_for_children)
19468c2ecf20Sopenharmony_ci			return ERR_PTR(-EINVAL);
19478c2ecf20Sopenharmony_ci	}
19488c2ecf20Sopenharmony_ci
19498c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_PIDFD) {
19508c2ecf20Sopenharmony_ci		/*
19518c2ecf20Sopenharmony_ci		 * - CLONE_DETACHED is blocked so that we can potentially
19528c2ecf20Sopenharmony_ci		 *   reuse it later for CLONE_PIDFD.
19538c2ecf20Sopenharmony_ci		 * - CLONE_THREAD is blocked until someone really needs it.
19548c2ecf20Sopenharmony_ci		 */
19558c2ecf20Sopenharmony_ci		if (clone_flags & (CLONE_DETACHED | CLONE_THREAD))
19568c2ecf20Sopenharmony_ci			return ERR_PTR(-EINVAL);
19578c2ecf20Sopenharmony_ci	}
19588c2ecf20Sopenharmony_ci
19598c2ecf20Sopenharmony_ci	/*
19608c2ecf20Sopenharmony_ci	 * Force any signals received before this point to be delivered
19618c2ecf20Sopenharmony_ci	 * before the fork happens.  Collect up signals sent to multiple
19628c2ecf20Sopenharmony_ci	 * processes that happen during the fork and delay them so that
19638c2ecf20Sopenharmony_ci	 * they appear to happen after the fork.
19648c2ecf20Sopenharmony_ci	 */
19658c2ecf20Sopenharmony_ci	sigemptyset(&delayed.signal);
19668c2ecf20Sopenharmony_ci	INIT_HLIST_NODE(&delayed.node);
19678c2ecf20Sopenharmony_ci
19688c2ecf20Sopenharmony_ci	spin_lock_irq(&current->sighand->siglock);
19698c2ecf20Sopenharmony_ci	if (!(clone_flags & CLONE_THREAD))
19708c2ecf20Sopenharmony_ci		hlist_add_head(&delayed.node, &current->signal->multiprocess);
19718c2ecf20Sopenharmony_ci	recalc_sigpending();
19728c2ecf20Sopenharmony_ci	spin_unlock_irq(&current->sighand->siglock);
19738c2ecf20Sopenharmony_ci	retval = -ERESTARTNOINTR;
19748c2ecf20Sopenharmony_ci	if (task_sigpending(current))
19758c2ecf20Sopenharmony_ci		goto fork_out;
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci	retval = -ENOMEM;
19788c2ecf20Sopenharmony_ci	p = dup_task_struct(current, node);
19798c2ecf20Sopenharmony_ci	if (!p)
19808c2ecf20Sopenharmony_ci		goto fork_out;
19818c2ecf20Sopenharmony_ci	if (args->io_thread) {
19828c2ecf20Sopenharmony_ci		/*
19838c2ecf20Sopenharmony_ci		 * Mark us an IO worker, and block any signal that isn't
19848c2ecf20Sopenharmony_ci		 * fatal or STOP
19858c2ecf20Sopenharmony_ci		 */
19868c2ecf20Sopenharmony_ci		p->flags |= PF_IO_WORKER;
19878c2ecf20Sopenharmony_ci		siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
19888c2ecf20Sopenharmony_ci	}
19898c2ecf20Sopenharmony_ci
19908c2ecf20Sopenharmony_ci	/*
19918c2ecf20Sopenharmony_ci	 * This _must_ happen before we call free_task(), i.e. before we jump
19928c2ecf20Sopenharmony_ci	 * to any of the bad_fork_* labels. This is to avoid freeing
19938c2ecf20Sopenharmony_ci	 * p->set_child_tid which is (ab)used as a kthread's data pointer for
19948c2ecf20Sopenharmony_ci	 * kernel threads (PF_KTHREAD).
19958c2ecf20Sopenharmony_ci	 */
19968c2ecf20Sopenharmony_ci	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? args->child_tid : NULL;
19978c2ecf20Sopenharmony_ci	/*
19988c2ecf20Sopenharmony_ci	 * Clear TID on mm_release()?
19998c2ecf20Sopenharmony_ci	 */
20008c2ecf20Sopenharmony_ci	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? args->child_tid : NULL;
20018c2ecf20Sopenharmony_ci
20028c2ecf20Sopenharmony_ci	ftrace_graph_init_task(p);
20038c2ecf20Sopenharmony_ci
20048c2ecf20Sopenharmony_ci	rt_mutex_init_task(p);
20058c2ecf20Sopenharmony_ci
20068c2ecf20Sopenharmony_ci	lockdep_assert_irqs_enabled();
20078c2ecf20Sopenharmony_ci#ifdef CONFIG_PROVE_LOCKING
20088c2ecf20Sopenharmony_ci	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
20098c2ecf20Sopenharmony_ci#endif
20108c2ecf20Sopenharmony_ci	retval = -EAGAIN;
20118c2ecf20Sopenharmony_ci	if (atomic_read(&p->real_cred->user->processes) >=
20128c2ecf20Sopenharmony_ci			task_rlimit(p, RLIMIT_NPROC)) {
20138c2ecf20Sopenharmony_ci		if (p->real_cred->user != INIT_USER &&
20148c2ecf20Sopenharmony_ci		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
20158c2ecf20Sopenharmony_ci			goto bad_fork_free;
20168c2ecf20Sopenharmony_ci	}
20178c2ecf20Sopenharmony_ci	current->flags &= ~PF_NPROC_EXCEEDED;
20188c2ecf20Sopenharmony_ci
20198c2ecf20Sopenharmony_ci	retval = copy_creds(p, clone_flags);
20208c2ecf20Sopenharmony_ci	if (retval < 0)
20218c2ecf20Sopenharmony_ci		goto bad_fork_free;
20228c2ecf20Sopenharmony_ci
20238c2ecf20Sopenharmony_ci	/*
20248c2ecf20Sopenharmony_ci	 * If multiple threads are within copy_process(), then this check
20258c2ecf20Sopenharmony_ci	 * triggers too late. This doesn't hurt, the check is only there
20268c2ecf20Sopenharmony_ci	 * to stop root fork bombs.
20278c2ecf20Sopenharmony_ci	 */
20288c2ecf20Sopenharmony_ci	retval = -EAGAIN;
20298c2ecf20Sopenharmony_ci	if (data_race(nr_threads >= max_threads))
20308c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_count;
20318c2ecf20Sopenharmony_ci
20328c2ecf20Sopenharmony_ci	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
20338c2ecf20Sopenharmony_ci#ifdef CONFIG_RECLAIM_ACCT
20348c2ecf20Sopenharmony_ci	reclaimacct_tsk_init(p);
20358c2ecf20Sopenharmony_ci#endif
20368c2ecf20Sopenharmony_ci	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
20378c2ecf20Sopenharmony_ci	p->flags |= PF_FORKNOEXEC;
20388c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&p->children);
20398c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&p->sibling);
20408c2ecf20Sopenharmony_ci	rcu_copy_process(p);
20418c2ecf20Sopenharmony_ci	p->vfork_done = NULL;
20428c2ecf20Sopenharmony_ci	spin_lock_init(&p->alloc_lock);
20438c2ecf20Sopenharmony_ci
20448c2ecf20Sopenharmony_ci	init_sigpending(&p->pending);
20458c2ecf20Sopenharmony_ci
20468c2ecf20Sopenharmony_ci	p->utime = p->stime = p->gtime = 0;
20478c2ecf20Sopenharmony_ci#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
20488c2ecf20Sopenharmony_ci	p->utimescaled = p->stimescaled = 0;
20498c2ecf20Sopenharmony_ci#endif
20508c2ecf20Sopenharmony_ci	prev_cputime_init(&p->prev_cputime);
20518c2ecf20Sopenharmony_ci
20528c2ecf20Sopenharmony_ci#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
20538c2ecf20Sopenharmony_ci	seqcount_init(&p->vtime.seqcount);
20548c2ecf20Sopenharmony_ci	p->vtime.starttime = 0;
20558c2ecf20Sopenharmony_ci	p->vtime.state = VTIME_INACTIVE;
20568c2ecf20Sopenharmony_ci#endif
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_ci#ifdef CONFIG_IO_URING
20598c2ecf20Sopenharmony_ci	p->io_uring = NULL;
20608c2ecf20Sopenharmony_ci#endif
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci#if defined(SPLIT_RSS_COUNTING)
20638c2ecf20Sopenharmony_ci	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
20648c2ecf20Sopenharmony_ci#endif
20658c2ecf20Sopenharmony_ci
20668c2ecf20Sopenharmony_ci	p->default_timer_slack_ns = current->timer_slack_ns;
20678c2ecf20Sopenharmony_ci
20688c2ecf20Sopenharmony_ci#ifdef CONFIG_PSI
20698c2ecf20Sopenharmony_ci	p->psi_flags = 0;
20708c2ecf20Sopenharmony_ci#endif
20718c2ecf20Sopenharmony_ci
20728c2ecf20Sopenharmony_ci	task_io_accounting_init(&p->ioac);
20738c2ecf20Sopenharmony_ci	acct_clear_integrals(p);
20748c2ecf20Sopenharmony_ci
20758c2ecf20Sopenharmony_ci	posix_cputimers_init(&p->posix_cputimers);
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci	p->io_context = NULL;
20788c2ecf20Sopenharmony_ci	audit_set_context(p, NULL);
20798c2ecf20Sopenharmony_ci	cgroup_fork(p);
20808c2ecf20Sopenharmony_ci#ifdef CONFIG_NUMA
20818c2ecf20Sopenharmony_ci	p->mempolicy = mpol_dup(p->mempolicy);
20828c2ecf20Sopenharmony_ci	if (IS_ERR(p->mempolicy)) {
20838c2ecf20Sopenharmony_ci		retval = PTR_ERR(p->mempolicy);
20848c2ecf20Sopenharmony_ci		p->mempolicy = NULL;
20858c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_threadgroup_lock;
20868c2ecf20Sopenharmony_ci	}
20878c2ecf20Sopenharmony_ci#endif
20888c2ecf20Sopenharmony_ci#ifdef CONFIG_CPUSETS
20898c2ecf20Sopenharmony_ci	p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
20908c2ecf20Sopenharmony_ci	p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
20918c2ecf20Sopenharmony_ci	seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock);
20928c2ecf20Sopenharmony_ci#endif
20938c2ecf20Sopenharmony_ci#ifdef CONFIG_TRACE_IRQFLAGS
20948c2ecf20Sopenharmony_ci	memset(&p->irqtrace, 0, sizeof(p->irqtrace));
20958c2ecf20Sopenharmony_ci	p->irqtrace.hardirq_disable_ip	= _THIS_IP_;
20968c2ecf20Sopenharmony_ci	p->irqtrace.softirq_enable_ip	= _THIS_IP_;
20978c2ecf20Sopenharmony_ci	p->softirqs_enabled		= 1;
20988c2ecf20Sopenharmony_ci	p->softirq_context		= 0;
20998c2ecf20Sopenharmony_ci#endif
21008c2ecf20Sopenharmony_ci
21018c2ecf20Sopenharmony_ci	p->pagefault_disabled = 0;
21028c2ecf20Sopenharmony_ci
21038c2ecf20Sopenharmony_ci#ifdef CONFIG_LOCKDEP
21048c2ecf20Sopenharmony_ci	lockdep_init_task(p);
21058c2ecf20Sopenharmony_ci#endif
21068c2ecf20Sopenharmony_ci
21078c2ecf20Sopenharmony_ci#ifdef CONFIG_DEBUG_MUTEXES
21088c2ecf20Sopenharmony_ci	p->blocked_on = NULL; /* not blocked yet */
21098c2ecf20Sopenharmony_ci#endif
21108c2ecf20Sopenharmony_ci#ifdef CONFIG_BCACHE
21118c2ecf20Sopenharmony_ci	p->sequential_io	= 0;
21128c2ecf20Sopenharmony_ci	p->sequential_io_avg	= 0;
21138c2ecf20Sopenharmony_ci#endif
21148c2ecf20Sopenharmony_ci#ifdef CONFIG_BPF_SYSCALL
21158c2ecf20Sopenharmony_ci	p->bpf_ctx = NULL;
21168c2ecf20Sopenharmony_ci#endif
21178c2ecf20Sopenharmony_ci
21188c2ecf20Sopenharmony_ci	/* Perform scheduler related setup. Assign this task to a CPU. */
21198c2ecf20Sopenharmony_ci	retval = sched_fork(clone_flags, p);
21208c2ecf20Sopenharmony_ci	if (retval)
21218c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_policy;
21228c2ecf20Sopenharmony_ci
21238c2ecf20Sopenharmony_ci	retval = perf_event_init_task(p);
21248c2ecf20Sopenharmony_ci	if (retval)
21258c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_policy;
21268c2ecf20Sopenharmony_ci	retval = audit_alloc(p);
21278c2ecf20Sopenharmony_ci	if (retval)
21288c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_perf;
21298c2ecf20Sopenharmony_ci	/* copy all the process information */
21308c2ecf20Sopenharmony_ci	shm_init_task(p);
21318c2ecf20Sopenharmony_ci	retval = security_task_alloc(p, clone_flags);
21328c2ecf20Sopenharmony_ci	if (retval)
21338c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_audit;
21348c2ecf20Sopenharmony_ci	retval = copy_semundo(clone_flags, p);
21358c2ecf20Sopenharmony_ci	if (retval)
21368c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_security;
21378c2ecf20Sopenharmony_ci	retval = copy_files(clone_flags, p);
21388c2ecf20Sopenharmony_ci	if (retval)
21398c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_semundo;
21408c2ecf20Sopenharmony_ci	retval = copy_fs(clone_flags, p);
21418c2ecf20Sopenharmony_ci	if (retval)
21428c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_files;
21438c2ecf20Sopenharmony_ci	retval = copy_sighand(clone_flags, p);
21448c2ecf20Sopenharmony_ci	if (retval)
21458c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_fs;
21468c2ecf20Sopenharmony_ci	retval = copy_signal(clone_flags, p);
21478c2ecf20Sopenharmony_ci	if (retval)
21488c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_sighand;
21498c2ecf20Sopenharmony_ci	retval = copy_mm(clone_flags, p);
21508c2ecf20Sopenharmony_ci	if (retval)
21518c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_signal;
21528c2ecf20Sopenharmony_ci	retval = copy_namespaces(clone_flags, p);
21538c2ecf20Sopenharmony_ci	if (retval)
21548c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_mm;
21558c2ecf20Sopenharmony_ci	retval = copy_io(clone_flags, p);
21568c2ecf20Sopenharmony_ci	if (retval)
21578c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_namespaces;
21588c2ecf20Sopenharmony_ci	retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls);
21598c2ecf20Sopenharmony_ci	if (retval)
21608c2ecf20Sopenharmony_ci		goto bad_fork_cleanup_io;
21618c2ecf20Sopenharmony_ci
21628c2ecf20Sopenharmony_ci	stackleak_task_init(p);
21638c2ecf20Sopenharmony_ci
21648c2ecf20Sopenharmony_ci	if (pid != &init_struct_pid) {
21658c2ecf20Sopenharmony_ci		pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
21668c2ecf20Sopenharmony_ci				args->set_tid_size);
21678c2ecf20Sopenharmony_ci		if (IS_ERR(pid)) {
21688c2ecf20Sopenharmony_ci			retval = PTR_ERR(pid);
21698c2ecf20Sopenharmony_ci			goto bad_fork_cleanup_thread;
21708c2ecf20Sopenharmony_ci		}
21718c2ecf20Sopenharmony_ci	}
21728c2ecf20Sopenharmony_ci
21738c2ecf20Sopenharmony_ci	/*
21748c2ecf20Sopenharmony_ci	 * This has to happen after we've potentially unshared the file
21758c2ecf20Sopenharmony_ci	 * descriptor table (so that the pidfd doesn't leak into the child
21768c2ecf20Sopenharmony_ci	 * if the fd table isn't shared).
21778c2ecf20Sopenharmony_ci	 */
21788c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_PIDFD) {
21798c2ecf20Sopenharmony_ci		retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
21808c2ecf20Sopenharmony_ci		if (retval < 0)
21818c2ecf20Sopenharmony_ci			goto bad_fork_free_pid;
21828c2ecf20Sopenharmony_ci
21838c2ecf20Sopenharmony_ci		pidfd = retval;
21848c2ecf20Sopenharmony_ci
21858c2ecf20Sopenharmony_ci		pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
21868c2ecf20Sopenharmony_ci					      O_RDWR | O_CLOEXEC);
21878c2ecf20Sopenharmony_ci		if (IS_ERR(pidfile)) {
21888c2ecf20Sopenharmony_ci			put_unused_fd(pidfd);
21898c2ecf20Sopenharmony_ci			retval = PTR_ERR(pidfile);
21908c2ecf20Sopenharmony_ci			goto bad_fork_free_pid;
21918c2ecf20Sopenharmony_ci		}
21928c2ecf20Sopenharmony_ci		get_pid(pid);	/* held by pidfile now */
21938c2ecf20Sopenharmony_ci
21948c2ecf20Sopenharmony_ci		retval = put_user(pidfd, args->pidfd);
21958c2ecf20Sopenharmony_ci		if (retval)
21968c2ecf20Sopenharmony_ci			goto bad_fork_put_pidfd;
21978c2ecf20Sopenharmony_ci	}
21988c2ecf20Sopenharmony_ci
21998c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK
22008c2ecf20Sopenharmony_ci	p->plug = NULL;
22018c2ecf20Sopenharmony_ci#endif
22028c2ecf20Sopenharmony_ci	futex_init_task(p);
22038c2ecf20Sopenharmony_ci
22048c2ecf20Sopenharmony_ci	/*
22058c2ecf20Sopenharmony_ci	 * sigaltstack should be cleared when sharing the same VM
22068c2ecf20Sopenharmony_ci	 */
22078c2ecf20Sopenharmony_ci	if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
22088c2ecf20Sopenharmony_ci		sas_ss_reset(p);
22098c2ecf20Sopenharmony_ci
22108c2ecf20Sopenharmony_ci	/*
22118c2ecf20Sopenharmony_ci	 * Syscall tracing and stepping should be turned off in the
22128c2ecf20Sopenharmony_ci	 * child regardless of CLONE_PTRACE.
22138c2ecf20Sopenharmony_ci	 */
22148c2ecf20Sopenharmony_ci	user_disable_single_step(p);
22158c2ecf20Sopenharmony_ci	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
22168c2ecf20Sopenharmony_ci#ifdef TIF_SYSCALL_EMU
22178c2ecf20Sopenharmony_ci	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
22188c2ecf20Sopenharmony_ci#endif
22198c2ecf20Sopenharmony_ci	clear_tsk_latency_tracing(p);
22208c2ecf20Sopenharmony_ci
22218c2ecf20Sopenharmony_ci	/* ok, now we should be set up.. */
22228c2ecf20Sopenharmony_ci	p->pid = pid_nr(pid);
22238c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_THREAD) {
22248c2ecf20Sopenharmony_ci		p->group_leader = current->group_leader;
22258c2ecf20Sopenharmony_ci		p->tgid = current->tgid;
22268c2ecf20Sopenharmony_ci	} else {
22278c2ecf20Sopenharmony_ci		p->group_leader = p;
22288c2ecf20Sopenharmony_ci		p->tgid = p->pid;
22298c2ecf20Sopenharmony_ci	}
22308c2ecf20Sopenharmony_ci
22318c2ecf20Sopenharmony_ci	p->nr_dirtied = 0;
22328c2ecf20Sopenharmony_ci	p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
22338c2ecf20Sopenharmony_ci	p->dirty_paused_when = 0;
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	p->pdeath_signal = 0;
22368c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&p->thread_group);
22378c2ecf20Sopenharmony_ci	p->task_works = NULL;
22388c2ecf20Sopenharmony_ci	clear_posix_cputimers_work(p);
22398c2ecf20Sopenharmony_ci
22408c2ecf20Sopenharmony_ci	/*
22418c2ecf20Sopenharmony_ci	 * Ensure that the cgroup subsystem policies allow the new process to be
22428c2ecf20Sopenharmony_ci	 * forked. It should be noted that the new process's css_set can be changed
22438c2ecf20Sopenharmony_ci	 * between here and cgroup_post_fork() if an organisation operation is in
22448c2ecf20Sopenharmony_ci	 * progress.
22458c2ecf20Sopenharmony_ci	 */
22468c2ecf20Sopenharmony_ci	retval = cgroup_can_fork(p, args);
22478c2ecf20Sopenharmony_ci	if (retval)
22488c2ecf20Sopenharmony_ci		goto bad_fork_put_pidfd;
22498c2ecf20Sopenharmony_ci
22508c2ecf20Sopenharmony_ci	/*
22518c2ecf20Sopenharmony_ci	 * From this point on we must avoid any synchronous user-space
22528c2ecf20Sopenharmony_ci	 * communication until we take the tasklist-lock. In particular, we do
22538c2ecf20Sopenharmony_ci	 * not want user-space to be able to predict the process start-time by
22548c2ecf20Sopenharmony_ci	 * stalling fork(2) after we recorded the start_time but before it is
22558c2ecf20Sopenharmony_ci	 * visible to the system.
22568c2ecf20Sopenharmony_ci	 */
22578c2ecf20Sopenharmony_ci
22588c2ecf20Sopenharmony_ci	p->start_time = ktime_get_ns();
22598c2ecf20Sopenharmony_ci	p->start_boottime = ktime_get_boottime_ns();
22608c2ecf20Sopenharmony_ci
22618c2ecf20Sopenharmony_ci	/*
22628c2ecf20Sopenharmony_ci	 * Make it visible to the rest of the system, but dont wake it up yet.
22638c2ecf20Sopenharmony_ci	 * Need tasklist lock for parent etc handling!
22648c2ecf20Sopenharmony_ci	 */
22658c2ecf20Sopenharmony_ci	write_lock_irq(&tasklist_lock);
22668c2ecf20Sopenharmony_ci
22678c2ecf20Sopenharmony_ci	/* CLONE_PARENT re-uses the old parent */
22688c2ecf20Sopenharmony_ci	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
22698c2ecf20Sopenharmony_ci		p->real_parent = current->real_parent;
22708c2ecf20Sopenharmony_ci		p->parent_exec_id = current->parent_exec_id;
22718c2ecf20Sopenharmony_ci		if (clone_flags & CLONE_THREAD)
22728c2ecf20Sopenharmony_ci			p->exit_signal = -1;
22738c2ecf20Sopenharmony_ci		else
22748c2ecf20Sopenharmony_ci			p->exit_signal = current->group_leader->exit_signal;
22758c2ecf20Sopenharmony_ci	} else {
22768c2ecf20Sopenharmony_ci		p->real_parent = current;
22778c2ecf20Sopenharmony_ci		p->parent_exec_id = current->self_exec_id;
22788c2ecf20Sopenharmony_ci		p->exit_signal = args->exit_signal;
22798c2ecf20Sopenharmony_ci	}
22808c2ecf20Sopenharmony_ci
22818c2ecf20Sopenharmony_ci	klp_copy_process(p);
22828c2ecf20Sopenharmony_ci
22838c2ecf20Sopenharmony_ci	spin_lock(&current->sighand->siglock);
22848c2ecf20Sopenharmony_ci
22858c2ecf20Sopenharmony_ci	rseq_fork(p, clone_flags);
22868c2ecf20Sopenharmony_ci
22878c2ecf20Sopenharmony_ci	/* Don't start children in a dying pid namespace */
22888c2ecf20Sopenharmony_ci	if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
22898c2ecf20Sopenharmony_ci		retval = -ENOMEM;
22908c2ecf20Sopenharmony_ci		goto bad_fork_cancel_cgroup;
22918c2ecf20Sopenharmony_ci	}
22928c2ecf20Sopenharmony_ci
22938c2ecf20Sopenharmony_ci	/* Let kill terminate clone/fork in the middle */
22948c2ecf20Sopenharmony_ci	if (fatal_signal_pending(current)) {
22958c2ecf20Sopenharmony_ci		retval = -EINTR;
22968c2ecf20Sopenharmony_ci		goto bad_fork_cancel_cgroup;
22978c2ecf20Sopenharmony_ci	}
22988c2ecf20Sopenharmony_ci
22998c2ecf20Sopenharmony_ci	/* No more failure paths after this point. */
23008c2ecf20Sopenharmony_ci
23018c2ecf20Sopenharmony_ci	/*
23028c2ecf20Sopenharmony_ci	 * Copy seccomp details explicitly here, in case they were changed
23038c2ecf20Sopenharmony_ci	 * before holding sighand lock.
23048c2ecf20Sopenharmony_ci	 */
23058c2ecf20Sopenharmony_ci	copy_seccomp(p);
23068c2ecf20Sopenharmony_ci
23078c2ecf20Sopenharmony_ci	init_task_pid_links(p);
23088c2ecf20Sopenharmony_ci	if (likely(p->pid)) {
23098c2ecf20Sopenharmony_ci		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
23108c2ecf20Sopenharmony_ci
23118c2ecf20Sopenharmony_ci		init_task_pid(p, PIDTYPE_PID, pid);
23128c2ecf20Sopenharmony_ci		if (thread_group_leader(p)) {
23138c2ecf20Sopenharmony_ci			init_task_pid(p, PIDTYPE_TGID, pid);
23148c2ecf20Sopenharmony_ci			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
23158c2ecf20Sopenharmony_ci			init_task_pid(p, PIDTYPE_SID, task_session(current));
23168c2ecf20Sopenharmony_ci
23178c2ecf20Sopenharmony_ci			if (is_child_reaper(pid)) {
23188c2ecf20Sopenharmony_ci				ns_of_pid(pid)->child_reaper = p;
23198c2ecf20Sopenharmony_ci				p->signal->flags |= SIGNAL_UNKILLABLE;
23208c2ecf20Sopenharmony_ci			}
23218c2ecf20Sopenharmony_ci			p->signal->shared_pending.signal = delayed.signal;
23228c2ecf20Sopenharmony_ci			p->signal->tty = tty_kref_get(current->signal->tty);
23238c2ecf20Sopenharmony_ci			/*
23248c2ecf20Sopenharmony_ci			 * Inherit has_child_subreaper flag under the same
23258c2ecf20Sopenharmony_ci			 * tasklist_lock with adding child to the process tree
23268c2ecf20Sopenharmony_ci			 * for propagate_has_child_subreaper optimization.
23278c2ecf20Sopenharmony_ci			 */
23288c2ecf20Sopenharmony_ci			p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
23298c2ecf20Sopenharmony_ci							 p->real_parent->signal->is_child_subreaper;
23308c2ecf20Sopenharmony_ci			list_add_tail(&p->sibling, &p->real_parent->children);
23318c2ecf20Sopenharmony_ci			list_add_tail_rcu(&p->tasks, &init_task.tasks);
23328c2ecf20Sopenharmony_ci			attach_pid(p, PIDTYPE_TGID);
23338c2ecf20Sopenharmony_ci			attach_pid(p, PIDTYPE_PGID);
23348c2ecf20Sopenharmony_ci			attach_pid(p, PIDTYPE_SID);
23358c2ecf20Sopenharmony_ci			__this_cpu_inc(process_counts);
23368c2ecf20Sopenharmony_ci		} else {
23378c2ecf20Sopenharmony_ci			current->signal->nr_threads++;
23388c2ecf20Sopenharmony_ci			atomic_inc(&current->signal->live);
23398c2ecf20Sopenharmony_ci			refcount_inc(&current->signal->sigcnt);
23408c2ecf20Sopenharmony_ci			task_join_group_stop(p);
23418c2ecf20Sopenharmony_ci			list_add_tail_rcu(&p->thread_group,
23428c2ecf20Sopenharmony_ci					  &p->group_leader->thread_group);
23438c2ecf20Sopenharmony_ci			list_add_tail_rcu(&p->thread_node,
23448c2ecf20Sopenharmony_ci					  &p->signal->thread_head);
23458c2ecf20Sopenharmony_ci		}
23468c2ecf20Sopenharmony_ci		attach_pid(p, PIDTYPE_PID);
23478c2ecf20Sopenharmony_ci		nr_threads++;
23488c2ecf20Sopenharmony_ci	}
23498c2ecf20Sopenharmony_ci	total_forks++;
23508c2ecf20Sopenharmony_ci	hlist_del_init(&delayed.node);
23518c2ecf20Sopenharmony_ci	spin_unlock(&current->sighand->siglock);
23528c2ecf20Sopenharmony_ci	syscall_tracepoint_update(p);
23538c2ecf20Sopenharmony_ci	write_unlock_irq(&tasklist_lock);
23548c2ecf20Sopenharmony_ci
23558c2ecf20Sopenharmony_ci	if (pidfile)
23568c2ecf20Sopenharmony_ci		fd_install(pidfd, pidfile);
23578c2ecf20Sopenharmony_ci
23588c2ecf20Sopenharmony_ci	proc_fork_connector(p);
23598c2ecf20Sopenharmony_ci	sched_post_fork(p, args);
23608c2ecf20Sopenharmony_ci	cgroup_post_fork(p, args);
23618c2ecf20Sopenharmony_ci	perf_event_fork(p);
23628c2ecf20Sopenharmony_ci
23638c2ecf20Sopenharmony_ci	trace_task_newtask(p, clone_flags);
23648c2ecf20Sopenharmony_ci	uprobe_copy_process(p, clone_flags);
23658c2ecf20Sopenharmony_ci
23668c2ecf20Sopenharmony_ci	copy_oom_score_adj(clone_flags, p);
23678c2ecf20Sopenharmony_ci
23688c2ecf20Sopenharmony_ci	return p;
23698c2ecf20Sopenharmony_ci
23708c2ecf20Sopenharmony_cibad_fork_cancel_cgroup:
23718c2ecf20Sopenharmony_ci	spin_unlock(&current->sighand->siglock);
23728c2ecf20Sopenharmony_ci	write_unlock_irq(&tasklist_lock);
23738c2ecf20Sopenharmony_ci	cgroup_cancel_fork(p, args);
23748c2ecf20Sopenharmony_cibad_fork_put_pidfd:
23758c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_PIDFD) {
23768c2ecf20Sopenharmony_ci		fput(pidfile);
23778c2ecf20Sopenharmony_ci		put_unused_fd(pidfd);
23788c2ecf20Sopenharmony_ci	}
23798c2ecf20Sopenharmony_cibad_fork_free_pid:
23808c2ecf20Sopenharmony_ci	if (pid != &init_struct_pid)
23818c2ecf20Sopenharmony_ci		free_pid(pid);
23828c2ecf20Sopenharmony_cibad_fork_cleanup_thread:
23838c2ecf20Sopenharmony_ci	exit_thread(p);
23848c2ecf20Sopenharmony_cibad_fork_cleanup_io:
23858c2ecf20Sopenharmony_ci	if (p->io_context)
23868c2ecf20Sopenharmony_ci		exit_io_context(p);
23878c2ecf20Sopenharmony_cibad_fork_cleanup_namespaces:
23888c2ecf20Sopenharmony_ci	exit_task_namespaces(p);
23898c2ecf20Sopenharmony_cibad_fork_cleanup_mm:
23908c2ecf20Sopenharmony_ci	if (p->mm) {
23918c2ecf20Sopenharmony_ci		mm_clear_owner(p->mm, p);
23928c2ecf20Sopenharmony_ci		mmput(p->mm);
23938c2ecf20Sopenharmony_ci	}
23948c2ecf20Sopenharmony_cibad_fork_cleanup_signal:
23958c2ecf20Sopenharmony_ci	if (!(clone_flags & CLONE_THREAD))
23968c2ecf20Sopenharmony_ci		free_signal_struct(p->signal);
23978c2ecf20Sopenharmony_cibad_fork_cleanup_sighand:
23988c2ecf20Sopenharmony_ci	__cleanup_sighand(p->sighand);
23998c2ecf20Sopenharmony_cibad_fork_cleanup_fs:
24008c2ecf20Sopenharmony_ci	exit_fs(p); /* blocking */
24018c2ecf20Sopenharmony_cibad_fork_cleanup_files:
24028c2ecf20Sopenharmony_ci	exit_files(p); /* blocking */
24038c2ecf20Sopenharmony_cibad_fork_cleanup_semundo:
24048c2ecf20Sopenharmony_ci	exit_sem(p);
24058c2ecf20Sopenharmony_cibad_fork_cleanup_security:
24068c2ecf20Sopenharmony_ci	security_task_free(p);
24078c2ecf20Sopenharmony_cibad_fork_cleanup_audit:
24088c2ecf20Sopenharmony_ci	audit_free(p);
24098c2ecf20Sopenharmony_cibad_fork_cleanup_perf:
24108c2ecf20Sopenharmony_ci	perf_event_free_task(p);
24118c2ecf20Sopenharmony_cibad_fork_cleanup_policy:
24128c2ecf20Sopenharmony_ci	lockdep_free_task(p);
24138c2ecf20Sopenharmony_ci	free_task_load_ptrs(p);
24148c2ecf20Sopenharmony_ci#ifdef CONFIG_NUMA
24158c2ecf20Sopenharmony_ci	mpol_put(p->mempolicy);
24168c2ecf20Sopenharmony_cibad_fork_cleanup_threadgroup_lock:
24178c2ecf20Sopenharmony_ci#endif
24188c2ecf20Sopenharmony_ci	delayacct_tsk_free(p);
24198c2ecf20Sopenharmony_cibad_fork_cleanup_count:
24208c2ecf20Sopenharmony_ci	atomic_dec(&p->cred->user->processes);
24218c2ecf20Sopenharmony_ci	exit_creds(p);
24228c2ecf20Sopenharmony_cibad_fork_free:
24238c2ecf20Sopenharmony_ci	p->state = TASK_DEAD;
24248c2ecf20Sopenharmony_ci	put_task_stack(p);
24258c2ecf20Sopenharmony_ci	delayed_free_task(p);
24268c2ecf20Sopenharmony_cifork_out:
24278c2ecf20Sopenharmony_ci	spin_lock_irq(&current->sighand->siglock);
24288c2ecf20Sopenharmony_ci	hlist_del_init(&delayed.node);
24298c2ecf20Sopenharmony_ci	spin_unlock_irq(&current->sighand->siglock);
24308c2ecf20Sopenharmony_ci	return ERR_PTR(retval);
24318c2ecf20Sopenharmony_ci}
24328c2ecf20Sopenharmony_ci
24338c2ecf20Sopenharmony_cistatic inline void init_idle_pids(struct task_struct *idle)
24348c2ecf20Sopenharmony_ci{
24358c2ecf20Sopenharmony_ci	enum pid_type type;
24368c2ecf20Sopenharmony_ci
24378c2ecf20Sopenharmony_ci	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
24388c2ecf20Sopenharmony_ci		INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
24398c2ecf20Sopenharmony_ci		init_task_pid(idle, type, &init_struct_pid);
24408c2ecf20Sopenharmony_ci	}
24418c2ecf20Sopenharmony_ci}
24428c2ecf20Sopenharmony_ci
24438c2ecf20Sopenharmony_cistruct task_struct * __init fork_idle(int cpu)
24448c2ecf20Sopenharmony_ci{
24458c2ecf20Sopenharmony_ci	struct task_struct *task;
24468c2ecf20Sopenharmony_ci	struct kernel_clone_args args = {
24478c2ecf20Sopenharmony_ci		.flags = CLONE_VM,
24488c2ecf20Sopenharmony_ci	};
24498c2ecf20Sopenharmony_ci
24508c2ecf20Sopenharmony_ci	task = copy_process(&init_struct_pid, 0, cpu_to_node(cpu), &args);
24518c2ecf20Sopenharmony_ci	if (!IS_ERR(task)) {
24528c2ecf20Sopenharmony_ci		init_idle_pids(task);
24538c2ecf20Sopenharmony_ci		init_idle(task, cpu);
24548c2ecf20Sopenharmony_ci	}
24558c2ecf20Sopenharmony_ci
24568c2ecf20Sopenharmony_ci	return task;
24578c2ecf20Sopenharmony_ci}
24588c2ecf20Sopenharmony_ci
24598c2ecf20Sopenharmony_ci/*
24608c2ecf20Sopenharmony_ci * This is like kernel_clone(), but shaved down and tailored to just
24618c2ecf20Sopenharmony_ci * creating io_uring workers. It returns a created task, or an error pointer.
24628c2ecf20Sopenharmony_ci * The returned task is inactive, and the caller must fire it up through
24638c2ecf20Sopenharmony_ci * wake_up_new_task(p). All signals are blocked in the created task.
24648c2ecf20Sopenharmony_ci */
24658c2ecf20Sopenharmony_cistruct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
24668c2ecf20Sopenharmony_ci{
24678c2ecf20Sopenharmony_ci	unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
24688c2ecf20Sopenharmony_ci				CLONE_IO;
24698c2ecf20Sopenharmony_ci	struct kernel_clone_args args = {
24708c2ecf20Sopenharmony_ci		.flags		= ((lower_32_bits(flags) | CLONE_VM |
24718c2ecf20Sopenharmony_ci				    CLONE_UNTRACED) & ~CSIGNAL),
24728c2ecf20Sopenharmony_ci		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
24738c2ecf20Sopenharmony_ci		.stack		= (unsigned long)fn,
24748c2ecf20Sopenharmony_ci		.stack_size	= (unsigned long)arg,
24758c2ecf20Sopenharmony_ci		.io_thread	= 1,
24768c2ecf20Sopenharmony_ci	};
24778c2ecf20Sopenharmony_ci
24788c2ecf20Sopenharmony_ci	return copy_process(NULL, 0, node, &args);
24798c2ecf20Sopenharmony_ci}
24808c2ecf20Sopenharmony_ci
24818c2ecf20Sopenharmony_ci/*
24828c2ecf20Sopenharmony_ci *  Ok, this is the main fork-routine.
24838c2ecf20Sopenharmony_ci *
24848c2ecf20Sopenharmony_ci * It copies the process, and if successful kick-starts
24858c2ecf20Sopenharmony_ci * it and waits for it to finish using the VM if required.
24868c2ecf20Sopenharmony_ci *
24878c2ecf20Sopenharmony_ci * args->exit_signal is expected to be checked for sanity by the caller.
24888c2ecf20Sopenharmony_ci */
24898c2ecf20Sopenharmony_cipid_t kernel_clone(struct kernel_clone_args *args)
24908c2ecf20Sopenharmony_ci{
24918c2ecf20Sopenharmony_ci	u64 clone_flags = args->flags;
24928c2ecf20Sopenharmony_ci	struct completion vfork;
24938c2ecf20Sopenharmony_ci	struct pid *pid;
24948c2ecf20Sopenharmony_ci	struct task_struct *p;
24958c2ecf20Sopenharmony_ci	int trace = 0;
24968c2ecf20Sopenharmony_ci	pid_t nr;
24978c2ecf20Sopenharmony_ci
24988c2ecf20Sopenharmony_ci	/*
24998c2ecf20Sopenharmony_ci	 * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
25008c2ecf20Sopenharmony_ci	 * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
25018c2ecf20Sopenharmony_ci	 * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
25028c2ecf20Sopenharmony_ci	 * field in struct clone_args and it still doesn't make sense to have
25038c2ecf20Sopenharmony_ci	 * them both point at the same memory location. Performing this check
25048c2ecf20Sopenharmony_ci	 * here has the advantage that we don't need to have a separate helper
25058c2ecf20Sopenharmony_ci	 * to check for legacy clone().
25068c2ecf20Sopenharmony_ci	 */
25078c2ecf20Sopenharmony_ci	if ((args->flags & CLONE_PIDFD) &&
25088c2ecf20Sopenharmony_ci	    (args->flags & CLONE_PARENT_SETTID) &&
25098c2ecf20Sopenharmony_ci	    (args->pidfd == args->parent_tid))
25108c2ecf20Sopenharmony_ci		return -EINVAL;
25118c2ecf20Sopenharmony_ci
25128c2ecf20Sopenharmony_ci	/*
25138c2ecf20Sopenharmony_ci	 * Determine whether and which event to report to ptracer.  When
25148c2ecf20Sopenharmony_ci	 * called from kernel_thread or CLONE_UNTRACED is explicitly
25158c2ecf20Sopenharmony_ci	 * requested, no event is reported; otherwise, report if the event
25168c2ecf20Sopenharmony_ci	 * for the type of forking is enabled.
25178c2ecf20Sopenharmony_ci	 */
25188c2ecf20Sopenharmony_ci	if (!(clone_flags & CLONE_UNTRACED)) {
25198c2ecf20Sopenharmony_ci		if (clone_flags & CLONE_VFORK)
25208c2ecf20Sopenharmony_ci			trace = PTRACE_EVENT_VFORK;
25218c2ecf20Sopenharmony_ci		else if (args->exit_signal != SIGCHLD)
25228c2ecf20Sopenharmony_ci			trace = PTRACE_EVENT_CLONE;
25238c2ecf20Sopenharmony_ci		else
25248c2ecf20Sopenharmony_ci			trace = PTRACE_EVENT_FORK;
25258c2ecf20Sopenharmony_ci
25268c2ecf20Sopenharmony_ci		if (likely(!ptrace_event_enabled(current, trace)))
25278c2ecf20Sopenharmony_ci			trace = 0;
25288c2ecf20Sopenharmony_ci	}
25298c2ecf20Sopenharmony_ci
25308c2ecf20Sopenharmony_ci	p = copy_process(NULL, trace, NUMA_NO_NODE, args);
25318c2ecf20Sopenharmony_ci	add_latent_entropy();
25328c2ecf20Sopenharmony_ci
25338c2ecf20Sopenharmony_ci	if (IS_ERR(p))
25348c2ecf20Sopenharmony_ci		return PTR_ERR(p);
25358c2ecf20Sopenharmony_ci
25368c2ecf20Sopenharmony_ci	/*
25378c2ecf20Sopenharmony_ci	 * Do this prior waking up the new thread - the thread pointer
25388c2ecf20Sopenharmony_ci	 * might get invalid after that point, if the thread exits quickly.
25398c2ecf20Sopenharmony_ci	 */
25408c2ecf20Sopenharmony_ci	trace_sched_process_fork(current, p);
25418c2ecf20Sopenharmony_ci
25428c2ecf20Sopenharmony_ci	pid = get_task_pid(p, PIDTYPE_PID);
25438c2ecf20Sopenharmony_ci	nr = pid_vnr(pid);
25448c2ecf20Sopenharmony_ci
25458c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_PARENT_SETTID)
25468c2ecf20Sopenharmony_ci		put_user(nr, args->parent_tid);
25478c2ecf20Sopenharmony_ci
25488c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_VFORK) {
25498c2ecf20Sopenharmony_ci		p->vfork_done = &vfork;
25508c2ecf20Sopenharmony_ci		init_completion(&vfork);
25518c2ecf20Sopenharmony_ci		get_task_struct(p);
25528c2ecf20Sopenharmony_ci	}
25538c2ecf20Sopenharmony_ci
25548c2ecf20Sopenharmony_ci	CALL_HCK_LITE_HOOK(ced_kernel_clone_lhck, p);
25558c2ecf20Sopenharmony_ci	wake_up_new_task(p);
25568c2ecf20Sopenharmony_ci
25578c2ecf20Sopenharmony_ci	/* forking complete and child started to run, tell ptracer */
25588c2ecf20Sopenharmony_ci	if (unlikely(trace))
25598c2ecf20Sopenharmony_ci		ptrace_event_pid(trace, pid);
25608c2ecf20Sopenharmony_ci
25618c2ecf20Sopenharmony_ci	if (clone_flags & CLONE_VFORK) {
25628c2ecf20Sopenharmony_ci		if (!wait_for_vfork_done(p, &vfork))
25638c2ecf20Sopenharmony_ci			ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
25648c2ecf20Sopenharmony_ci	}
25658c2ecf20Sopenharmony_ci
25668c2ecf20Sopenharmony_ci	put_pid(pid);
25678c2ecf20Sopenharmony_ci	return nr;
25688c2ecf20Sopenharmony_ci}
25698c2ecf20Sopenharmony_ci
25708c2ecf20Sopenharmony_ci/*
25718c2ecf20Sopenharmony_ci * Create a kernel thread.
25728c2ecf20Sopenharmony_ci */
25738c2ecf20Sopenharmony_cipid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
25748c2ecf20Sopenharmony_ci{
25758c2ecf20Sopenharmony_ci	struct kernel_clone_args args = {
25768c2ecf20Sopenharmony_ci		.flags		= ((lower_32_bits(flags) | CLONE_VM |
25778c2ecf20Sopenharmony_ci				    CLONE_UNTRACED) & ~CSIGNAL),
25788c2ecf20Sopenharmony_ci		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
25798c2ecf20Sopenharmony_ci		.stack		= (unsigned long)fn,
25808c2ecf20Sopenharmony_ci		.stack_size	= (unsigned long)arg,
25818c2ecf20Sopenharmony_ci	};
25828c2ecf20Sopenharmony_ci
25838c2ecf20Sopenharmony_ci	return kernel_clone(&args);
25848c2ecf20Sopenharmony_ci}
25858c2ecf20Sopenharmony_ci
#ifdef __ARCH_WANT_SYS_FORK
/*
 * fork() entry point: a kernel_clone() with no clone flags and SIGCHLD as
 * the exit signal. Always fails with -EINVAL when built without CONFIG_MMU.
 */
SYSCALL_DEFINE0(fork)
{
#ifndef CONFIG_MMU
	/* can not support in nommu mode */
	return -EINVAL;
#else
	struct kernel_clone_args args = {
		.exit_signal = SIGCHLD,
	};

	return kernel_clone(&args);
#endif
}
#endif
26018c2ecf20Sopenharmony_ci
#ifdef __ARCH_WANT_SYS_VFORK
/*
 * vfork() entry point: kernel_clone() with CLONE_VFORK | CLONE_VM and
 * SIGCHLD as the exit signal.
 */
SYSCALL_DEFINE0(vfork)
{
	struct kernel_clone_args args = {
		.exit_signal	= SIGCHLD,
		.flags		= CLONE_VM | CLONE_VFORK,
	};

	return kernel_clone(&args);
}
#endif
26138c2ecf20Sopenharmony_ci
#ifdef __ARCH_WANT_SYS_CLONE
/*
 * Legacy clone() entry point. The argument order depends on which
 * CONFIG_CLONE_BACKWARDS* variant is configured; the body is shared.
 */
#if defined(CONFIG_CLONE_BACKWARDS)
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 unsigned long, tls,
		 int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 unsigned long, tls)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
		int, stack_size,
		int __user *, parent_tidptr,
		int __user *, child_tidptr,
		unsigned long, tls)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
		 int __user *, parent_tidptr,
		 int __user *, child_tidptr,
		 unsigned long, tls)
#endif
{
	/* Only the low 32 bits of clone_flags are honoured here. */
	const u32 low_flags = lower_32_bits(clone_flags);
	struct kernel_clone_args args = {
		.flags		= low_flags & ~CSIGNAL,
		.exit_signal	= low_flags & CSIGNAL,
		/* pidfd and parent_tid share the same user pointer */
		.pidfd		= parent_tidptr,
		.parent_tid	= parent_tidptr,
		.child_tid	= child_tidptr,
		.stack		= newsp,
		.tls		= tls,
	};

	return kernel_clone(&args);
}
#endif
26518c2ecf20Sopenharmony_ci
26528c2ecf20Sopenharmony_ci#ifdef __ARCH_WANT_SYS_CLONE3
26538c2ecf20Sopenharmony_ci
26548c2ecf20Sopenharmony_cinoinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
26558c2ecf20Sopenharmony_ci					      struct clone_args __user *uargs,
26568c2ecf20Sopenharmony_ci					      size_t usize)
26578c2ecf20Sopenharmony_ci{
26588c2ecf20Sopenharmony_ci	int err;
26598c2ecf20Sopenharmony_ci	struct clone_args args;
26608c2ecf20Sopenharmony_ci	pid_t *kset_tid = kargs->set_tid;
26618c2ecf20Sopenharmony_ci
26628c2ecf20Sopenharmony_ci	BUILD_BUG_ON(offsetofend(struct clone_args, tls) !=
26638c2ecf20Sopenharmony_ci		     CLONE_ARGS_SIZE_VER0);
26648c2ecf20Sopenharmony_ci	BUILD_BUG_ON(offsetofend(struct clone_args, set_tid_size) !=
26658c2ecf20Sopenharmony_ci		     CLONE_ARGS_SIZE_VER1);
26668c2ecf20Sopenharmony_ci	BUILD_BUG_ON(offsetofend(struct clone_args, cgroup) !=
26678c2ecf20Sopenharmony_ci		     CLONE_ARGS_SIZE_VER2);
26688c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
26698c2ecf20Sopenharmony_ci
26708c2ecf20Sopenharmony_ci	if (unlikely(usize > PAGE_SIZE))
26718c2ecf20Sopenharmony_ci		return -E2BIG;
26728c2ecf20Sopenharmony_ci	if (unlikely(usize < CLONE_ARGS_SIZE_VER0))
26738c2ecf20Sopenharmony_ci		return -EINVAL;
26748c2ecf20Sopenharmony_ci
26758c2ecf20Sopenharmony_ci	err = copy_struct_from_user(&args, sizeof(args), uargs, usize);
26768c2ecf20Sopenharmony_ci	if (err)
26778c2ecf20Sopenharmony_ci		return err;
26788c2ecf20Sopenharmony_ci
26798c2ecf20Sopenharmony_ci	if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL))
26808c2ecf20Sopenharmony_ci		return -EINVAL;
26818c2ecf20Sopenharmony_ci
26828c2ecf20Sopenharmony_ci	if (unlikely(!args.set_tid && args.set_tid_size > 0))
26838c2ecf20Sopenharmony_ci		return -EINVAL;
26848c2ecf20Sopenharmony_ci
26858c2ecf20Sopenharmony_ci	if (unlikely(args.set_tid && args.set_tid_size == 0))
26868c2ecf20Sopenharmony_ci		return -EINVAL;
26878c2ecf20Sopenharmony_ci
26888c2ecf20Sopenharmony_ci	/*
26898c2ecf20Sopenharmony_ci	 * Verify that higher 32bits of exit_signal are unset and that
26908c2ecf20Sopenharmony_ci	 * it is a valid signal
26918c2ecf20Sopenharmony_ci	 */
26928c2ecf20Sopenharmony_ci	if (unlikely((args.exit_signal & ~((u64)CSIGNAL)) ||
26938c2ecf20Sopenharmony_ci		     !valid_signal(args.exit_signal)))
26948c2ecf20Sopenharmony_ci		return -EINVAL;
26958c2ecf20Sopenharmony_ci
26968c2ecf20Sopenharmony_ci	if ((args.flags & CLONE_INTO_CGROUP) &&
26978c2ecf20Sopenharmony_ci	    (args.cgroup > INT_MAX || usize < CLONE_ARGS_SIZE_VER2))
26988c2ecf20Sopenharmony_ci		return -EINVAL;
26998c2ecf20Sopenharmony_ci
27008c2ecf20Sopenharmony_ci	*kargs = (struct kernel_clone_args){
27018c2ecf20Sopenharmony_ci		.flags		= args.flags,
27028c2ecf20Sopenharmony_ci		.pidfd		= u64_to_user_ptr(args.pidfd),
27038c2ecf20Sopenharmony_ci		.child_tid	= u64_to_user_ptr(args.child_tid),
27048c2ecf20Sopenharmony_ci		.parent_tid	= u64_to_user_ptr(args.parent_tid),
27058c2ecf20Sopenharmony_ci		.exit_signal	= args.exit_signal,
27068c2ecf20Sopenharmony_ci		.stack		= args.stack,
27078c2ecf20Sopenharmony_ci		.stack_size	= args.stack_size,
27088c2ecf20Sopenharmony_ci		.tls		= args.tls,
27098c2ecf20Sopenharmony_ci		.set_tid_size	= args.set_tid_size,
27108c2ecf20Sopenharmony_ci		.cgroup		= args.cgroup,
27118c2ecf20Sopenharmony_ci	};
27128c2ecf20Sopenharmony_ci
27138c2ecf20Sopenharmony_ci	if (args.set_tid &&
27148c2ecf20Sopenharmony_ci		copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid),
27158c2ecf20Sopenharmony_ci			(kargs->set_tid_size * sizeof(pid_t))))
27168c2ecf20Sopenharmony_ci		return -EFAULT;
27178c2ecf20Sopenharmony_ci
27188c2ecf20Sopenharmony_ci	kargs->set_tid = kset_tid;
27198c2ecf20Sopenharmony_ci
27208c2ecf20Sopenharmony_ci	return 0;
27218c2ecf20Sopenharmony_ci}
27228c2ecf20Sopenharmony_ci
27238c2ecf20Sopenharmony_ci/**
27248c2ecf20Sopenharmony_ci * clone3_stack_valid - check and prepare stack
27258c2ecf20Sopenharmony_ci * @kargs: kernel clone args
27268c2ecf20Sopenharmony_ci *
27278c2ecf20Sopenharmony_ci * Verify that the stack arguments userspace gave us are sane.
27288c2ecf20Sopenharmony_ci * In addition, set the stack direction for userspace since it's easy for us to
27298c2ecf20Sopenharmony_ci * determine.
27308c2ecf20Sopenharmony_ci */
27318c2ecf20Sopenharmony_cistatic inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
27328c2ecf20Sopenharmony_ci{
27338c2ecf20Sopenharmony_ci	if (kargs->stack == 0) {
27348c2ecf20Sopenharmony_ci		if (kargs->stack_size > 0)
27358c2ecf20Sopenharmony_ci			return false;
27368c2ecf20Sopenharmony_ci	} else {
27378c2ecf20Sopenharmony_ci		if (kargs->stack_size == 0)
27388c2ecf20Sopenharmony_ci			return false;
27398c2ecf20Sopenharmony_ci
27408c2ecf20Sopenharmony_ci		if (!access_ok((void __user *)kargs->stack, kargs->stack_size))
27418c2ecf20Sopenharmony_ci			return false;
27428c2ecf20Sopenharmony_ci
27438c2ecf20Sopenharmony_ci#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64)
27448c2ecf20Sopenharmony_ci		kargs->stack += kargs->stack_size;
27458c2ecf20Sopenharmony_ci#endif
27468c2ecf20Sopenharmony_ci	}
27478c2ecf20Sopenharmony_ci
27488c2ecf20Sopenharmony_ci	return true;
27498c2ecf20Sopenharmony_ci}
27508c2ecf20Sopenharmony_ci
27518c2ecf20Sopenharmony_cistatic bool clone3_args_valid(struct kernel_clone_args *kargs)
27528c2ecf20Sopenharmony_ci{
27538c2ecf20Sopenharmony_ci	/* Verify that no unknown flags are passed along. */
27548c2ecf20Sopenharmony_ci	if (kargs->flags &
27558c2ecf20Sopenharmony_ci	    ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
27568c2ecf20Sopenharmony_ci		return false;
27578c2ecf20Sopenharmony_ci
27588c2ecf20Sopenharmony_ci	/*
27598c2ecf20Sopenharmony_ci	 * - make the CLONE_DETACHED bit reuseable for clone3
27608c2ecf20Sopenharmony_ci	 * - make the CSIGNAL bits reuseable for clone3
27618c2ecf20Sopenharmony_ci	 */
27628c2ecf20Sopenharmony_ci	if (kargs->flags & (CLONE_DETACHED | (CSIGNAL & (~CLONE_NEWTIME))))
27638c2ecf20Sopenharmony_ci		return false;
27648c2ecf20Sopenharmony_ci
27658c2ecf20Sopenharmony_ci	if ((kargs->flags & (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND)) ==
27668c2ecf20Sopenharmony_ci	    (CLONE_SIGHAND | CLONE_CLEAR_SIGHAND))
27678c2ecf20Sopenharmony_ci		return false;
27688c2ecf20Sopenharmony_ci
27698c2ecf20Sopenharmony_ci	if ((kargs->flags & (CLONE_THREAD | CLONE_PARENT)) &&
27708c2ecf20Sopenharmony_ci	    kargs->exit_signal)
27718c2ecf20Sopenharmony_ci		return false;
27728c2ecf20Sopenharmony_ci
27738c2ecf20Sopenharmony_ci	if (!clone3_stack_valid(kargs))
27748c2ecf20Sopenharmony_ci		return false;
27758c2ecf20Sopenharmony_ci
27768c2ecf20Sopenharmony_ci	return true;
27778c2ecf20Sopenharmony_ci}
27788c2ecf20Sopenharmony_ci
27798c2ecf20Sopenharmony_ci/**
27808c2ecf20Sopenharmony_ci * clone3 - create a new process with specific properties
27818c2ecf20Sopenharmony_ci * @uargs: argument structure
27828c2ecf20Sopenharmony_ci * @size:  size of @uargs
27838c2ecf20Sopenharmony_ci *
27848c2ecf20Sopenharmony_ci * clone3() is the extensible successor to clone()/clone2().
27858c2ecf20Sopenharmony_ci * It takes a struct as argument that is versioned by its size.
27868c2ecf20Sopenharmony_ci *
27878c2ecf20Sopenharmony_ci * Return: On success, a positive PID for the child process.
27888c2ecf20Sopenharmony_ci *         On error, a negative errno number.
27898c2ecf20Sopenharmony_ci */
27908c2ecf20Sopenharmony_ciSYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
27918c2ecf20Sopenharmony_ci{
27928c2ecf20Sopenharmony_ci	int err;
27938c2ecf20Sopenharmony_ci
27948c2ecf20Sopenharmony_ci	struct kernel_clone_args kargs;
27958c2ecf20Sopenharmony_ci	pid_t set_tid[MAX_PID_NS_LEVEL];
27968c2ecf20Sopenharmony_ci
27978c2ecf20Sopenharmony_ci	kargs.set_tid = set_tid;
27988c2ecf20Sopenharmony_ci
27998c2ecf20Sopenharmony_ci	err = copy_clone_args_from_user(&kargs, uargs, size);
28008c2ecf20Sopenharmony_ci	if (err)
28018c2ecf20Sopenharmony_ci		return err;
28028c2ecf20Sopenharmony_ci
28038c2ecf20Sopenharmony_ci	if (!clone3_args_valid(&kargs))
28048c2ecf20Sopenharmony_ci		return -EINVAL;
28058c2ecf20Sopenharmony_ci
28068c2ecf20Sopenharmony_ci	return kernel_clone(&kargs);
28078c2ecf20Sopenharmony_ci}
28088c2ecf20Sopenharmony_ci#endif
28098c2ecf20Sopenharmony_ci
28108c2ecf20Sopenharmony_civoid walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data)
28118c2ecf20Sopenharmony_ci{
28128c2ecf20Sopenharmony_ci	struct task_struct *leader, *parent, *child;
28138c2ecf20Sopenharmony_ci	int res;
28148c2ecf20Sopenharmony_ci
28158c2ecf20Sopenharmony_ci	read_lock(&tasklist_lock);
28168c2ecf20Sopenharmony_ci	leader = top = top->group_leader;
28178c2ecf20Sopenharmony_cidown:
28188c2ecf20Sopenharmony_ci	for_each_thread(leader, parent) {
28198c2ecf20Sopenharmony_ci		list_for_each_entry(child, &parent->children, sibling) {
28208c2ecf20Sopenharmony_ci			res = visitor(child, data);
28218c2ecf20Sopenharmony_ci			if (res) {
28228c2ecf20Sopenharmony_ci				if (res < 0)
28238c2ecf20Sopenharmony_ci					goto out;
28248c2ecf20Sopenharmony_ci				leader = child;
28258c2ecf20Sopenharmony_ci				goto down;
28268c2ecf20Sopenharmony_ci			}
28278c2ecf20Sopenharmony_ciup:
28288c2ecf20Sopenharmony_ci			;
28298c2ecf20Sopenharmony_ci		}
28308c2ecf20Sopenharmony_ci	}
28318c2ecf20Sopenharmony_ci
28328c2ecf20Sopenharmony_ci	if (leader != top) {
28338c2ecf20Sopenharmony_ci		child = leader;
28348c2ecf20Sopenharmony_ci		parent = child->real_parent;
28358c2ecf20Sopenharmony_ci		leader = parent->group_leader;
28368c2ecf20Sopenharmony_ci		goto up;
28378c2ecf20Sopenharmony_ci	}
28388c2ecf20Sopenharmony_ciout:
28398c2ecf20Sopenharmony_ci	read_unlock(&tasklist_lock);
28408c2ecf20Sopenharmony_ci}
28418c2ecf20Sopenharmony_ci
/* Default to 0 when nothing has defined ARCH_MIN_MMSTRUCT_ALIGN yet. */
#if !defined(ARCH_MIN_MMSTRUCT_ALIGN)
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
28458c2ecf20Sopenharmony_ci
28468c2ecf20Sopenharmony_cistatic void sighand_ctor(void *data)
28478c2ecf20Sopenharmony_ci{
28488c2ecf20Sopenharmony_ci	struct sighand_struct *sighand = data;
28498c2ecf20Sopenharmony_ci
28508c2ecf20Sopenharmony_ci	spin_lock_init(&sighand->siglock);
28518c2ecf20Sopenharmony_ci	init_waitqueue_head(&sighand->signalfd_wqh);
28528c2ecf20Sopenharmony_ci}
28538c2ecf20Sopenharmony_ci
28548c2ecf20Sopenharmony_civoid __init mm_cache_init(void)
28558c2ecf20Sopenharmony_ci{
28568c2ecf20Sopenharmony_ci	unsigned int mm_size;
28578c2ecf20Sopenharmony_ci
28588c2ecf20Sopenharmony_ci	/*
28598c2ecf20Sopenharmony_ci	 * The mm_cpumask is located at the end of mm_struct, and is
28608c2ecf20Sopenharmony_ci	 * dynamically sized based on the maximum CPU number this system
28618c2ecf20Sopenharmony_ci	 * can have, taking hotplug into account (nr_cpu_ids).
28628c2ecf20Sopenharmony_ci	 */
28638c2ecf20Sopenharmony_ci	mm_size = sizeof(struct mm_struct) + cpumask_size();
28648c2ecf20Sopenharmony_ci
28658c2ecf20Sopenharmony_ci	mm_cachep = kmem_cache_create_usercopy("mm_struct",
28668c2ecf20Sopenharmony_ci			mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
28678c2ecf20Sopenharmony_ci			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
28688c2ecf20Sopenharmony_ci			offsetof(struct mm_struct, saved_auxv),
28698c2ecf20Sopenharmony_ci			sizeof_field(struct mm_struct, saved_auxv),
28708c2ecf20Sopenharmony_ci			NULL);
28718c2ecf20Sopenharmony_ci}
28728c2ecf20Sopenharmony_ci
28738c2ecf20Sopenharmony_civoid __init proc_caches_init(void)
28748c2ecf20Sopenharmony_ci{
28758c2ecf20Sopenharmony_ci	sighand_cachep = kmem_cache_create("sighand_cache",
28768c2ecf20Sopenharmony_ci			sizeof(struct sighand_struct), 0,
28778c2ecf20Sopenharmony_ci			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
28788c2ecf20Sopenharmony_ci			SLAB_ACCOUNT, sighand_ctor);
28798c2ecf20Sopenharmony_ci	signal_cachep = kmem_cache_create("signal_cache",
28808c2ecf20Sopenharmony_ci			sizeof(struct signal_struct), 0,
28818c2ecf20Sopenharmony_ci			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
28828c2ecf20Sopenharmony_ci			NULL);
28838c2ecf20Sopenharmony_ci	files_cachep = kmem_cache_create("files_cache",
28848c2ecf20Sopenharmony_ci			sizeof(struct files_struct), 0,
28858c2ecf20Sopenharmony_ci			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
28868c2ecf20Sopenharmony_ci			NULL);
28878c2ecf20Sopenharmony_ci	fs_cachep = kmem_cache_create("fs_cache",
28888c2ecf20Sopenharmony_ci			sizeof(struct fs_struct), 0,
28898c2ecf20Sopenharmony_ci			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
28908c2ecf20Sopenharmony_ci			NULL);
28918c2ecf20Sopenharmony_ci
28928c2ecf20Sopenharmony_ci	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
28938c2ecf20Sopenharmony_ci	mmap_init();
28948c2ecf20Sopenharmony_ci	nsproxy_cache_init();
28958c2ecf20Sopenharmony_ci}
28968c2ecf20Sopenharmony_ci
28978c2ecf20Sopenharmony_ci/*
28988c2ecf20Sopenharmony_ci * Check constraints on flags passed to the unshare system call.
28998c2ecf20Sopenharmony_ci */
29008c2ecf20Sopenharmony_cistatic int check_unshare_flags(unsigned long unshare_flags)
29018c2ecf20Sopenharmony_ci{
29028c2ecf20Sopenharmony_ci	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
29038c2ecf20Sopenharmony_ci				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
29048c2ecf20Sopenharmony_ci				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
29058c2ecf20Sopenharmony_ci				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
29068c2ecf20Sopenharmony_ci				CLONE_NEWTIME))
29078c2ecf20Sopenharmony_ci		return -EINVAL;
29088c2ecf20Sopenharmony_ci	/*
29098c2ecf20Sopenharmony_ci	 * Not implemented, but pretend it works if there is nothing
29108c2ecf20Sopenharmony_ci	 * to unshare.  Note that unsharing the address space or the
29118c2ecf20Sopenharmony_ci	 * signal handlers also need to unshare the signal queues (aka
29128c2ecf20Sopenharmony_ci	 * CLONE_THREAD).
29138c2ecf20Sopenharmony_ci	 */
29148c2ecf20Sopenharmony_ci	if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
29158c2ecf20Sopenharmony_ci		if (!thread_group_empty(current))
29168c2ecf20Sopenharmony_ci			return -EINVAL;
29178c2ecf20Sopenharmony_ci	}
29188c2ecf20Sopenharmony_ci	if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
29198c2ecf20Sopenharmony_ci		if (refcount_read(&current->sighand->count) > 1)
29208c2ecf20Sopenharmony_ci			return -EINVAL;
29218c2ecf20Sopenharmony_ci	}
29228c2ecf20Sopenharmony_ci	if (unshare_flags & CLONE_VM) {
29238c2ecf20Sopenharmony_ci		if (!current_is_single_threaded())
29248c2ecf20Sopenharmony_ci			return -EINVAL;
29258c2ecf20Sopenharmony_ci	}
29268c2ecf20Sopenharmony_ci
29278c2ecf20Sopenharmony_ci	return 0;
29288c2ecf20Sopenharmony_ci}
29298c2ecf20Sopenharmony_ci
29308c2ecf20Sopenharmony_ci/*
29318c2ecf20Sopenharmony_ci * Unshare the filesystem structure if it is being shared
29328c2ecf20Sopenharmony_ci */
29338c2ecf20Sopenharmony_cistatic int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
29348c2ecf20Sopenharmony_ci{
29358c2ecf20Sopenharmony_ci	struct fs_struct *fs = current->fs;
29368c2ecf20Sopenharmony_ci
29378c2ecf20Sopenharmony_ci	if (!(unshare_flags & CLONE_FS) || !fs)
29388c2ecf20Sopenharmony_ci		return 0;
29398c2ecf20Sopenharmony_ci
29408c2ecf20Sopenharmony_ci	/* don't need lock here; in the worst case we'll do useless copy */
29418c2ecf20Sopenharmony_ci	if (fs->users == 1)
29428c2ecf20Sopenharmony_ci		return 0;
29438c2ecf20Sopenharmony_ci
29448c2ecf20Sopenharmony_ci	*new_fsp = copy_fs_struct(fs);
29458c2ecf20Sopenharmony_ci	if (!*new_fsp)
29468c2ecf20Sopenharmony_ci		return -ENOMEM;
29478c2ecf20Sopenharmony_ci
29488c2ecf20Sopenharmony_ci	return 0;
29498c2ecf20Sopenharmony_ci}
29508c2ecf20Sopenharmony_ci
29518c2ecf20Sopenharmony_ci/*
29528c2ecf20Sopenharmony_ci * Unshare file descriptor table if it is being shared
29538c2ecf20Sopenharmony_ci */
29548c2ecf20Sopenharmony_ciint unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
29558c2ecf20Sopenharmony_ci	       struct files_struct **new_fdp)
29568c2ecf20Sopenharmony_ci{
29578c2ecf20Sopenharmony_ci	struct files_struct *fd = current->files;
29588c2ecf20Sopenharmony_ci	int error = 0;
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_ci	if ((unshare_flags & CLONE_FILES) &&
29618c2ecf20Sopenharmony_ci	    (fd && atomic_read(&fd->count) > 1)) {
29628c2ecf20Sopenharmony_ci		*new_fdp = dup_fd(fd, max_fds, &error);
29638c2ecf20Sopenharmony_ci		if (!*new_fdp)
29648c2ecf20Sopenharmony_ci			return error;
29658c2ecf20Sopenharmony_ci	}
29668c2ecf20Sopenharmony_ci
29678c2ecf20Sopenharmony_ci	return 0;
29688c2ecf20Sopenharmony_ci}
29698c2ecf20Sopenharmony_ci
29708c2ecf20Sopenharmony_ci/*
29718c2ecf20Sopenharmony_ci * unshare allows a process to 'unshare' part of the process
29728c2ecf20Sopenharmony_ci * context which was originally shared using clone.  copy_*
29738c2ecf20Sopenharmony_ci * functions used by kernel_clone() cannot be used here directly
29748c2ecf20Sopenharmony_ci * because they modify an inactive task_struct that is being
29758c2ecf20Sopenharmony_ci * constructed. Here we are modifying the current, active,
29768c2ecf20Sopenharmony_ci * task_struct.
29778c2ecf20Sopenharmony_ci */
29788c2ecf20Sopenharmony_ciint ksys_unshare(unsigned long unshare_flags)
29798c2ecf20Sopenharmony_ci{
29808c2ecf20Sopenharmony_ci	struct fs_struct *fs, *new_fs = NULL;
29818c2ecf20Sopenharmony_ci	struct files_struct *fd, *new_fd = NULL;
29828c2ecf20Sopenharmony_ci	struct cred *new_cred = NULL;
29838c2ecf20Sopenharmony_ci	struct nsproxy *new_nsproxy = NULL;
29848c2ecf20Sopenharmony_ci	int do_sysvsem = 0;
29858c2ecf20Sopenharmony_ci	int err;
29868c2ecf20Sopenharmony_ci
29878c2ecf20Sopenharmony_ci	/*
29888c2ecf20Sopenharmony_ci	 * If unsharing a user namespace must also unshare the thread group
29898c2ecf20Sopenharmony_ci	 * and unshare the filesystem root and working directories.
29908c2ecf20Sopenharmony_ci	 */
29918c2ecf20Sopenharmony_ci	if (unshare_flags & CLONE_NEWUSER)
29928c2ecf20Sopenharmony_ci		unshare_flags |= CLONE_THREAD | CLONE_FS;
29938c2ecf20Sopenharmony_ci	/*
29948c2ecf20Sopenharmony_ci	 * If unsharing vm, must also unshare signal handlers.
29958c2ecf20Sopenharmony_ci	 */
29968c2ecf20Sopenharmony_ci	if (unshare_flags & CLONE_VM)
29978c2ecf20Sopenharmony_ci		unshare_flags |= CLONE_SIGHAND;
29988c2ecf20Sopenharmony_ci	/*
29998c2ecf20Sopenharmony_ci	 * If unsharing a signal handlers, must also unshare the signal queues.
30008c2ecf20Sopenharmony_ci	 */
30018c2ecf20Sopenharmony_ci	if (unshare_flags & CLONE_SIGHAND)
30028c2ecf20Sopenharmony_ci		unshare_flags |= CLONE_THREAD;
30038c2ecf20Sopenharmony_ci	/*
30048c2ecf20Sopenharmony_ci	 * If unsharing namespace, must also unshare filesystem information.
30058c2ecf20Sopenharmony_ci	 */
30068c2ecf20Sopenharmony_ci	if (unshare_flags & CLONE_NEWNS)
30078c2ecf20Sopenharmony_ci		unshare_flags |= CLONE_FS;
30088c2ecf20Sopenharmony_ci
30098c2ecf20Sopenharmony_ci	err = check_unshare_flags(unshare_flags);
30108c2ecf20Sopenharmony_ci	if (err)
30118c2ecf20Sopenharmony_ci		goto bad_unshare_out;
30128c2ecf20Sopenharmony_ci	/*
30138c2ecf20Sopenharmony_ci	 * CLONE_NEWIPC must also detach from the undolist: after switching
30148c2ecf20Sopenharmony_ci	 * to a new ipc namespace, the semaphore arrays from the old
30158c2ecf20Sopenharmony_ci	 * namespace are unreachable.
30168c2ecf20Sopenharmony_ci	 */
30178c2ecf20Sopenharmony_ci	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
30188c2ecf20Sopenharmony_ci		do_sysvsem = 1;
30198c2ecf20Sopenharmony_ci	err = unshare_fs(unshare_flags, &new_fs);
30208c2ecf20Sopenharmony_ci	if (err)
30218c2ecf20Sopenharmony_ci		goto bad_unshare_out;
30228c2ecf20Sopenharmony_ci	err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
30238c2ecf20Sopenharmony_ci	if (err)
30248c2ecf20Sopenharmony_ci		goto bad_unshare_cleanup_fs;
30258c2ecf20Sopenharmony_ci	err = unshare_userns(unshare_flags, &new_cred);
30268c2ecf20Sopenharmony_ci	if (err)
30278c2ecf20Sopenharmony_ci		goto bad_unshare_cleanup_fd;
30288c2ecf20Sopenharmony_ci	err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
30298c2ecf20Sopenharmony_ci					 new_cred, new_fs);
30308c2ecf20Sopenharmony_ci	if (err)
30318c2ecf20Sopenharmony_ci		goto bad_unshare_cleanup_cred;
30328c2ecf20Sopenharmony_ci
30338c2ecf20Sopenharmony_ci	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
30348c2ecf20Sopenharmony_ci		if (do_sysvsem) {
30358c2ecf20Sopenharmony_ci			/*
30368c2ecf20Sopenharmony_ci			 * CLONE_SYSVSEM is equivalent to sys_exit().
30378c2ecf20Sopenharmony_ci			 */
30388c2ecf20Sopenharmony_ci			exit_sem(current);
30398c2ecf20Sopenharmony_ci		}
30408c2ecf20Sopenharmony_ci		if (unshare_flags & CLONE_NEWIPC) {
30418c2ecf20Sopenharmony_ci			/* Orphan segments in old ns (see sem above). */
30428c2ecf20Sopenharmony_ci			exit_shm(current);
30438c2ecf20Sopenharmony_ci			shm_init_task(current);
30448c2ecf20Sopenharmony_ci		}
30458c2ecf20Sopenharmony_ci
30468c2ecf20Sopenharmony_ci		if (new_nsproxy)
30478c2ecf20Sopenharmony_ci			switch_task_namespaces(current, new_nsproxy);
30488c2ecf20Sopenharmony_ci
30498c2ecf20Sopenharmony_ci		task_lock(current);
30508c2ecf20Sopenharmony_ci
30518c2ecf20Sopenharmony_ci		if (new_fs) {
30528c2ecf20Sopenharmony_ci			fs = current->fs;
30538c2ecf20Sopenharmony_ci			spin_lock(&fs->lock);
30548c2ecf20Sopenharmony_ci			current->fs = new_fs;
30558c2ecf20Sopenharmony_ci			if (--fs->users)
30568c2ecf20Sopenharmony_ci				new_fs = NULL;
30578c2ecf20Sopenharmony_ci			else
30588c2ecf20Sopenharmony_ci				new_fs = fs;
30598c2ecf20Sopenharmony_ci			spin_unlock(&fs->lock);
30608c2ecf20Sopenharmony_ci		}
30618c2ecf20Sopenharmony_ci
30628c2ecf20Sopenharmony_ci		if (new_fd) {
30638c2ecf20Sopenharmony_ci			fd = current->files;
30648c2ecf20Sopenharmony_ci			current->files = new_fd;
30658c2ecf20Sopenharmony_ci			new_fd = fd;
30668c2ecf20Sopenharmony_ci		}
30678c2ecf20Sopenharmony_ci
30688c2ecf20Sopenharmony_ci		task_unlock(current);
30698c2ecf20Sopenharmony_ci
30708c2ecf20Sopenharmony_ci		if (new_cred) {
30718c2ecf20Sopenharmony_ci			/* Install the new user namespace */
30728c2ecf20Sopenharmony_ci			commit_creds(new_cred);
30738c2ecf20Sopenharmony_ci			new_cred = NULL;
30748c2ecf20Sopenharmony_ci		}
30758c2ecf20Sopenharmony_ci	}
30768c2ecf20Sopenharmony_ci
30778c2ecf20Sopenharmony_ci	perf_event_namespaces(current);
30788c2ecf20Sopenharmony_ci
30798c2ecf20Sopenharmony_cibad_unshare_cleanup_cred:
30808c2ecf20Sopenharmony_ci	if (new_cred)
30818c2ecf20Sopenharmony_ci		put_cred(new_cred);
30828c2ecf20Sopenharmony_cibad_unshare_cleanup_fd:
30838c2ecf20Sopenharmony_ci	if (new_fd)
30848c2ecf20Sopenharmony_ci		put_files_struct(new_fd);
30858c2ecf20Sopenharmony_ci
30868c2ecf20Sopenharmony_cibad_unshare_cleanup_fs:
30878c2ecf20Sopenharmony_ci	if (new_fs)
30888c2ecf20Sopenharmony_ci		free_fs_struct(new_fs);
30898c2ecf20Sopenharmony_ci
30908c2ecf20Sopenharmony_cibad_unshare_out:
30918c2ecf20Sopenharmony_ci	return err;
30928c2ecf20Sopenharmony_ci}
30938c2ecf20Sopenharmony_ci
/* unshare() system call entry point: forwards directly to ksys_unshare(). */
SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
{
	return ksys_unshare(unshare_flags);
}
30988c2ecf20Sopenharmony_ci
30998c2ecf20Sopenharmony_ci/*
31008c2ecf20Sopenharmony_ci *	Helper to unshare the files of the current task.
31018c2ecf20Sopenharmony_ci *	We don't want to expose copy_files internals to
31028c2ecf20Sopenharmony_ci *	the exec layer of the kernel.
31038c2ecf20Sopenharmony_ci */
31048c2ecf20Sopenharmony_ci
31058c2ecf20Sopenharmony_ciint unshare_files(struct files_struct **displaced)
31068c2ecf20Sopenharmony_ci{
31078c2ecf20Sopenharmony_ci	struct task_struct *task = current;
31088c2ecf20Sopenharmony_ci	struct files_struct *copy = NULL;
31098c2ecf20Sopenharmony_ci	int error;
31108c2ecf20Sopenharmony_ci
31118c2ecf20Sopenharmony_ci	error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
31128c2ecf20Sopenharmony_ci	if (error || !copy) {
31138c2ecf20Sopenharmony_ci		*displaced = NULL;
31148c2ecf20Sopenharmony_ci		return error;
31158c2ecf20Sopenharmony_ci	}
31168c2ecf20Sopenharmony_ci	*displaced = task->files;
31178c2ecf20Sopenharmony_ci	task_lock(task);
31188c2ecf20Sopenharmony_ci	task->files = copy;
31198c2ecf20Sopenharmony_ci	task_unlock(task);
31208c2ecf20Sopenharmony_ci	return 0;
31218c2ecf20Sopenharmony_ci}
31228c2ecf20Sopenharmony_ci
31238c2ecf20Sopenharmony_ciint sysctl_max_threads(struct ctl_table *table, int write,
31248c2ecf20Sopenharmony_ci		       void *buffer, size_t *lenp, loff_t *ppos)
31258c2ecf20Sopenharmony_ci{
31268c2ecf20Sopenharmony_ci	struct ctl_table t;
31278c2ecf20Sopenharmony_ci	int ret;
31288c2ecf20Sopenharmony_ci	int threads = max_threads;
31298c2ecf20Sopenharmony_ci	int min = 1;
31308c2ecf20Sopenharmony_ci	int max = MAX_THREADS;
31318c2ecf20Sopenharmony_ci
31328c2ecf20Sopenharmony_ci	t = *table;
31338c2ecf20Sopenharmony_ci	t.data = &threads;
31348c2ecf20Sopenharmony_ci	t.extra1 = &min;
31358c2ecf20Sopenharmony_ci	t.extra2 = &max;
31368c2ecf20Sopenharmony_ci
31378c2ecf20Sopenharmony_ci	ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
31388c2ecf20Sopenharmony_ci	if (ret || !write)
31398c2ecf20Sopenharmony_ci		return ret;
31408c2ecf20Sopenharmony_ci
31418c2ecf20Sopenharmony_ci	max_threads = threads;
31428c2ecf20Sopenharmony_ci
31438c2ecf20Sopenharmony_ci	return 0;
31448c2ecf20Sopenharmony_ci}
3145