// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/mmap.c
 *
 * Written by obz.
 *
 * Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/xpm.h>

#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <linux/hck/lite_hck_jit_memory.h>


#define CREATE_TRACE_POINTS
#include <trace/events/mmap.h>

#undef CREATE_TRACE_POINTS
#include <trace/hooks/mm.h>

#include "internal.h"

#ifndef arch_mmap_check
#define arch_mmap_check(addr, len, flags)	(0)
#endif

#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
const int mmap_rnd_bits_min = CONFIG_ARCH_MMAP_RND_BITS_MIN;
const int mmap_rnd_bits_max = CONFIG_ARCH_MMAP_RND_BITS_MAX;
int mmap_rnd_bits __read_mostly = CONFIG_ARCH_MMAP_RND_BITS;
#endif
#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
const int mmap_rnd_compat_bits_min = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN;
const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
#endif

static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);

static void unmap_region(struct mm_struct *mm,
		struct vm_area_struct *vma, struct vm_area_struct *prev,
		unsigned long start, unsigned long end);

/* description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 */
pgprot_t protection_map[16] __ro_after_init = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
{
	return prot;
}
#endif

pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}
EXPORT_SYMBOL(vm_get_page_prot);

static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}

/* Update vma->vm_page_prot to reflect vma->vm_flags. */
void vma_set_page_prot(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;
	pgprot_t vm_page_prot;

	vm_page_prot = vm_pgprot_modify(vma->vm_page_prot, vm_flags);
	if (vma_wants_writenotify(vma, vm_page_prot)) {
		vm_flags &= ~VM_SHARED;
		vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
	}
	/* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
	WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}

/*
 * Requires inode->i_mapping->i_mmap_rwsem
 */
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
		struct file *file, struct address_space *mapping)
{
	if (vma->vm_flags & VM_DENYWRITE)
		allow_write_access(file);
	if (vma->vm_flags & VM_SHARED)
		mapping_unmap_writable(mapping);

	flush_dcache_mmap_lock(mapping);
	vma_interval_tree_remove(vma, &mapping->i_mmap);
	flush_dcache_mmap_unlock(mapping);
}

/*
 * Unlink a file-based vm structure from its interval tree, to hide
 * vma from rmap and vmtruncate before freeing its page tables.
 */
void unlink_file_vma(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;

	if (file) {
		struct address_space *mapping = file->f_mapping;
		i_mmap_lock_write(mapping);
		__remove_shared_vm_struct(vma, file, mapping);
		i_mmap_unlock_write(mapping);
	}
}

/*
 * Close a vm structure and free it, returning the next.
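 * May sleep, e.g. in a driver's ->close() hook (note the might_sleep() below).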
 */
static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
{
	struct vm_area_struct *next = vma->vm_next;

	might_sleep();
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
	if (vma->vm_file)
		fput(vma->vm_file);
	mpol_put(vma_policy(vma));
	vm_area_free(vma);
	return next;
}

static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
		struct list_head *uf);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
	unsigned long retval;
	unsigned long newbrk, oldbrk, origbrk;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *next;
	unsigned long min_brk;
	bool populate;
	bool downgraded = false;
	LIST_HEAD(uf);

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	origbrk = mm->brk;

#ifdef CONFIG_COMPAT_BRK
	/*
	 * CONFIG_COMPAT_BRK can still be overridden by setting
	 * randomize_va_space to 2, which will still cause mm->start_brk
	 * to be arbitrarily shifted
	 */
	if (current->brk_randomized)
		min_brk = mm->start_brk;
	else
		min_brk = mm->end_data;
#else
	min_brk = mm->start_brk;
#endif
	if (brk < min_brk)
		goto out;

	/*
	 * Check against rlimit here. If this check is done later after the test
	 * of oldbrk with newbrk then it can escape the test and let the data
	 * segment grow beyond its set limit in the case where the limit is
	 * not page aligned -Ram Gupta
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
			      mm->end_data, mm->start_data))
		goto out;

	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk) {
		mm->brk = brk;
		goto success;
	}

	/*
	 * Always allow shrinking brk.
	 * __do_munmap() may downgrade mmap_lock to read.
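	 * A downgrade lets concurrent readers (e.g. page faults) make
	 * progress while the freed range is being unmapped.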
	 */
	if (brk <= mm->brk) {
		int ret;

		/*
		 * mm->brk must be protected by write mmap_lock so update it
		 * before downgrading mmap_lock. When __do_munmap() fails,
		 * mm->brk will be restored from origbrk.
		 */
		mm->brk = brk;
		ret = __do_munmap(mm, newbrk, oldbrk-newbrk, &uf, true);
		if (ret < 0) {
			mm->brk = origbrk;
			goto out;
		} else if (ret == 1) {
			downgraded = true;
		}
		goto success;
	}

	/* Check against existing mmap mappings. */
	next = find_vma(mm, oldbrk);
	if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
		goto out;

	/* Ok, looks good - let it rip. */
	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
		goto out;
	mm->brk = brk;

success:
	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
	if (downgraded)
		mmap_read_unlock(mm);
	else
		mmap_write_unlock(mm);
	userfaultfd_unmap_complete(mm, &uf);
	if (populate)
		mm_populate(oldbrk, newbrk - oldbrk);
	return brk;

out:
	retval = origbrk;
	mmap_write_unlock(mm);
	return retval;
}

static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
{
	unsigned long gap, prev_end;

	/*
	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
	 * allow two stack_guard_gaps between them here, and when choosing
	 * an unmapped area; whereas when expanding we only require one.
	 * That's a little inconsistent, but keeps the code here simpler.
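	 * The result is vm_start_gap(vma) - vm_end_gap(vma->vm_prev),
	 * clamped to zero when the guard gaps overlap.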
	 */
	gap = vm_start_gap(vma);
	if (vma->vm_prev) {
		prev_end = vm_end_gap(vma->vm_prev);
		if (gap > prev_end)
			gap -= prev_end;
		else
			gap = 0;
	}
	return gap;
}

#ifdef CONFIG_DEBUG_VM_RB
static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
{
	unsigned long max = vma_compute_gap(vma), subtree_gap;
	if (vma->vm_rb.rb_left) {
		subtree_gap = rb_entry(vma->vm_rb.rb_left,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	if (vma->vm_rb.rb_right) {
		subtree_gap = rb_entry(vma->vm_rb.rb_right,
				struct vm_area_struct, vm_rb)->rb_subtree_gap;
		if (subtree_gap > max)
			max = subtree_gap;
	}
	return max;
}

static int browse_rb(struct mm_struct *mm)
{
	struct rb_root *root = &mm->mm_rb;
	int i = 0, j, bug = 0;
	struct rb_node *nd, *pn = NULL;
	unsigned long prev = 0, pend = 0;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		if (vma->vm_start < prev) {
			pr_emerg("vm_start %lx < prev %lx\n",
				  vma->vm_start, prev);
			bug = 1;
		}
		if (vma->vm_start < pend) {
			pr_emerg("vm_start %lx < pend %lx\n",
				  vma->vm_start, pend);
			bug = 1;
		}
		if (vma->vm_start > vma->vm_end) {
			pr_emerg("vm_start %lx > vm_end %lx\n",
				  vma->vm_start, vma->vm_end);
			bug = 1;
		}
		spin_lock(&mm->page_table_lock);
		if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
			pr_emerg("free gap %lx, correct %lx\n",
			       vma->rb_subtree_gap,
			       vma_compute_subtree_gap(vma));
			bug = 1;
		}
		spin_unlock(&mm->page_table_lock);
		i++;
		pn = nd;
		prev = vma->vm_start;
		pend = vma->vm_end;
	}
	j = 0;
	for (nd = pn; nd; nd = rb_prev(nd))
		j++;
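	/* A consistent tree yields the same count walking backward as forward. */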
	if (i != j) {
		pr_emerg("backwards %d, forwards %d\n", j, i);
		bug = 1;
	}
	return bug ? -1 : i;
}

static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
{
	struct rb_node *nd;

	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
		struct vm_area_struct *vma;
		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
		VM_BUG_ON_VMA(vma != ignore &&
			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
			vma);
	}
}

static void validate_mm(struct mm_struct *mm)
{
	int bug = 0;
	int i = 0;
	unsigned long highest_address = 0;
	struct vm_area_struct *vma = mm->mmap;

	while (vma) {
		struct anon_vma *anon_vma = vma->anon_vma;
		struct anon_vma_chain *avc;

		if (anon_vma) {
			anon_vma_lock_read(anon_vma);
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				anon_vma_interval_tree_verify(avc);
			anon_vma_unlock_read(anon_vma);
		}

		highest_address = vm_end_gap(vma);
		vma = vma->vm_next;
		i++;
	}
	if (i != mm->map_count) {
		pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
		bug = 1;
	}
	if (highest_address != mm->highest_vm_end) {
		pr_emerg("mm->highest_vm_end %lx, found %lx\n",
			  mm->highest_vm_end, highest_address);
		bug = 1;
	}
	i = browse_rb(mm);
	if (i != mm->map_count) {
		if (i != -1)
			pr_emerg("map_count %d rb %d\n", mm->map_count, i);
		bug = 1;
	}
	VM_BUG_ON_MM(bug, mm);
}
#else
#define validate_mm_rb(root, ignore) do { } while (0)
#define validate_mm(mm) do { } while (0)
#endif

RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
			 struct vm_area_struct, vm_rb,
			 unsigned long, rb_subtree_gap, vma_compute_gap)

/*
 * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
 * vma->vm_prev->vm_end values changed, without modifying the vma's position
 * in the rbtree.
 */
static void vma_gap_update(struct vm_area_struct *vma)
{
	/*
	 * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
	 * a callback function that does exactly what we want.
	 */
	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
}

static inline void vma_rb_insert(struct vm_area_struct *vma,
				 struct rb_root *root)
{
	/* All rb_subtree_gap values must be consistent prior to insertion */
	validate_mm_rb(root, NULL);

	rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static void __vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
{
	/*
	 * Note rb_erase_augmented is a fairly large inline function,
	 * so make sure we instantiate it only once with our desired
	 * augmented rbtree callbacks.
	 */
	rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
}

static __always_inline void vma_rb_erase_ignore(struct vm_area_struct *vma,
						struct rb_root *root,
						struct vm_area_struct *ignore)
{
	/*
	 * All rb_subtree_gap values must be consistent prior to erase,
	 * with the possible exception of
	 *
	 * a. the "next" vma being erased if next->vm_start was reduced in
	 *    __vma_adjust() -> __vma_unlink()
	 * b. the vma being erased in detach_vmas_to_be_unmapped() ->
	 *    vma_rb_erase()
	 */
	validate_mm_rb(root, ignore);

	__vma_rb_erase(vma, root);
}

static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
					 struct rb_root *root)
{
	vma_rb_erase_ignore(vma, root, vma);
}

/*
 * vma has some anon_vma assigned, and is already inserted on that
 * anon_vma's interval trees.
 *
 * Before updating the vma's vm_start / vm_end / vm_pgoff fields, the
 * vma must be removed from the anon_vma's interval trees using
 * anon_vma_interval_tree_pre_update_vma().
 *
 * After the update, the vma will be reinserted using
 * anon_vma_interval_tree_post_update_vma().
 *
 * The entire update must be protected by exclusive mmap_lock and by
 * the root anon_vma's mutex.
 */
static inline void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}

static inline void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}

/*
 * Walk the rbtree to find where a new vma covering [addr, end) would be
 * linked.  Returns 0 and fills *pprev, *rb_link and *rb_parent, or -ENOMEM
 * if an existing vma already overlaps the range.
 */
static int find_vma_links(struct mm_struct *mm, unsigned long addr,
		unsigned long end, struct vm_area_struct **pprev,
		struct rb_node ***rb_link, struct rb_node **rb_parent)
{
	struct rb_node **__rb_link, *__rb_parent, *rb_prev;

	__rb_link = &mm->mm_rb.rb_node;
	rb_prev = __rb_parent = NULL;

	while (*__rb_link) {
		struct vm_area_struct *vma_tmp;

		__rb_parent = *__rb_link;
		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);

		if (vma_tmp->vm_end > addr) {
			/* Fail if an existing vma overlaps the area */
			if (vma_tmp->vm_start < end)
				return -ENOMEM;
			__rb_link = &__rb_parent->rb_left;
		} else {
			rb_prev = __rb_parent;
			__rb_link = &__rb_parent->rb_right;
		}
	}

	*pprev = NULL;
	if (rb_prev)
		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
	*rb_link = __rb_link;
	*rb_parent = __rb_parent;
	return 0;
}

/*
 * vma_next() - Get the next VMA.
 * @mm: The mm_struct.
 * @vma: The current vma.
 *
 * If @vma is NULL, return the first vma in the mm.
 *
 * Returns: The next VMA after @vma.
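 * NULL is returned when there is no further VMA.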
 */
static inline struct vm_area_struct *vma_next(struct mm_struct *mm,
					 struct vm_area_struct *vma)
{
	if (!vma)
		return mm->mmap;

	return vma->vm_next;
}

/*
 * munmap_vma_range() - munmap VMAs that overlap a range.
 * @mm: The mm struct
 * @start: The start of the range.
 * @len: The length of the range.
 * @pprev: pointer to the pointer that will be set to previous vm_area_struct
 * @rb_link: the rb_node
 * @rb_parent: the parent rb_node
 *
 * Find all the vm_area_struct that overlap from @start to
 * @end and munmap them.  Set @pprev to the previous vm_area_struct.
 *
 * Returns: -ENOMEM on munmap failure or 0 on success.
 */
static inline int
munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
		 struct vm_area_struct **pprev, struct rb_node ***link,
		 struct rb_node **parent, struct list_head *uf)
{

	while (find_vma_links(mm, start, start + len, pprev, link, parent))
		if (do_munmap(mm, start, len, uf))
			return -ENOMEM;

	return 0;
}

static unsigned long count_vma_pages_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end)
{
	unsigned long nr_pages = 0;
	struct vm_area_struct *vma;

	/* Find first overlapping mapping */
	vma = find_vma_intersection(mm, addr, end);
	if (!vma)
		return 0;

	nr_pages = (min(end, vma->vm_end) -
		max(addr, vma->vm_start)) >> PAGE_SHIFT;

	/* Iterate over the rest of the overlaps */
	for (vma = vma->vm_next; vma; vma = vma->vm_next) {
		unsigned long overlap_len;

		if (vma->vm_start > end)
			break;

		overlap_len = min(end, vma->vm_end) - vma->vm_start;
		nr_pages += overlap_len >> PAGE_SHIFT;
	}

	return nr_pages;
}

void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
		struct rb_node **rb_link, struct rb_node *rb_parent)
{
	/* Update tracking information for the gap following the new vma. */
	if (vma->vm_next)
		vma_gap_update(vma->vm_next);
	else
		mm->highest_vm_end = vm_end_gap(vma);

	/*
	 * vma->vm_prev wasn't known when we followed the rbtree to find the
	 * correct insertion point for that vma. As a result, we could not
	 * update the vma vm_rb parents rb_subtree_gap values on the way down.
	 * So, we first insert the vma with a zero rb_subtree_gap value
	 * (to be consistent with what we did on the way down), and then
	 * immediately update the gap to the correct value. Finally we
	 * rebalance the rbtree after all augmented values have been set.
	 */
	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
	vma->rb_subtree_gap = 0;
	vma_gap_update(vma);
	vma_rb_insert(vma, &mm->mm_rb);
}

static void __vma_link_file(struct vm_area_struct *vma)
{
	struct file *file;

	file = vma->vm_file;
	if (file) {
		struct address_space *mapping = file->f_mapping;

		if (vma->vm_flags & VM_DENYWRITE)
			put_write_access(file_inode(file));
		if (vma->vm_flags & VM_SHARED)
			mapping_allow_writable(mapping);

		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_insert(vma, &mapping->i_mmap);
		flush_dcache_mmap_unlock(mapping);
	}
}

static void
__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
	struct vm_area_struct *prev, struct rb_node **rb_link,
	struct rb_node *rb_parent)
{
	__vma_link_list(mm, vma, prev);
	__vma_link_rb(mm, vma, rb_link, rb_parent);
}

static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
			struct vm_area_struct *prev, struct rb_node **rb_link,
			struct rb_node *rb_parent)
{
	struct address_space *mapping = NULL;

	if (vma->vm_file) {
		mapping = vma->vm_file->f_mapping;
		i_mmap_lock_write(mapping);
	}

	__vma_link(mm, vma, prev, rb_link, rb_parent);
	__vma_link_file(vma);

	if (mapping)
		i_mmap_unlock_write(mapping);

	mm->map_count++;
	validate_mm(mm);
}

/*
 * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
 * mm's list and rbtree.  It has already been inserted into the interval tree.
 */
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
	struct vm_area_struct *prev;
	struct rb_node **rb_link, *rb_parent;

	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
			   &prev, &rb_link, &rb_parent))
		BUG();
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	mm->map_count++;
}

static __always_inline void __vma_unlink(struct mm_struct *mm,
						struct vm_area_struct *vma,
						struct vm_area_struct *ignore)
{
	vma_rb_erase_ignore(vma, &mm->mm_rb, ignore);
	__vma_unlink_list(mm, vma);
	/* Kill the cache */
	vmacache_invalidate(mm);
}

/*
 * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
 * is already present in an i_mmap tree without adjusting the tree.
 * The following helper function should be used when such adjustments
 * are necessary.  The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
	struct vm_area_struct *expand)
{
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
	struct address_space *mapping = NULL;
	struct rb_root_cached *root = NULL;
	struct anon_vma *anon_vma = NULL;
	struct file *file = vma->vm_file;
	bool start_changed = false, end_changed = false;
	long adjust_next = 0;
	int remove_next = 0;

	if (next && !insert) {
		struct vm_area_struct *exporter = NULL, *importer = NULL;

		if (end >= next->vm_end) {
			/*
			 * vma expands, overlapping all the next, and
			 * perhaps the one after too (mprotect case 6).
			 * The only other cases that get here are
			 * case 1, case 7 and case 8.
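			 * (Case numbers refer to the diagram above vma_merge().)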
			 */
			if (next == expand) {
				/*
				 * The only case where we don't expand "vma"
				 * and we expand "next" instead is case 8.
				 */
				VM_WARN_ON(end != next->vm_end);
				/*
				 * remove_next == 3 means we're
				 * removing "vma" and that to do so we
				 * swapped "vma" and "next".
				 */
				remove_next = 3;
				VM_WARN_ON(file != next->vm_file);
				swap(vma, next);
			} else {
				VM_WARN_ON(expand != vma);
				/*
				 * case 1, 6, 7, remove_next == 2 is case 6,
				 * remove_next == 1 is case 1 or 7.
				 */
				remove_next = 1 + (end > next->vm_end);
				VM_WARN_ON(remove_next == 2 &&
					   end != next->vm_next->vm_end);
				/* trim end to next, for case 6 first pass */
				end = next->vm_end;
			}

			exporter = next;
			importer = vma;

			/*
			 * If next doesn't have anon_vma, import from vma after
			 * next, if the vma overlaps with it.
			 */
			if (remove_next == 2 && !next->anon_vma)
				exporter = next->vm_next;

		} else if (end > next->vm_start) {
			/*
			 * vma expands, overlapping part of the next:
			 * mprotect case 5 shifting the boundary up.
			 */
			adjust_next = (end - next->vm_start);
			exporter = next;
			importer = vma;
			VM_WARN_ON(expand != importer);
		} else if (end < vma->vm_end) {
			/*
			 * vma shrinks, and !insert tells it's not
			 * split_vma inserting another: so it must be
			 * mprotect case 4 shifting the boundary down.
			 */
			adjust_next = -(vma->vm_end - end);
			exporter = vma;
			importer = next;
			VM_WARN_ON(expand != importer);
		}

		/*
		 * Easily overlooked: when mprotect shifts the boundary,
		 * make sure the expanding vma has anon_vma set if the
		 * shrinking vma had, to cover any anon pages imported.
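		 * anon_vma_clone() below links the importer into the
		 * exporter's anon_vma chain so rmap can still find those pages.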
		 */
		if (exporter && exporter->anon_vma && !importer->anon_vma) {
			int error;

			importer->anon_vma = exporter->anon_vma;
			error = anon_vma_clone(importer, exporter);
			if (error)
				return error;
		}
	}
again:
	vma_adjust_trans_huge(orig_vma, start, end, adjust_next);

	if (file) {
		mapping = file->f_mapping;
		root = &mapping->i_mmap;
		uprobe_munmap(vma, vma->vm_start, vma->vm_end);

		if (adjust_next)
			uprobe_munmap(next, next->vm_start, next->vm_end);

		i_mmap_lock_write(mapping);
		if (insert) {
			/*
			 * Put into interval tree now, so instantiated pages
			 * are visible to arm/parisc __flush_dcache_page
			 * throughout; but we cannot insert into address
			 * space until vma start or end is updated.
			 */
			__vma_link_file(insert);
		}
	}

	anon_vma = vma->anon_vma;
	if (!anon_vma && adjust_next)
		anon_vma = next->anon_vma;
	if (anon_vma) {
		VM_WARN_ON(adjust_next && next->anon_vma &&
			   anon_vma != next->anon_vma);
		anon_vma_lock_write(anon_vma);
		anon_vma_interval_tree_pre_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_pre_update_vma(next);
	}

	if (file) {
		flush_dcache_mmap_lock(mapping);
		vma_interval_tree_remove(vma, root);
		if (adjust_next)
			vma_interval_tree_remove(next, root);
	}

	if (start != vma->vm_start) {
		vma->vm_start = start;
		start_changed = true;
	}
	if (end != vma->vm_end) {
		vma->vm_end = end;
		end_changed = true;
	}
	vma->vm_pgoff = pgoff;
	if (adjust_next) {
		next->vm_start += adjust_next;
		next->vm_pgoff += adjust_next >> PAGE_SHIFT;
	}

	if (file) {
		if (adjust_next)
			vma_interval_tree_insert(next, root);
		vma_interval_tree_insert(vma, root);
		flush_dcache_mmap_unlock(mapping);
	}

	if (remove_next) {
		/*
		 * vma_merge has merged next into vma, and needs
		 * us to remove next before dropping the locks.
		 */
		if (remove_next != 3)
			__vma_unlink(mm, next, next);
		else
			/*
			 * vma is not before next if they've been
			 * swapped.
			 *
			 * pre-swap() next->vm_start was reduced so
			 * tell validate_mm_rb to ignore pre-swap()
			 * "next" (which is stored in post-swap()
			 * "vma").
			 */
			__vma_unlink(mm, next, vma);
		if (file)
			__remove_shared_vm_struct(next, file, mapping);
	} else if (insert) {
		/*
		 * split_vma has split insert from vma, and needs
		 * us to insert it before dropping the locks
		 * (it may either follow vma or precede it).
		 */
		__insert_vm_struct(mm, insert);
	} else {
		if (start_changed)
			vma_gap_update(vma);
		if (end_changed) {
			if (!next)
				mm->highest_vm_end = vm_end_gap(vma);
			else if (!adjust_next)
				vma_gap_update(next);
		}
	}

	if (anon_vma) {
		anon_vma_interval_tree_post_update_vma(vma);
		if (adjust_next)
			anon_vma_interval_tree_post_update_vma(next);
		anon_vma_unlock_write(anon_vma);
	}

	if (file) {
		i_mmap_unlock_write(mapping);
		uprobe_mmap(vma);

		if (adjust_next)
			uprobe_mmap(next);
	}

	if (remove_next) {
		if (file) {
			uprobe_munmap(next, next->vm_start, next->vm_end);
			fput(file);
		}
		if (next->anon_vma)
			anon_vma_merge(vma, next);
		mm->map_count--;
		mpol_put(vma_policy(next));
		vm_area_free(next);
		/*
		 * In mprotect's case 6 (see comments on vma_merge),
		 * we must remove another next too. It would clutter
		 * up the code too much to do both in one go.
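		 * Hence the "goto again" below once the first removal
		 * has completed.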
		 */
		if (remove_next != 3) {
			/*
			 * If "next" was removed and vma->vm_end was
			 * expanded (up) over it, in turn
			 * "next->vm_prev->vm_end" changed and the
			 * "vma->vm_next" gap must be updated.
			 */
			next = vma->vm_next;
		} else {
			/*
			 * For the scope of the comment "next" and
			 * "vma" considered pre-swap(): if "vma" was
			 * removed, next->vm_start was expanded (down)
			 * over it and the "next" gap must be updated.
			 * Because of the swap() the post-swap() "vma"
			 * actually points to pre-swap() "next"
			 * (post-swap() "next" as opposed is now a
			 * dangling pointer).
			 */
			next = vma;
		}
		if (remove_next == 2) {
			remove_next = 1;
			end = next->vm_end;
			goto again;
		} else if (next) {
			vma_gap_update(next);
		} else {
			/*
			 * If remove_next == 2 we obviously can't
			 * reach this path.
			 *
			 * If remove_next == 3 we can't reach this
			 * path because pre-swap() next is always not
			 * NULL. pre-swap() "next" is not being
			 * removed and its next->vm_end is not altered
			 * (and furthermore "end" already matches
			 * next->vm_end in remove_next == 3).
			 *
			 * We reach this only in the remove_next == 1
			 * case if the "next" vma that was removed was
			 * the highest vma of the mm. However in such
			 * case next->vm_end == "end" and the extended
			 * "vma" has vma->vm_end == next->vm_end so
			 * mm->highest_vm_end doesn't need any update
			 * in remove_next == 1 case.
			 */
			VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
		}
	}
	if (insert && file)
		uprobe_mmap(insert);

	validate_mm(mm);

	return 0;
}

/*
 * If the vma has a ->close operation then the driver probably needs to release
 * per-vma resources, so we don't attempt to merge those.
 */
static inline int is_mergeable_vma(struct vm_area_struct *vma,
				struct file *file, unsigned long vm_flags,
				struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
				struct anon_vma_name *anon_name)
{
	/*
	 * VM_SOFTDIRTY should not prevent VMA merging, if we
	 * match the flags but dirty bit -- the caller should mark
	 * merged VMA as dirty. If dirty bit won't be excluded from
	 * comparison, we increase pressure on the memory system forcing
	 * the kernel to generate new VMAs when old one could be
	 * extended instead.
	 */
	if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
		return 0;
	if (vma->vm_file != file)
		return 0;
	if (vma->vm_ops && vma->vm_ops->close)
		return 0;
	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
		return 0;
	if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
		return 0;
	return 1;
}

static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
					struct anon_vma *anon_vma2,
					struct vm_area_struct *vma)
{
	/*
	 * The list_is_singular() test is to avoid merging VMA cloned from
	 * parents. This can improve scalability by reducing anon_vma lock
	 * contention.
	 */
	if ((!anon_vma1 || !anon_vma2) && (!vma ||
		list_is_singular(&vma->anon_vma_chain)))
		return 1;
	return anon_vma1 == anon_vma2;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * in front of (at a lower virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 *
 * We don't check here for the merged mmap wrapping around the end of pagecache
 * indices (16TB on ia32) because do_mmap() does not permit mmap's which
 * wrap, nor mmaps which cover the final page at index -1UL.
 */
static int
can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
		     struct anon_vma *anon_vma, struct file *file,
		     pgoff_t vm_pgoff,
		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
		     struct anon_vma_name *anon_name)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		if (vma->vm_pgoff == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
 * beyond (at a higher virtual address and file offset than) the vma.
 *
 * We cannot merge two vmas if they have differently assigned (non-NULL)
 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
 */
static int
can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
		    struct anon_vma *anon_vma, struct file *file,
		    pgoff_t vm_pgoff,
		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
		    struct anon_vma_name *anon_name)
{
	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
		pgoff_t vm_pglen;
		vm_pglen = vma_pages(vma);
		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
			return 1;
	}
	return 0;
}

/*
 * Given a mapping request (addr,end,vm_flags,file,pgoff,anon_name),
 * figure out whether that can be merged with its predecessor or its
 * successor.  Or both (it neatly fills a hole).
 *
 * In most cases - when called for mmap, brk or mremap - [addr,end) is
 * certain not to be mapped by the time vma_merge is called; but when
 * called for mprotect, it is certain to be already mapped (either at
 * an offset within prev, or at the start of next), and the flags of
 * this area are about to be changed to vm_flags - and the no-change
 * case has already been eliminated.
 *
 * The following mprotect cases have to be considered, where AAAA is
 * the area passed down from mprotect_fixup, never extending beyond one
 * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
 *
 *     AAAA             AAAA                   AAAA
 *    PPPPPPNNNNNN    PPPPPPNNNNNN       PPPPPPNNNNNN
 *    cannot merge    might become       might become
 *                    PPNNNNNNNNNN       PPPPPPPPPPNN
 *    mmap, brk or    case 4 below       case 5 below
 *    mremap move:
 *                        AAAA               AAAA
 *                    PPPP    NNNN       PPPPNNNNXXXX
 *                    might become       might become
 *                    PPPPPPPPPPPP 1 or  PPPPPPPPPPPP 6 or
 *                    PPPPPPPPNNNN 2 or  PPPPPPPPXXXX 7 or
 *                    PPPPNNNNNNNN 3     PPPPXXXXXXXX 8
 *
 * It is important for case 8 that the vma NNNN overlapping the
 * region AAAA is never going to be extended over XXXX. Instead XXXX must
 * be extended in region AAAA and NNNN must be removed. This way in
 * all cases where vma_merge succeeds, the moment vma_adjust drops the
 * rmap_locks, the properties of the merged vma will be already
 * correct for the whole merged range. Some of those properties like
 * vm_page_prot/vm_flags may be accessed by rmap_walks and they must
 * be correct for the whole merged range immediately after the
 * rmap_locks are released. Otherwise if XXXX would be removed and
 * NNNN would be extended over the XXXX range, remove_migration_ptes
 * or other rmap walkers (if working on addresses beyond the "end"
 * parameter) may establish ptes with the wrong permissions of NNNN
 * instead of the right permissions of XXXX.
 */
struct vm_area_struct *vma_merge(struct mm_struct *mm,
			struct vm_area_struct *prev, unsigned long addr,
			unsigned long end, unsigned long vm_flags,
			struct anon_vma *anon_vma, struct file *file,
			pgoff_t pgoff, struct mempolicy *policy,
			struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
			struct anon_vma_name *anon_name)
{
	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
	struct vm_area_struct *area, *next;
	int err;

	/*
	 * We later require that vma->vm_flags == vm_flags,
	 * so this tests vma->vm_flags & VM_SPECIAL, too.
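	 *
	 * (Illustrative note: VM_SPECIAL covers VM_IO, VM_DONTEXPAND,
	 *  VM_PFNMAP and VM_MIXEDMAP mappings -- see the VM_SPECIAL
	 *  definition in <linux/mm.h> -- none of which may ever be merged.)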
	 */
	if (vm_flags & VM_SPECIAL)
		return NULL;

	next = vma_next(mm, prev);
	area = next;
	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
		next = next->vm_next;

	/* verify some invariant that must be enforced by the caller */
	VM_WARN_ON(prev && addr <= prev->vm_start);
	VM_WARN_ON(area && end > area->vm_end);
	VM_WARN_ON(addr >= end);

	/*
	 * Can it merge with the predecessor?
	 */
	if (prev && prev->vm_end == addr &&
			mpol_equal(vma_policy(prev), policy) &&
			can_vma_merge_after(prev, vm_flags,
					    anon_vma, file, pgoff,
					    vm_userfaultfd_ctx, anon_name)) {
		/*
		 * OK, it can.  Can we now merge in the successor as well?
		 */
		if (next && end == next->vm_start &&
				mpol_equal(policy, vma_policy(next)) &&
				can_vma_merge_before(next, vm_flags,
						     anon_vma, file,
						     pgoff+pglen,
						     vm_userfaultfd_ctx, anon_name) &&
				is_mergeable_anon_vma(prev->anon_vma,
						      next->anon_vma, NULL)) {
							/* cases 1, 6 */
			err = __vma_adjust(prev, prev->vm_start,
					 next->vm_end, prev->vm_pgoff, NULL,
					 prev);
		} else					/* cases 2, 5, 7 */
			err = __vma_adjust(prev, prev->vm_start,
					 end, prev->vm_pgoff, NULL, prev);
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(prev, vm_flags);
		return prev;
	}

	/*
	 * Can this new request be merged in front of next?
	 */
	if (next && end == next->vm_start &&
			mpol_equal(policy, vma_policy(next)) &&
			can_vma_merge_before(next, vm_flags,
					     anon_vma, file, pgoff+pglen,
					     vm_userfaultfd_ctx, anon_name)) {
		if (prev && addr < prev->vm_end)	/* case 4 */
			err = __vma_adjust(prev, prev->vm_start,
					 addr, prev->vm_pgoff, NULL, next);
		else {					/* cases 3, 8 */
			err = __vma_adjust(area, addr, next->vm_end,
					 next->vm_pgoff - pglen, NULL, next);
			/*
			 * In case 3 area is already equal to next and
			 * this is a noop, but in case 8 "area" has
			 * been removed and next was expanded over it.
			 */
			area = next;
		}
		if (err)
			return NULL;
		khugepaged_enter_vma_merge(area, vm_flags);
		return area;
	}

	return NULL;
}

/*
 * Rough compatibility check to quickly see if it's even worth looking
 * at sharing an anon_vma.
 *
 * They need to have the same vm_file, and the flags can only differ
 * in things that mprotect may change.
 *
 * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
 * we can merge the two vma's. For example, we refuse to merge a vma if
 * there is a vm_ops->close() function, because that indicates that the
 * driver is doing some kind of reference counting. But that doesn't
 * really matter for the anon_vma sharing case.
 */
static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
{
	return a->vm_end == b->vm_start &&
		mpol_equal(vma_policy(a), vma_policy(b)) &&
		a->vm_file == b->vm_file &&
		!((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_SOFTDIRTY)) &&
		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}

/*
 * Do some basic sanity checking to see if we can re-use the anon_vma
 * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
 * the same as 'old', the other will be the new one that is trying
 * to share the anon_vma.
 *
 * NOTE! This runs with mm_sem held for reading, so it is possible that
 * the anon_vma of 'old' is concurrently in the process of being set up
 * by another page fault trying to merge _that_. But that's ok: if it
 * is being set up, that automatically means that it will be a singleton
 * acceptable for merging, so we can do all of this optimistically. But
 * we do that READ_ONCE() to make sure that we never re-load the pointer.
 *
 * IOW: that the "list_is_singular()" test on the anon_vma_chain only
 * matters for the 'stable anon_vma' case (ie the thing we want to avoid
 * is to return an anon_vma that is "complex" due to having gone through
 * a fork).
 *
 * We also make sure that the two vma's are compatible (adjacent,
 * and with the same memory policies). That's all stable, even with just
 * a read lock on the mm_sem.
 */
static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
{
	if (anon_vma_compatible(a, b)) {
		struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);

		if (anon_vma && list_is_singular(&old->anon_vma_chain))
			return anon_vma;
	}
	return NULL;
}

/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * neighbouring vmas for a suitable anon_vma, before it goes off
 * to allocate a new anon_vma.  It checks because a repetitive
 * sequence of mprotects and faults may otherwise lead to distinct
 * anon_vmas being allocated, preventing vma merge in subsequent
 * mprotect.
 */
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
	struct anon_vma *anon_vma = NULL;

	/* Try next first. */
	if (vma->vm_next) {
		anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
		if (anon_vma)
			return anon_vma;
	}

	/* Try prev next. */
	if (vma->vm_prev)
		anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);

	/*
	 * We might reach here with anon_vma == NULL if we can't find
	 * any reusable anon_vma.
	 * There's no absolute need to look only at touching neighbours:
	 * we could search further afield for "compatible" anon_vmas.
	 * But it would probably just be a waste of time searching,
	 * or lead to too many vmas hanging off the same anon_vma.
	 * We're trying to allow mprotect remerging later on,
	 * not trying to minimize memory used for anon_vmas.
	 */
	return anon_vma;
}

/*
 * If a hint addr is less than mmap_min_addr change hint to be as
 * low as possible but still greater than mmap_min_addr
 */
static inline unsigned long round_hint_to_min(unsigned long hint)
{
	hint &= PAGE_MASK;
	if (((void *)hint != NULL) &&
	    (hint < mmap_min_addr))
		return PAGE_ALIGN(mmap_min_addr);
	return hint;
}

static inline int mlock_future_check(struct mm_struct *mm,
				     unsigned long flags,
				     unsigned long len)
{
	unsigned long locked, lock_limit;

	/* mlock MCL_FUTURE? */
	if (flags & VM_LOCKED) {
		locked = len >> PAGE_SHIFT;
		locked += mm->locked_vm;
		lock_limit = rlimit(RLIMIT_MEMLOCK);
		lock_limit >>= PAGE_SHIFT;
		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
			return -EAGAIN;
	}
	return 0;
}

static inline u64 file_mmap_size_max(struct file *file, struct inode *inode)
{
	if (S_ISREG(inode->i_mode))
		return MAX_LFS_FILESIZE;

	if (S_ISBLK(inode->i_mode))
		return MAX_LFS_FILESIZE;

	if (S_ISSOCK(inode->i_mode))
		return MAX_LFS_FILESIZE;

	/* Special "we do even unsigned file positions" case */
	if (file->f_mode & FMODE_UNSIGNED_OFFSET)
		return 0;

	/* Yes, random drivers might want more. But I'm tired of buggy drivers */
	return ULONG_MAX;
}

static inline bool file_mmap_ok(struct file *file, struct inode *inode,
				unsigned long pgoff, unsigned long len)
{
	u64 maxsize = file_mmap_size_max(file, inode);

	if (maxsize && len > maxsize)
		return false;
	maxsize -= len;
	if (pgoff > maxsize >> PAGE_SHIFT)
		return false;
	return true;
}

/*
 * The caller must write-lock current->mm->mmap_lock.
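 *
 * A typical caller (see vm_mmap_pgoff()) does, roughly:
 *
 *	if (mmap_write_lock_killable(mm))
 *		return -EINTR;
 *	ret = do_mmap(file, addr, len, prot, flags, pgoff, &populate, &uf);
 *	mmap_write_unlock(mm);
 *	if (populate)
 *		mm_populate(ret, populate);
 *
 * (Illustrative sketch only; the real caller also completes any pending
 * userfaultfd unmaps recorded on the uf list.)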
 */
unsigned long do_mmap(struct file *file, unsigned long addr,
			unsigned long len, unsigned long prot,
			unsigned long flags, unsigned long pgoff,
			unsigned long *populate, struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	vm_flags_t vm_flags;
	int pkey = 0;
	int err = 0;

	*populate = 0;

	if (!len)
		return -EINVAL;

	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC?
	 *
	 * (the exception is when the underlying filesystem is noexec
	 *  mounted, in which case we don't add PROT_EXEC.)
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		if (!(file && path_noexec(&file->f_path)))
			prot |= PROT_EXEC;

	/* force arch specific MAP_FIXED handling in get_unmapped_area */
	if (flags & MAP_FIXED_NOREPLACE)
		flags |= MAP_FIXED;

	if (!(flags & MAP_FIXED))
		addr = round_hint_to_min(addr);

	/* Careful about overflows.. */
	len = PAGE_ALIGN(len);
	if (!len)
		return -ENOMEM;

	/* offset overflow? */
	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
		return -EOVERFLOW;

	/* Too many mappings? */
	if (mm->map_count > sysctl_max_map_count)
		return -ENOMEM;

	/* Obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */
	addr = get_unmapped_area(file, addr, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;

	if (flags & MAP_FIXED_NOREPLACE) {
		struct vm_area_struct *vma = find_vma(mm, addr);

		if (vma && vma->vm_start < addr + len)
			return -EEXIST;
	}

	if (prot == PROT_EXEC) {
		pkey = execute_only_pkey(mm);
		if (pkey < 0)
			pkey = 0;
	}

	/* Do simple checking here so the lower-level routines won't have
	 * to. We assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */
	vm_flags = calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

	trace_vendor_do_mmap(&vm_flags, &err);
	if (err)
		return err;

	if (flags & MAP_LOCKED)
		if (!can_do_mlock())
			return -EPERM;

	if (mlock_future_check(mm, vm_flags, len))
		return -EAGAIN;

	if (file) {
		struct inode *inode = file_inode(file);
		unsigned long flags_mask;

		if (!file_mmap_ok(file, inode, pgoff, len))
			return -EOVERFLOW;

		flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;

		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			/*
			 * Force use of MAP_SHARED_VALIDATE with non-legacy
			 * flags. E.g. MAP_SYNC is dangerous to use with
			 * MAP_SHARED as you don't know which consistency model
			 * you will get. We silently ignore unsupported flags
			 * with MAP_SHARED to preserve backward compatibility.
			 */
			flags &= LEGACY_MAP_MASK;
			fallthrough;
		case MAP_SHARED_VALIDATE:
			if (flags & ~flags_mask)
				return -EOPNOTSUPP;
			if (prot & PROT_WRITE) {
				if (!(file->f_mode & FMODE_WRITE))
					return -EACCES;
				if (IS_SWAPFILE(file->f_mapping->host))
					return -ETXTBSY;
			}

			/*
			 * Make sure we don't allow writing to an append-only
			 * file..
			 */
			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
				return -EACCES;

			/*
			 * Make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(file))
				return -EAGAIN;

			vm_flags |= VM_SHARED | VM_MAYSHARE;
			if (!(file->f_mode & FMODE_WRITE))
				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
			fallthrough;
		case MAP_PRIVATE:
			if (!(file->f_mode & FMODE_READ))
				return -EACCES;
			if (path_noexec(&file->f_path)) {
				if (vm_flags & VM_EXEC)
					return -EPERM;
				vm_flags &= ~VM_MAYEXEC;
			}

			if (!file->f_op->mmap)
				return -ENODEV;
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			break;

		default:
			return -EINVAL;
		}
	} else {
		switch (flags & MAP_TYPE) {
		case MAP_SHARED:
			if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
				return -EINVAL;
			/*
			 * Ignore pgoff.
			 */
			pgoff = 0;
			vm_flags |= VM_SHARED | VM_MAYSHARE;
			break;
		case MAP_PRIVATE:
			/*
			 * Set pgoff according to addr for anon_vma.
			 */
			pgoff = addr >> PAGE_SHIFT;
			break;
#ifdef CONFIG_MEM_PURGEABLE
		case MAP_PURGEABLE:
			vm_flags |= VM_PURGEABLE;
			pr_info("vm_flags purgeable = %lx.\n", VM_PURGEABLE);
			break;
		case MAP_USEREXPTE:
			vm_flags |= VM_USEREXPTE;
			pr_info("vm_flags useredpte = %lx.\n", VM_USEREXPTE);
			break;
#endif
		default:
			return -EINVAL;
		}
	}

	/*
	 * Set 'VM_NORESERVE' if we should not account for the
	 * memory use of this mapping.
	 */
	if (flags & MAP_NORESERVE) {
		/* We honor MAP_NORESERVE if allowed to overcommit */
		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			vm_flags |= VM_NORESERVE;

		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
		if (file && is_file_hugepages(file))
			vm_flags |= VM_NORESERVE;
	}

	addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
	if (!IS_ERR_VALUE(addr) &&
	    ((vm_flags & VM_LOCKED) ||
	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
		*populate = len;
	return addr;
}

unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
			      unsigned long prot, unsigned long flags,
			      unsigned long fd, unsigned long pgoff)
{
	struct file *file = NULL;
	unsigned long retval;

	if (!(flags & MAP_ANONYMOUS)) {
		audit_mmap_fd(fd, flags);
		file = fget(fd);
		if (!file)
			return -EBADF;
		if (is_file_hugepages(file)) {
			len = ALIGN(len, huge_page_size(hstate_file(file)));
		} else if (unlikely(flags & MAP_HUGETLB)) {
			retval = -EINVAL;
			goto out_fput;
		}
	} else if (flags & MAP_HUGETLB) {
		struct user_struct *user = NULL;
		struct hstate *hs;

		hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (!hs)
			return -EINVAL;

		len = ALIGN(len, huge_page_size(hs));
		/*
		 * VM_NORESERVE is used because the reservations will be
		 * taken when vm_ops->mmap() is called
		 * A dummy user value is used because we are not locking
		 * memory so no accounting is necessary
		 */
		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
				VM_NORESERVE,
				&user, HUGETLB_ANONHUGE_INODE,
				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
		if (IS_ERR(file))
			return PTR_ERR(file);
	}

	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);

	retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);

	if (!IS_ERR_VALUE(retval)) {
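		/*
		 * OpenHarmony lite-HCK hook: gives the JIT/execute-memory
		 * policy a chance to veto the freshly created mapping; on
		 * rejection it is expected to overwrite retval with an error
		 * value, which is then reported below.
		 */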
		CALL_HCK_LITE_HOOK(check_jit_memory_lhck, current, fd, prot, flags, PAGE_ALIGN(len), &retval);
		if (IS_ERR_VALUE(retval))
			pr_info("JITINFO: jit request denied");
	}
out_fput:
	if (file)
		fput(file);
	return retval;
}

SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
		unsigned long, prot, unsigned long, flags,
		unsigned long, fd, unsigned long, pgoff)
{
	return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
}

#ifdef __ARCH_WANT_SYS_OLD_MMAP
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;
	unsigned long flags;
	unsigned long fd;
	unsigned long offset;
};

SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
{
	struct mmap_arg_struct a;

	if (copy_from_user(&a, arg, sizeof(a)))
		return -EFAULT;
	if (offset_in_page(a.offset))
		return -EINVAL;

	return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
			       a.offset >> PAGE_SHIFT);
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */

/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
	vm_flags_t vm_flags = vma->vm_flags;
	const struct vm_operations_struct *vm_ops = vma->vm_ops;

	/* If it was private or non-writable, the write bit is already clear */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
		return 0;

	/* The backer wishes to know when pages are first written to? */
	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
		return 1;

	/* The open routine did something to the protections that pgprot_modify
	 * won't preserve? */
	if (pgprot_val(vm_page_prot) !=
	    pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
		return 0;

	/*
	 * Do we need to track softdirty? hugetlb does not support softdirty
	 * tracking yet.
	 */
	if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY) &&
	    !is_vm_hugetlb_page(vma))
		return 1;

	/* Specialty mapping? */
	if (vm_flags & VM_PFNMAP)
		return 0;

	/* Can the mapping track the dirty pages? */
	return vma->vm_file && vma->vm_file->f_mapping &&
		mapping_can_writeback(vma->vm_file->f_mapping);
}

/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */
static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
	/*
	 * hugetlb has its own accounting separate from the core VM
	 * VM_HUGETLB may not be set yet so we cannot check for that flag.
	 */
	if (file && is_file_hugepages(file))
		return 0;

	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}

unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev, *merge;
	int error;
	struct rb_node **rb_link, *rb_parent;
	unsigned long charged = 0;

	/* Check against address space limit. */
	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
		unsigned long nr_pages;

		/*
		 * MAP_FIXED may remove pages of mappings that intersect with
		 * the requested mapping. Account for the pages it would unmap.
		 */
		nr_pages = count_vma_pages_range(mm, addr, addr + len);

		if (!may_expand_vm(mm, vm_flags,
					(len >> PAGE_SHIFT) - nr_pages))
			return -ENOMEM;
	}

	/* Clear old maps, set up prev, rb_link, rb_parent, and uf */
	if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf))
		return -ENOMEM;
	/*
	 * Private writable mapping: check memory availability
	 */
	if (accountable_mapping(file, vm_flags)) {
		charged = len >> PAGE_SHIFT;
		if (security_vm_enough_memory_mm(mm, charged))
			return -ENOMEM;
		vm_flags |= VM_ACCOUNT;
	}

	/*
	 * Can we just expand an old mapping?
	 */
	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
	if (vma)
		goto out;

	/*
	 * Determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	vma = vm_area_alloc(mm);
	if (!vma) {
		error = -ENOMEM;
		goto unacct_error;
	}

	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags;
	vma->vm_page_prot = vm_get_page_prot(vm_flags);
	vma->vm_pgoff = pgoff;

	if (file) {
		if (vm_flags & VM_DENYWRITE) {
			error = deny_write_access(file);
			if (error)
				goto free_vma;
		}
		if (vm_flags & VM_SHARED) {
			error = mapping_map_writable(file->f_mapping);
			if (error)
				goto allow_write_and_free_vma;
		}

		/* ->mmap() can change vma->vm_file, but must guarantee that
		 * vma_link() below can deny write-access if VM_DENYWRITE is set
		 * and map writably if VM_SHARED is set. This usually means the
		 * new file must not have been exposed to user-space, yet.
		 */
		vma->vm_file = get_file(file);
		error = call_mmap(file, vma);
		if (error)
			goto unmap_and_free_vma;

		/* Can addr have changed??
		 *
		 * Answer: Yes, several device drivers can do it in their
		 *         f_op->mmap method. -DaveM
		 * Bug: If addr is changed, prev, rb_link, rb_parent should
		 *      be updated for vma_link()
		 */
		WARN_ON_ONCE(addr != vma->vm_start);

		addr = vma->vm_start;

		/* If vm_flags changed after call_mmap(), we should try merge vma again
		 * as we may succeed this time.
		 */
		if (unlikely(vm_flags != vma->vm_flags && prev)) {
			merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
				NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
			if (merge) {
				/* ->mmap() can change vma->vm_file and fput the original file. So
				 * fput the vma->vm_file here or we would add an extra fput for file
				 * and cause general protection fault ultimately.
				 */
				fput(vma->vm_file);
				vm_area_free(vma);
				vma = merge;
				/* Update vm_flags to pick up the change. */
				vm_flags = vma->vm_flags;
				goto unmap_writable;
			}
		}

		vm_flags = vma->vm_flags;
	} else if (vm_flags & VM_SHARED) {
		error = shmem_zero_setup(vma);
		if (error)
			goto free_vma;
	} else {
		vma_set_anonymous(vma);
	}

	/* Allow architectures to sanity-check the vma */
	if (security_mmap_region(vma) ||
	    !arch_validate_flags(vma->vm_flags)) {
		error = -EINVAL;
		if (file)
			goto close_and_free_vma;
		else
			goto free_vma;
	}

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Once vma denies write, undo our temporary denial count */
	if (file) {
unmap_writable:
		if (vm_flags & VM_SHARED)
			mapping_unmap_writable(file->f_mapping);
		if (vm_flags & VM_DENYWRITE)
			allow_write_access(file);
	}
	file = vma->vm_file;
out:
	perf_event_mmap(vma);

	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
	if (vm_flags & VM_LOCKED) {
		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
					is_vm_hugetlb_page(vma) ||
			vma == get_gate_vma(current->mm))
			vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
		else
			mm->locked_vm += (len >> PAGE_SHIFT);
	}

	if (file)
		uprobe_mmap(vma);

	/*
	 * New (or expanded) vma always get soft dirty status.
	 * Otherwise user-space soft-dirty page tracker won't
	 * be able to distinguish situation when vma area unmapped,
	 * then new mapped in-place (which must be aimed as
	 * a completely new data area).
	 */
	vma->vm_flags |= VM_SOFTDIRTY;

	vma_set_page_prot(vma);

	return addr;

close_and_free_vma:
	if (vma->vm_ops && vma->vm_ops->close)
		vma->vm_ops->close(vma);
unmap_and_free_vma:
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	if (vm_flags & VM_SHARED)
		mapping_unmap_writable(file->f_mapping);
allow_write_and_free_vma:
	if (vm_flags & VM_DENYWRITE)
		allow_write_access(file);
free_vma:
	vm_area_free(vma);
unacct_error:
	if (charged)
		vm_unacct_memory(charged);
	return error;
}

static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
	/*
	 * We implement the search by looking for an rbtree node that
	 * immediately follows a suitable gap. That is,
	 * - gap_start = vma->vm_prev->vm_end <= info->high_limit - length;
	 * - gap_end   = vma->vm_start        >= info->low_limit  + length;
	 * - gap_end - gap_start >= length
	 */

	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/* Adjust search limits by the desired length */
	if (info->high_limit < length)
		return -ENOMEM;
	high_limit = info->high_limit - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		goto check_highest;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		goto check_highest;

	while (true) {
		/* Visit left subtree if it looks promising */
		gap_end = vm_start_gap(vma);
		if (gap_end >= low_limit && vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
check_current:
		/* Check if current node has a suitable gap */
		if (gap_start > high_limit)
			return -ENOMEM;
		if ((gap_end >= low_limit &&
		    gap_end > gap_start && gap_end - gap_start >= length) &&
			(xpm_region_outer_hook(gap_start, gap_end, info->flags)))
			goto found;

		/* Visit right subtree if it looks promising */
		if (vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				goto check_highest;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_left) {
				gap_start = vm_end_gap(vma->vm_prev);
				gap_end = vm_start_gap(vma);
				goto check_current;
			}
		}
	}

check_highest:
	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	gap_end = ULONG_MAX;  /* Only for VM_BUG_ON below */
	if (gap_start > high_limit)
		return -ENOMEM;

found:
	/* We found a suitable gap. Clip it with the original low_limit.
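	 *
	 * The adjustment below then rounds gap_start up to the requested
	 * alignment, i.e. afterwards (gap_start & align_mask) ==
	 * (align_offset & align_mask). Worked example: with align_mask ==
	 * 0x1fffff and align_offset == 0 this yields the next 2MB boundary
	 * at or above gap_start.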
	 */
	if (gap_start < info->low_limit)
		gap_start = info->low_limit;

	/* Adjust gap address to the desired alignment */
	gap_start += (info->align_offset - gap_start) & info->align_mask;

	VM_BUG_ON(gap_start + info->length > info->high_limit);
	VM_BUG_ON(gap_start + info->length > gap_end);
	return gap_start;
}

static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	if (gap_start <= high_limit)
		goto found_highest;

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		return -ENOMEM;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;

	while (true) {
		/* Visit right subtree if it looks promising */
		gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

check_current:
		/* Check if current node has a suitable gap */
		gap_end = vm_start_gap(vma);
		if (gap_end < low_limit)
			return -ENOMEM;
		if ((gap_start <= high_limit &&
		    gap_end > gap_start && gap_end - gap_start >= length) &&
			(xpm_region_outer_hook(gap_start, gap_end, info->flags)))
			goto found;

		/* Visit left subtree if it looks promising */
		if (vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;
			if (!rb_parent(prev))
				return -ENOMEM;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_right) {
				gap_start = vma->vm_prev ?
					vm_end_gap(vma->vm_prev) : 0;
				goto check_current;
			}
		}
	}

found:
	/* We found a suitable gap. Clip it with the original high_limit. */
	if (gap_end > info->high_limit)
		gap_end = info->high_limit;

found_highest:
	/* Compute highest gap address at the desired alignment */
	gap_end -= info->length;
	gap_end -= (gap_end - info->align_offset) & info->align_mask;

	VM_BUG_ON(gap_end < info->low_limit);
	VM_BUG_ON(gap_end < gap_start);
	return gap_end;
}

/*
 * Search for an unmapped address range.
 *
 * We are looking for a range that:
 * - does not intersect with any VMA;
 * - is contained within the [low_limit, high_limit) interval;
 * - is at least the desired size.
 * - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
 */
unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
{
	unsigned long addr;

	if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
		addr = unmapped_area_topdown(info);
	else
		addr = unmapped_area(info);

	trace_vm_unmapped_area(addr, info);
	return addr;
}

/* Get an address range which is currently unmapped.
 * For shmat() with addr=0.
 *
 * Ugly calling convention alert:
 * Return value with the low bits set means error value,
 * ie
 *	if (ret & ~PAGE_MASK)
 *		error = ret;
 *
 * This function "knows" that -ENOMEM has the bits set.
 */
#ifndef HAVE_ARCH_UNMAPPED_AREA
unsigned long
arch_get_unmapped_area(struct file *filp, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	unsigned long xpm_addr;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev;
	struct vm_unmapped_area_info info;
	const unsigned long mmap_end = arch_get_mmap_end(addr);

	if (len > mmap_end - mmap_min_addr)
		return -ENOMEM;

	xpm_addr = xpm_get_unmapped_area_hook(addr, len, flags, 0);
	if (xpm_addr)
		return xpm_addr;

	if (flags & MAP_FIXED)
		return addr;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma_prev(mm, addr, &prev);
		if (mmap_end - len >= addr && addr >= mmap_min_addr &&
		    (!vma || addr + len <= vm_start_gap(vma)) &&
		    (!prev || addr >= vm_end_gap(prev)) &&
		    (xpm_region_outer_hook(addr, addr + len, 0)))
			return addr;
	}

	info.flags = 0;
	info.length = len;
	info.low_limit = mm->mmap_base;
	info.high_limit = mmap_end;
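	/* No extra alignment constraint for the generic bottom-up search. */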
info.align_mask = 0; 22338c2ecf20Sopenharmony_ci info.align_offset = 0; 22348c2ecf20Sopenharmony_ci return vm_unmapped_area(&info); 22358c2ecf20Sopenharmony_ci} 22368c2ecf20Sopenharmony_ci#endif 22378c2ecf20Sopenharmony_ci 22388c2ecf20Sopenharmony_ci/* 22398c2ecf20Sopenharmony_ci * This mmap-allocator allocates new areas top-down from below the 22408c2ecf20Sopenharmony_ci * stack's low limit (the base): 22418c2ecf20Sopenharmony_ci */ 22428c2ecf20Sopenharmony_ci#ifndef HAVE_ARCH_UNMAPPED_AREA_TOPDOWN 22438c2ecf20Sopenharmony_ciunsigned long 22448c2ecf20Sopenharmony_ciarch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, 22458c2ecf20Sopenharmony_ci unsigned long len, unsigned long pgoff, 22468c2ecf20Sopenharmony_ci unsigned long flags) 22478c2ecf20Sopenharmony_ci{ 22488c2ecf20Sopenharmony_ci unsigned long xpm_addr; 22498c2ecf20Sopenharmony_ci struct vm_area_struct *vma, *prev; 22508c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 22518c2ecf20Sopenharmony_ci struct vm_unmapped_area_info info; 22528c2ecf20Sopenharmony_ci const unsigned long mmap_end = arch_get_mmap_end(addr); 22538c2ecf20Sopenharmony_ci 22548c2ecf20Sopenharmony_ci /* requested length too big for entire address space */ 22558c2ecf20Sopenharmony_ci if (len > mmap_end - mmap_min_addr) 22568c2ecf20Sopenharmony_ci return -ENOMEM; 22578c2ecf20Sopenharmony_ci 22588c2ecf20Sopenharmony_ci xpm_addr = xpm_get_unmapped_area_hook(addr, len, flags, 22598c2ecf20Sopenharmony_ci VM_UNMAPPED_AREA_TOPDOWN); 22608c2ecf20Sopenharmony_ci if (xpm_addr) 22618c2ecf20Sopenharmony_ci return xpm_addr; 22628c2ecf20Sopenharmony_ci 22638c2ecf20Sopenharmony_ci if (flags & MAP_FIXED) 22648c2ecf20Sopenharmony_ci return addr; 22658c2ecf20Sopenharmony_ci 22668c2ecf20Sopenharmony_ci /* requesting a specific address */ 22678c2ecf20Sopenharmony_ci if (addr) { 22688c2ecf20Sopenharmony_ci addr = PAGE_ALIGN(addr); 22698c2ecf20Sopenharmony_ci vma = find_vma_prev(mm, addr, &prev); 22708c2ecf20Sopenharmony_ci if (mmap_end - len >= addr && addr >= mmap_min_addr && 22718c2ecf20Sopenharmony_ci (!vma || addr + len <= vm_start_gap(vma)) && 22728c2ecf20Sopenharmony_ci (!prev || addr >= vm_end_gap(prev)) && 22738c2ecf20Sopenharmony_ci (xpm_region_outer_hook(addr, addr + len, 0))) 22748c2ecf20Sopenharmony_ci return addr; 22758c2ecf20Sopenharmony_ci } 22768c2ecf20Sopenharmony_ci 22778c2ecf20Sopenharmony_ci info.flags = VM_UNMAPPED_AREA_TOPDOWN; 22788c2ecf20Sopenharmony_ci info.length = len; 22798c2ecf20Sopenharmony_ci info.low_limit = max(PAGE_SIZE, mmap_min_addr); 22808c2ecf20Sopenharmony_ci info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); 22818c2ecf20Sopenharmony_ci info.align_mask = 0; 22828c2ecf20Sopenharmony_ci info.align_offset = 0; 22838c2ecf20Sopenharmony_ci addr = vm_unmapped_area(&info); 22848c2ecf20Sopenharmony_ci 22858c2ecf20Sopenharmony_ci /* 22868c2ecf20Sopenharmony_ci * A failed mmap() very likely causes application failure, 22878c2ecf20Sopenharmony_ci * so fall back to the bottom-up function here. This scenario 22888c2ecf20Sopenharmony_ci * can happen with large stack limits and large mmap() 22898c2ecf20Sopenharmony_ci * allocations. 
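	 * (The bottom-up retry below restarts from TASK_UNMAPPED_BASE. The
	 * offset_in_page(addr) test works because a successful result is
	 * always page aligned, so any low bits in the return value indicate
	 * an error code such as -ENOMEM.)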
22908c2ecf20Sopenharmony_ci */ 22918c2ecf20Sopenharmony_ci if (offset_in_page(addr)) { 22928c2ecf20Sopenharmony_ci VM_BUG_ON(addr != -ENOMEM); 22938c2ecf20Sopenharmony_ci info.flags = 0; 22948c2ecf20Sopenharmony_ci info.low_limit = TASK_UNMAPPED_BASE; 22958c2ecf20Sopenharmony_ci info.high_limit = mmap_end; 22968c2ecf20Sopenharmony_ci addr = vm_unmapped_area(&info); 22978c2ecf20Sopenharmony_ci } 22988c2ecf20Sopenharmony_ci 22998c2ecf20Sopenharmony_ci return addr; 23008c2ecf20Sopenharmony_ci} 23018c2ecf20Sopenharmony_ci#endif 23028c2ecf20Sopenharmony_ci 23038c2ecf20Sopenharmony_ciunsigned long 23048c2ecf20Sopenharmony_ciget_unmapped_area(struct file *file, unsigned long addr, unsigned long len, 23058c2ecf20Sopenharmony_ci unsigned long pgoff, unsigned long flags) 23068c2ecf20Sopenharmony_ci{ 23078c2ecf20Sopenharmony_ci unsigned long (*get_area)(struct file *, unsigned long, 23088c2ecf20Sopenharmony_ci unsigned long, unsigned long, unsigned long); 23098c2ecf20Sopenharmony_ci 23108c2ecf20Sopenharmony_ci unsigned long error = arch_mmap_check(addr, len, flags); 23118c2ecf20Sopenharmony_ci if (error) 23128c2ecf20Sopenharmony_ci return error; 23138c2ecf20Sopenharmony_ci 23148c2ecf20Sopenharmony_ci /* Careful about overflows.. */ 23158c2ecf20Sopenharmony_ci if (len > TASK_SIZE) 23168c2ecf20Sopenharmony_ci return -ENOMEM; 23178c2ecf20Sopenharmony_ci 23188c2ecf20Sopenharmony_ci get_area = current->mm->get_unmapped_area; 23198c2ecf20Sopenharmony_ci if (file) { 23208c2ecf20Sopenharmony_ci if (file->f_op->get_unmapped_area) 23218c2ecf20Sopenharmony_ci get_area = file->f_op->get_unmapped_area; 23228c2ecf20Sopenharmony_ci } else if (flags & MAP_SHARED) { 23238c2ecf20Sopenharmony_ci /* 23248c2ecf20Sopenharmony_ci * mmap_region() will call shmem_zero_setup() to create a file, 23258c2ecf20Sopenharmony_ci * so use shmem's get_unmapped_area in case it can be huge. 23268c2ecf20Sopenharmony_ci * do_mmap() will clear pgoff, so match alignment. 23278c2ecf20Sopenharmony_ci */ 23288c2ecf20Sopenharmony_ci pgoff = 0; 23298c2ecf20Sopenharmony_ci get_area = shmem_get_unmapped_area; 23308c2ecf20Sopenharmony_ci } 23318c2ecf20Sopenharmony_ci 23328c2ecf20Sopenharmony_ci addr = get_area(file, addr, len, pgoff, flags); 23338c2ecf20Sopenharmony_ci if (IS_ERR_VALUE(addr)) 23348c2ecf20Sopenharmony_ci return addr; 23358c2ecf20Sopenharmony_ci 23368c2ecf20Sopenharmony_ci if (addr > TASK_SIZE - len) 23378c2ecf20Sopenharmony_ci return -ENOMEM; 23388c2ecf20Sopenharmony_ci if (offset_in_page(addr)) 23398c2ecf20Sopenharmony_ci return -EINVAL; 23408c2ecf20Sopenharmony_ci 23418c2ecf20Sopenharmony_ci error = security_mmap_addr(addr); 23428c2ecf20Sopenharmony_ci return error ? error : addr; 23438c2ecf20Sopenharmony_ci} 23448c2ecf20Sopenharmony_ci 23458c2ecf20Sopenharmony_ciEXPORT_SYMBOL(get_unmapped_area); 23468c2ecf20Sopenharmony_ci 23478c2ecf20Sopenharmony_ci/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 23488c2ecf20Sopenharmony_cistruct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) 23498c2ecf20Sopenharmony_ci{ 23508c2ecf20Sopenharmony_ci struct rb_node *rb_node; 23518c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 23528c2ecf20Sopenharmony_ci 23538c2ecf20Sopenharmony_ci /* Check the cache first. 
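	 * The per-task vmacache remembers the few most recently used VMAs,
	 * so repeated lookups near the same address skip the rbtree walk;
	 * it is invalidated whenever the VMA set changes.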
*/ 23548c2ecf20Sopenharmony_ci vma = vmacache_find(mm, addr); 23558c2ecf20Sopenharmony_ci if (likely(vma)) 23568c2ecf20Sopenharmony_ci return vma; 23578c2ecf20Sopenharmony_ci 23588c2ecf20Sopenharmony_ci rb_node = mm->mm_rb.rb_node; 23598c2ecf20Sopenharmony_ci 23608c2ecf20Sopenharmony_ci while (rb_node) { 23618c2ecf20Sopenharmony_ci struct vm_area_struct *tmp; 23628c2ecf20Sopenharmony_ci 23638c2ecf20Sopenharmony_ci tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); 23648c2ecf20Sopenharmony_ci 23658c2ecf20Sopenharmony_ci if (tmp->vm_end > addr) { 23668c2ecf20Sopenharmony_ci vma = tmp; 23678c2ecf20Sopenharmony_ci if (tmp->vm_start <= addr) 23688c2ecf20Sopenharmony_ci break; 23698c2ecf20Sopenharmony_ci rb_node = rb_node->rb_left; 23708c2ecf20Sopenharmony_ci } else 23718c2ecf20Sopenharmony_ci rb_node = rb_node->rb_right; 23728c2ecf20Sopenharmony_ci } 23738c2ecf20Sopenharmony_ci 23748c2ecf20Sopenharmony_ci if (vma) 23758c2ecf20Sopenharmony_ci vmacache_update(addr, vma); 23768c2ecf20Sopenharmony_ci return vma; 23778c2ecf20Sopenharmony_ci} 23788c2ecf20Sopenharmony_ci 23798c2ecf20Sopenharmony_ciEXPORT_SYMBOL(find_vma); 23808c2ecf20Sopenharmony_ci 23818c2ecf20Sopenharmony_ci/* 23828c2ecf20Sopenharmony_ci * Same as find_vma, but also return a pointer to the previous VMA in *pprev. 23838c2ecf20Sopenharmony_ci */ 23848c2ecf20Sopenharmony_cistruct vm_area_struct * 23858c2ecf20Sopenharmony_cifind_vma_prev(struct mm_struct *mm, unsigned long addr, 23868c2ecf20Sopenharmony_ci struct vm_area_struct **pprev) 23878c2ecf20Sopenharmony_ci{ 23888c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 23898c2ecf20Sopenharmony_ci 23908c2ecf20Sopenharmony_ci vma = find_vma(mm, addr); 23918c2ecf20Sopenharmony_ci if (vma) { 23928c2ecf20Sopenharmony_ci *pprev = vma->vm_prev; 23938c2ecf20Sopenharmony_ci } else { 23948c2ecf20Sopenharmony_ci struct rb_node *rb_node = rb_last(&mm->mm_rb); 23958c2ecf20Sopenharmony_ci 23968c2ecf20Sopenharmony_ci *pprev = rb_node ? rb_entry(rb_node, struct vm_area_struct, vm_rb) : NULL; 23978c2ecf20Sopenharmony_ci } 23988c2ecf20Sopenharmony_ci return vma; 23998c2ecf20Sopenharmony_ci} 24008c2ecf20Sopenharmony_ci 24018c2ecf20Sopenharmony_ci/* 24028c2ecf20Sopenharmony_ci * Verify that the stack growth is acceptable and 24038c2ecf20Sopenharmony_ci * update accounting. This is shared with both the 24048c2ecf20Sopenharmony_ci * grow-up and grow-down cases. 
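 * 'size' is the would-be total size of the stack vma in bytes, while 'grow'
 * is the number of pages being added; both are checked against the relevant
 * rlimits before any accounting is updated.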
24058c2ecf20Sopenharmony_ci */ 24068c2ecf20Sopenharmony_cistatic int acct_stack_growth(struct vm_area_struct *vma, 24078c2ecf20Sopenharmony_ci unsigned long size, unsigned long grow) 24088c2ecf20Sopenharmony_ci{ 24098c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 24108c2ecf20Sopenharmony_ci unsigned long new_start; 24118c2ecf20Sopenharmony_ci 24128c2ecf20Sopenharmony_ci /* address space limit tests */ 24138c2ecf20Sopenharmony_ci if (!may_expand_vm(mm, vma->vm_flags, grow)) 24148c2ecf20Sopenharmony_ci return -ENOMEM; 24158c2ecf20Sopenharmony_ci 24168c2ecf20Sopenharmony_ci /* Stack limit test */ 24178c2ecf20Sopenharmony_ci if (size > rlimit(RLIMIT_STACK)) 24188c2ecf20Sopenharmony_ci return -ENOMEM; 24198c2ecf20Sopenharmony_ci 24208c2ecf20Sopenharmony_ci /* mlock limit tests */ 24218c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_LOCKED) { 24228c2ecf20Sopenharmony_ci unsigned long locked; 24238c2ecf20Sopenharmony_ci unsigned long limit; 24248c2ecf20Sopenharmony_ci locked = mm->locked_vm + grow; 24258c2ecf20Sopenharmony_ci limit = rlimit(RLIMIT_MEMLOCK); 24268c2ecf20Sopenharmony_ci limit >>= PAGE_SHIFT; 24278c2ecf20Sopenharmony_ci if (locked > limit && !capable(CAP_IPC_LOCK)) 24288c2ecf20Sopenharmony_ci return -ENOMEM; 24298c2ecf20Sopenharmony_ci } 24308c2ecf20Sopenharmony_ci 24318c2ecf20Sopenharmony_ci /* Check to ensure the stack will not grow into a hugetlb-only region */ 24328c2ecf20Sopenharmony_ci new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start : 24338c2ecf20Sopenharmony_ci vma->vm_end - size; 24348c2ecf20Sopenharmony_ci if (is_hugepage_only_range(vma->vm_mm, new_start, size)) 24358c2ecf20Sopenharmony_ci return -EFAULT; 24368c2ecf20Sopenharmony_ci 24378c2ecf20Sopenharmony_ci /* 24388c2ecf20Sopenharmony_ci * Overcommit.. This must be the final test, as it will 24398c2ecf20Sopenharmony_ci * update security statistics. 24408c2ecf20Sopenharmony_ci */ 24418c2ecf20Sopenharmony_ci if (security_vm_enough_memory_mm(mm, grow)) 24428c2ecf20Sopenharmony_ci return -ENOMEM; 24438c2ecf20Sopenharmony_ci 24448c2ecf20Sopenharmony_ci return 0; 24458c2ecf20Sopenharmony_ci} 24468c2ecf20Sopenharmony_ci 24478c2ecf20Sopenharmony_ci#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) 24488c2ecf20Sopenharmony_ci/* 24498c2ecf20Sopenharmony_ci * PA-RISC uses this for its stack; IA64 for its Register Backing Store. 24508c2ecf20Sopenharmony_ci * vma is the last one with address > vma->vm_end. Have to extend vma. 24518c2ecf20Sopenharmony_ci */ 24528c2ecf20Sopenharmony_ciint expand_upwards(struct vm_area_struct *vma, unsigned long address) 24538c2ecf20Sopenharmony_ci{ 24548c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 24558c2ecf20Sopenharmony_ci struct vm_area_struct *next; 24568c2ecf20Sopenharmony_ci unsigned long gap_addr; 24578c2ecf20Sopenharmony_ci int error = 0; 24588c2ecf20Sopenharmony_ci 24598c2ecf20Sopenharmony_ci if (!(vma->vm_flags & VM_GROWSUP)) 24608c2ecf20Sopenharmony_ci return -EFAULT; 24618c2ecf20Sopenharmony_ci 24628c2ecf20Sopenharmony_ci /* Guard against exceeding limits of the address space. 
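	 * (The PAGE_SIZE bump below accounts for the page containing
	 * 'address' itself, so the aligned address must still leave room
	 * for at least one more page under TASK_SIZE.)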
*/ 24638c2ecf20Sopenharmony_ci address &= PAGE_MASK; 24648c2ecf20Sopenharmony_ci if (address >= (TASK_SIZE & PAGE_MASK)) 24658c2ecf20Sopenharmony_ci return -ENOMEM; 24668c2ecf20Sopenharmony_ci address += PAGE_SIZE; 24678c2ecf20Sopenharmony_ci 24688c2ecf20Sopenharmony_ci /* Enforce stack_guard_gap */ 24698c2ecf20Sopenharmony_ci gap_addr = address + stack_guard_gap; 24708c2ecf20Sopenharmony_ci 24718c2ecf20Sopenharmony_ci /* Guard against overflow */ 24728c2ecf20Sopenharmony_ci if (gap_addr < address || gap_addr > TASK_SIZE) 24738c2ecf20Sopenharmony_ci gap_addr = TASK_SIZE; 24748c2ecf20Sopenharmony_ci 24758c2ecf20Sopenharmony_ci next = vma->vm_next; 24768c2ecf20Sopenharmony_ci if (next && next->vm_start < gap_addr && vma_is_accessible(next)) { 24778c2ecf20Sopenharmony_ci if (!(next->vm_flags & VM_GROWSUP)) 24788c2ecf20Sopenharmony_ci return -ENOMEM; 24798c2ecf20Sopenharmony_ci /* Check that both stack segments have the same anon_vma? */ 24808c2ecf20Sopenharmony_ci } 24818c2ecf20Sopenharmony_ci 24828c2ecf20Sopenharmony_ci /* We must make sure the anon_vma is allocated. */ 24838c2ecf20Sopenharmony_ci if (unlikely(anon_vma_prepare(vma))) 24848c2ecf20Sopenharmony_ci return -ENOMEM; 24858c2ecf20Sopenharmony_ci 24868c2ecf20Sopenharmony_ci /* 24878c2ecf20Sopenharmony_ci * vma->vm_start/vm_end cannot change under us because the caller 24888c2ecf20Sopenharmony_ci * is required to hold the mmap_lock in read mode. We need the 24898c2ecf20Sopenharmony_ci * anon_vma lock to serialize against concurrent expand_stacks. 24908c2ecf20Sopenharmony_ci */ 24918c2ecf20Sopenharmony_ci anon_vma_lock_write(vma->anon_vma); 24928c2ecf20Sopenharmony_ci 24938c2ecf20Sopenharmony_ci /* Somebody else might have raced and expanded it already */ 24948c2ecf20Sopenharmony_ci if (address > vma->vm_end) { 24958c2ecf20Sopenharmony_ci unsigned long size, grow; 24968c2ecf20Sopenharmony_ci 24978c2ecf20Sopenharmony_ci size = address - vma->vm_start; 24988c2ecf20Sopenharmony_ci grow = (address - vma->vm_end) >> PAGE_SHIFT; 24998c2ecf20Sopenharmony_ci 25008c2ecf20Sopenharmony_ci error = -ENOMEM; 25018c2ecf20Sopenharmony_ci if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) { 25028c2ecf20Sopenharmony_ci error = acct_stack_growth(vma, size, grow); 25038c2ecf20Sopenharmony_ci if (!error) { 25048c2ecf20Sopenharmony_ci /* 25058c2ecf20Sopenharmony_ci * vma_gap_update() doesn't support concurrent 25068c2ecf20Sopenharmony_ci * updates, but we only hold a shared mmap_lock 25078c2ecf20Sopenharmony_ci * lock here, so we need to protect against 25088c2ecf20Sopenharmony_ci * concurrent vma expansions. 25098c2ecf20Sopenharmony_ci * anon_vma_lock_write() doesn't help here, as 25108c2ecf20Sopenharmony_ci * we don't guarantee that all growable vmas 25118c2ecf20Sopenharmony_ci * in a mm share the same root anon vma. 25128c2ecf20Sopenharmony_ci * So, we reuse mm->page_table_lock to guard 25138c2ecf20Sopenharmony_ci * against concurrent vma expansions. 
25148c2ecf20Sopenharmony_ci */ 25158c2ecf20Sopenharmony_ci spin_lock(&mm->page_table_lock); 25168c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_LOCKED) 25178c2ecf20Sopenharmony_ci mm->locked_vm += grow; 25188c2ecf20Sopenharmony_ci vm_stat_account(mm, vma->vm_flags, grow); 25198c2ecf20Sopenharmony_ci anon_vma_interval_tree_pre_update_vma(vma); 25208c2ecf20Sopenharmony_ci vma->vm_end = address; 25218c2ecf20Sopenharmony_ci anon_vma_interval_tree_post_update_vma(vma); 25228c2ecf20Sopenharmony_ci if (vma->vm_next) 25238c2ecf20Sopenharmony_ci vma_gap_update(vma->vm_next); 25248c2ecf20Sopenharmony_ci else 25258c2ecf20Sopenharmony_ci mm->highest_vm_end = vm_end_gap(vma); 25268c2ecf20Sopenharmony_ci spin_unlock(&mm->page_table_lock); 25278c2ecf20Sopenharmony_ci 25288c2ecf20Sopenharmony_ci perf_event_mmap(vma); 25298c2ecf20Sopenharmony_ci } 25308c2ecf20Sopenharmony_ci } 25318c2ecf20Sopenharmony_ci } 25328c2ecf20Sopenharmony_ci anon_vma_unlock_write(vma->anon_vma); 25338c2ecf20Sopenharmony_ci khugepaged_enter_vma_merge(vma, vma->vm_flags); 25348c2ecf20Sopenharmony_ci validate_mm(mm); 25358c2ecf20Sopenharmony_ci return error; 25368c2ecf20Sopenharmony_ci} 25378c2ecf20Sopenharmony_ci#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ 25388c2ecf20Sopenharmony_ci 25398c2ecf20Sopenharmony_ci/* 25408c2ecf20Sopenharmony_ci * vma is the first one with address < vma->vm_start. Have to extend vma. 25418c2ecf20Sopenharmony_ci */ 25428c2ecf20Sopenharmony_ciint expand_downwards(struct vm_area_struct *vma, 25438c2ecf20Sopenharmony_ci unsigned long address) 25448c2ecf20Sopenharmony_ci{ 25458c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 25468c2ecf20Sopenharmony_ci struct vm_area_struct *prev; 25478c2ecf20Sopenharmony_ci int error = 0; 25488c2ecf20Sopenharmony_ci 25498c2ecf20Sopenharmony_ci address &= PAGE_MASK; 25508c2ecf20Sopenharmony_ci if (address < mmap_min_addr) 25518c2ecf20Sopenharmony_ci return -EPERM; 25528c2ecf20Sopenharmony_ci 25538c2ecf20Sopenharmony_ci /* Enforce stack_guard_gap */ 25548c2ecf20Sopenharmony_ci prev = vma->vm_prev; 25558c2ecf20Sopenharmony_ci /* Check that both stack segments have the same anon_vma? */ 25568c2ecf20Sopenharmony_ci if (prev && !(prev->vm_flags & VM_GROWSDOWN) && 25578c2ecf20Sopenharmony_ci vma_is_accessible(prev)) { 25588c2ecf20Sopenharmony_ci if (address - prev->vm_end < stack_guard_gap) 25598c2ecf20Sopenharmony_ci return -ENOMEM; 25608c2ecf20Sopenharmony_ci } 25618c2ecf20Sopenharmony_ci 25628c2ecf20Sopenharmony_ci /* We must make sure the anon_vma is allocated. */ 25638c2ecf20Sopenharmony_ci if (unlikely(anon_vma_prepare(vma))) 25648c2ecf20Sopenharmony_ci return -ENOMEM; 25658c2ecf20Sopenharmony_ci 25668c2ecf20Sopenharmony_ci /* 25678c2ecf20Sopenharmony_ci * vma->vm_start/vm_end cannot change under us because the caller 25688c2ecf20Sopenharmony_ci * is required to hold the mmap_lock in read mode. We need the 25698c2ecf20Sopenharmony_ci * anon_vma lock to serialize against concurrent expand_stacks. 
25708c2ecf20Sopenharmony_ci */ 25718c2ecf20Sopenharmony_ci anon_vma_lock_write(vma->anon_vma); 25728c2ecf20Sopenharmony_ci 25738c2ecf20Sopenharmony_ci /* Somebody else might have raced and expanded it already */ 25748c2ecf20Sopenharmony_ci if (address < vma->vm_start) { 25758c2ecf20Sopenharmony_ci unsigned long size, grow; 25768c2ecf20Sopenharmony_ci 25778c2ecf20Sopenharmony_ci size = vma->vm_end - address; 25788c2ecf20Sopenharmony_ci grow = (vma->vm_start - address) >> PAGE_SHIFT; 25798c2ecf20Sopenharmony_ci 25808c2ecf20Sopenharmony_ci error = -ENOMEM; 25818c2ecf20Sopenharmony_ci if (grow <= vma->vm_pgoff) { 25828c2ecf20Sopenharmony_ci error = acct_stack_growth(vma, size, grow); 25838c2ecf20Sopenharmony_ci if (!error) { 25848c2ecf20Sopenharmony_ci /* 25858c2ecf20Sopenharmony_ci * vma_gap_update() doesn't support concurrent 25868c2ecf20Sopenharmony_ci * updates, but we only hold a shared mmap_lock 25878c2ecf20Sopenharmony_ci * lock here, so we need to protect against 25888c2ecf20Sopenharmony_ci * concurrent vma expansions. 25898c2ecf20Sopenharmony_ci * anon_vma_lock_write() doesn't help here, as 25908c2ecf20Sopenharmony_ci * we don't guarantee that all growable vmas 25918c2ecf20Sopenharmony_ci * in a mm share the same root anon vma. 25928c2ecf20Sopenharmony_ci * So, we reuse mm->page_table_lock to guard 25938c2ecf20Sopenharmony_ci * against concurrent vma expansions. 25948c2ecf20Sopenharmony_ci */ 25958c2ecf20Sopenharmony_ci spin_lock(&mm->page_table_lock); 25968c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_LOCKED) 25978c2ecf20Sopenharmony_ci mm->locked_vm += grow; 25988c2ecf20Sopenharmony_ci vm_stat_account(mm, vma->vm_flags, grow); 25998c2ecf20Sopenharmony_ci anon_vma_interval_tree_pre_update_vma(vma); 26008c2ecf20Sopenharmony_ci vma->vm_start = address; 26018c2ecf20Sopenharmony_ci vma->vm_pgoff -= grow; 26028c2ecf20Sopenharmony_ci anon_vma_interval_tree_post_update_vma(vma); 26038c2ecf20Sopenharmony_ci vma_gap_update(vma); 26048c2ecf20Sopenharmony_ci spin_unlock(&mm->page_table_lock); 26058c2ecf20Sopenharmony_ci 26068c2ecf20Sopenharmony_ci perf_event_mmap(vma); 26078c2ecf20Sopenharmony_ci } 26088c2ecf20Sopenharmony_ci } 26098c2ecf20Sopenharmony_ci } 26108c2ecf20Sopenharmony_ci anon_vma_unlock_write(vma->anon_vma); 26118c2ecf20Sopenharmony_ci khugepaged_enter_vma_merge(vma, vma->vm_flags); 26128c2ecf20Sopenharmony_ci validate_mm(mm); 26138c2ecf20Sopenharmony_ci return error; 26148c2ecf20Sopenharmony_ci} 26158c2ecf20Sopenharmony_ci 26168c2ecf20Sopenharmony_ci/* enforced gap between the expanding stack and other mappings. 
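 * The default is 256 pages (1 MiB with 4 KiB pages). It can be overridden at
 * boot via the stack_guard_gap= parameter, which takes a page count; for
 * example, booting with "stack_guard_gap=512" doubles the default gap.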
*/ 26178c2ecf20Sopenharmony_ciunsigned long stack_guard_gap = 256UL<<PAGE_SHIFT; 26188c2ecf20Sopenharmony_ci 26198c2ecf20Sopenharmony_cistatic int __init cmdline_parse_stack_guard_gap(char *p) 26208c2ecf20Sopenharmony_ci{ 26218c2ecf20Sopenharmony_ci unsigned long val; 26228c2ecf20Sopenharmony_ci char *endptr; 26238c2ecf20Sopenharmony_ci 26248c2ecf20Sopenharmony_ci val = simple_strtoul(p, &endptr, 10); 26258c2ecf20Sopenharmony_ci if (!*endptr) 26268c2ecf20Sopenharmony_ci stack_guard_gap = val << PAGE_SHIFT; 26278c2ecf20Sopenharmony_ci 26288c2ecf20Sopenharmony_ci return 1; 26298c2ecf20Sopenharmony_ci} 26308c2ecf20Sopenharmony_ci__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); 26318c2ecf20Sopenharmony_ci 26328c2ecf20Sopenharmony_ci#ifdef CONFIG_STACK_GROWSUP 26338c2ecf20Sopenharmony_ciint expand_stack(struct vm_area_struct *vma, unsigned long address) 26348c2ecf20Sopenharmony_ci{ 26358c2ecf20Sopenharmony_ci return expand_upwards(vma, address); 26368c2ecf20Sopenharmony_ci} 26378c2ecf20Sopenharmony_ci 26388c2ecf20Sopenharmony_cistruct vm_area_struct * 26398c2ecf20Sopenharmony_cifind_extend_vma(struct mm_struct *mm, unsigned long addr) 26408c2ecf20Sopenharmony_ci{ 26418c2ecf20Sopenharmony_ci struct vm_area_struct *vma, *prev; 26428c2ecf20Sopenharmony_ci 26438c2ecf20Sopenharmony_ci addr &= PAGE_MASK; 26448c2ecf20Sopenharmony_ci vma = find_vma_prev(mm, addr, &prev); 26458c2ecf20Sopenharmony_ci if (vma && (vma->vm_start <= addr)) 26468c2ecf20Sopenharmony_ci return vma; 26478c2ecf20Sopenharmony_ci /* don't alter vm_end if the coredump is running */ 26488c2ecf20Sopenharmony_ci if (!prev || expand_stack(prev, addr)) 26498c2ecf20Sopenharmony_ci return NULL; 26508c2ecf20Sopenharmony_ci if (prev->vm_flags & VM_LOCKED) 26518c2ecf20Sopenharmony_ci populate_vma_page_range(prev, addr, prev->vm_end, NULL); 26528c2ecf20Sopenharmony_ci return prev; 26538c2ecf20Sopenharmony_ci} 26548c2ecf20Sopenharmony_ci#else 26558c2ecf20Sopenharmony_ciint expand_stack(struct vm_area_struct *vma, unsigned long address) 26568c2ecf20Sopenharmony_ci{ 26578c2ecf20Sopenharmony_ci return expand_downwards(vma, address); 26588c2ecf20Sopenharmony_ci} 26598c2ecf20Sopenharmony_ci 26608c2ecf20Sopenharmony_cistruct vm_area_struct * 26618c2ecf20Sopenharmony_cifind_extend_vma(struct mm_struct *mm, unsigned long addr) 26628c2ecf20Sopenharmony_ci{ 26638c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 26648c2ecf20Sopenharmony_ci unsigned long start; 26658c2ecf20Sopenharmony_ci 26668c2ecf20Sopenharmony_ci addr &= PAGE_MASK; 26678c2ecf20Sopenharmony_ci vma = find_vma(mm, addr); 26688c2ecf20Sopenharmony_ci if (!vma) 26698c2ecf20Sopenharmony_ci return NULL; 26708c2ecf20Sopenharmony_ci if (vma->vm_start <= addr) 26718c2ecf20Sopenharmony_ci return vma; 26728c2ecf20Sopenharmony_ci if (!(vma->vm_flags & VM_GROWSDOWN)) 26738c2ecf20Sopenharmony_ci return NULL; 26748c2ecf20Sopenharmony_ci start = vma->vm_start; 26758c2ecf20Sopenharmony_ci if (expand_stack(vma, addr)) 26768c2ecf20Sopenharmony_ci return NULL; 26778c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_LOCKED) 26788c2ecf20Sopenharmony_ci populate_vma_page_range(vma, addr, start, NULL); 26798c2ecf20Sopenharmony_ci return vma; 26808c2ecf20Sopenharmony_ci} 26818c2ecf20Sopenharmony_ci#endif 26828c2ecf20Sopenharmony_ci 26838c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(find_extend_vma); 26848c2ecf20Sopenharmony_ci 26858c2ecf20Sopenharmony_ci/* 26868c2ecf20Sopenharmony_ci * Ok - we have the memory areas we should free on the vma list, 26878c2ecf20Sopenharmony_ci * so release them, and do the vma 
updates. 26888c2ecf20Sopenharmony_ci * 26898c2ecf20Sopenharmony_ci * Called with the mm semaphore held. 26908c2ecf20Sopenharmony_ci */ 26918c2ecf20Sopenharmony_cistatic void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) 26928c2ecf20Sopenharmony_ci{ 26938c2ecf20Sopenharmony_ci unsigned long nr_accounted = 0; 26948c2ecf20Sopenharmony_ci 26958c2ecf20Sopenharmony_ci /* Update high watermark before we lower total_vm */ 26968c2ecf20Sopenharmony_ci update_hiwater_vm(mm); 26978c2ecf20Sopenharmony_ci do { 26988c2ecf20Sopenharmony_ci long nrpages = vma_pages(vma); 26998c2ecf20Sopenharmony_ci 27008c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_ACCOUNT) 27018c2ecf20Sopenharmony_ci nr_accounted += nrpages; 27028c2ecf20Sopenharmony_ci vm_stat_account(mm, vma->vm_flags, -nrpages); 27038c2ecf20Sopenharmony_ci vma = remove_vma(vma); 27048c2ecf20Sopenharmony_ci } while (vma); 27058c2ecf20Sopenharmony_ci vm_unacct_memory(nr_accounted); 27068c2ecf20Sopenharmony_ci validate_mm(mm); 27078c2ecf20Sopenharmony_ci} 27088c2ecf20Sopenharmony_ci 27098c2ecf20Sopenharmony_ci/* 27108c2ecf20Sopenharmony_ci * Get rid of page table information in the indicated region. 27118c2ecf20Sopenharmony_ci * 27128c2ecf20Sopenharmony_ci * Called with the mm semaphore held. 27138c2ecf20Sopenharmony_ci */ 27148c2ecf20Sopenharmony_cistatic void unmap_region(struct mm_struct *mm, 27158c2ecf20Sopenharmony_ci struct vm_area_struct *vma, struct vm_area_struct *prev, 27168c2ecf20Sopenharmony_ci unsigned long start, unsigned long end) 27178c2ecf20Sopenharmony_ci{ 27188c2ecf20Sopenharmony_ci struct vm_area_struct *next = vma_next(mm, prev); 27198c2ecf20Sopenharmony_ci struct mmu_gather tlb; 27208c2ecf20Sopenharmony_ci struct vm_area_struct *cur_vma; 27218c2ecf20Sopenharmony_ci 27228c2ecf20Sopenharmony_ci lru_add_drain(); 27238c2ecf20Sopenharmony_ci tlb_gather_mmu(&tlb, mm, start, end); 27248c2ecf20Sopenharmony_ci update_hiwater_rss(mm); 27258c2ecf20Sopenharmony_ci unmap_vmas(&tlb, vma, start, end); 27268c2ecf20Sopenharmony_ci 27278c2ecf20Sopenharmony_ci /* 27288c2ecf20Sopenharmony_ci * Ensure we have no stale TLB entries by the time this mapping is 27298c2ecf20Sopenharmony_ci * removed from the rmap. 27308c2ecf20Sopenharmony_ci * Note that we don't have to worry about nested flushes here because 27318c2ecf20Sopenharmony_ci * we're holding the mm semaphore for removing the mapping - so any 27328c2ecf20Sopenharmony_ci * concurrent flush in this region has to be coming through the rmap, 27338c2ecf20Sopenharmony_ci * and we synchronize against that using the rmap lock. 27348c2ecf20Sopenharmony_ci */ 27358c2ecf20Sopenharmony_ci for (cur_vma = vma; cur_vma; cur_vma = cur_vma->vm_next) { 27368c2ecf20Sopenharmony_ci if ((cur_vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) { 27378c2ecf20Sopenharmony_ci tlb_flush_mmu(&tlb); 27388c2ecf20Sopenharmony_ci break; 27398c2ecf20Sopenharmony_ci } 27408c2ecf20Sopenharmony_ci } 27418c2ecf20Sopenharmony_ci 27428c2ecf20Sopenharmony_ci free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, 27438c2ecf20Sopenharmony_ci next ? next->vm_start : USER_PGTABLES_CEILING); 27448c2ecf20Sopenharmony_ci tlb_finish_mmu(&tlb, start, end); 27458c2ecf20Sopenharmony_ci} 27468c2ecf20Sopenharmony_ci 27478c2ecf20Sopenharmony_ci/* 27488c2ecf20Sopenharmony_ci * Create a list of vma's touched by the unmap, removing them from the mm's 27498c2ecf20Sopenharmony_ci * vma list as we go.. 
27508c2ecf20Sopenharmony_ci */ 27518c2ecf20Sopenharmony_cistatic bool 27528c2ecf20Sopenharmony_cidetach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, 27538c2ecf20Sopenharmony_ci struct vm_area_struct *prev, unsigned long end) 27548c2ecf20Sopenharmony_ci{ 27558c2ecf20Sopenharmony_ci struct vm_area_struct **insertion_point; 27568c2ecf20Sopenharmony_ci struct vm_area_struct *tail_vma = NULL; 27578c2ecf20Sopenharmony_ci 27588c2ecf20Sopenharmony_ci insertion_point = (prev ? &prev->vm_next : &mm->mmap); 27598c2ecf20Sopenharmony_ci vma->vm_prev = NULL; 27608c2ecf20Sopenharmony_ci do { 27618c2ecf20Sopenharmony_ci vma_rb_erase(vma, &mm->mm_rb); 27628c2ecf20Sopenharmony_ci mm->map_count--; 27638c2ecf20Sopenharmony_ci tail_vma = vma; 27648c2ecf20Sopenharmony_ci vma = vma->vm_next; 27658c2ecf20Sopenharmony_ci } while (vma && vma->vm_start < end); 27668c2ecf20Sopenharmony_ci *insertion_point = vma; 27678c2ecf20Sopenharmony_ci if (vma) { 27688c2ecf20Sopenharmony_ci vma->vm_prev = prev; 27698c2ecf20Sopenharmony_ci vma_gap_update(vma); 27708c2ecf20Sopenharmony_ci } else 27718c2ecf20Sopenharmony_ci mm->highest_vm_end = prev ? vm_end_gap(prev) : 0; 27728c2ecf20Sopenharmony_ci tail_vma->vm_next = NULL; 27738c2ecf20Sopenharmony_ci 27748c2ecf20Sopenharmony_ci /* Kill the cache */ 27758c2ecf20Sopenharmony_ci vmacache_invalidate(mm); 27768c2ecf20Sopenharmony_ci 27778c2ecf20Sopenharmony_ci /* 27788c2ecf20Sopenharmony_ci * Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or 27798c2ecf20Sopenharmony_ci * VM_GROWSUP VMA. Such VMAs can change their size under 27808c2ecf20Sopenharmony_ci * down_read(mmap_lock) and collide with the VMA we are about to unmap. 27818c2ecf20Sopenharmony_ci */ 27828c2ecf20Sopenharmony_ci if (vma && (vma->vm_flags & VM_GROWSDOWN)) 27838c2ecf20Sopenharmony_ci return false; 27848c2ecf20Sopenharmony_ci if (prev && (prev->vm_flags & VM_GROWSUP)) 27858c2ecf20Sopenharmony_ci return false; 27868c2ecf20Sopenharmony_ci return true; 27878c2ecf20Sopenharmony_ci} 27888c2ecf20Sopenharmony_ci 27898c2ecf20Sopenharmony_ci/* 27908c2ecf20Sopenharmony_ci * __split_vma() bypasses sysctl_max_map_count checking. We use this where it 27918c2ecf20Sopenharmony_ci * has already been checked or doesn't make sense to fail. 
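 * 'new_below' chooses which side of 'addr' the newly allocated vma takes:
 * non-zero gives the new vma the lower part [vm_start, addr) and leaves the
 * original vma covering [addr, vm_end); zero does the opposite.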
27928c2ecf20Sopenharmony_ci */ 27938c2ecf20Sopenharmony_ciint __split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 27948c2ecf20Sopenharmony_ci unsigned long addr, int new_below) 27958c2ecf20Sopenharmony_ci{ 27968c2ecf20Sopenharmony_ci struct vm_area_struct *new; 27978c2ecf20Sopenharmony_ci int err; 27988c2ecf20Sopenharmony_ci 27998c2ecf20Sopenharmony_ci if (vma->vm_ops && vma->vm_ops->split) { 28008c2ecf20Sopenharmony_ci err = vma->vm_ops->split(vma, addr); 28018c2ecf20Sopenharmony_ci if (err) 28028c2ecf20Sopenharmony_ci return err; 28038c2ecf20Sopenharmony_ci } 28048c2ecf20Sopenharmony_ci 28058c2ecf20Sopenharmony_ci new = vm_area_dup(vma); 28068c2ecf20Sopenharmony_ci if (!new) 28078c2ecf20Sopenharmony_ci return -ENOMEM; 28088c2ecf20Sopenharmony_ci 28098c2ecf20Sopenharmony_ci if (new_below) 28108c2ecf20Sopenharmony_ci new->vm_end = addr; 28118c2ecf20Sopenharmony_ci else { 28128c2ecf20Sopenharmony_ci new->vm_start = addr; 28138c2ecf20Sopenharmony_ci new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); 28148c2ecf20Sopenharmony_ci } 28158c2ecf20Sopenharmony_ci 28168c2ecf20Sopenharmony_ci err = vma_dup_policy(vma, new); 28178c2ecf20Sopenharmony_ci if (err) 28188c2ecf20Sopenharmony_ci goto out_free_vma; 28198c2ecf20Sopenharmony_ci 28208c2ecf20Sopenharmony_ci err = anon_vma_clone(new, vma); 28218c2ecf20Sopenharmony_ci if (err) 28228c2ecf20Sopenharmony_ci goto out_free_mpol; 28238c2ecf20Sopenharmony_ci 28248c2ecf20Sopenharmony_ci if (new->vm_file) 28258c2ecf20Sopenharmony_ci get_file(new->vm_file); 28268c2ecf20Sopenharmony_ci 28278c2ecf20Sopenharmony_ci if (new->vm_ops && new->vm_ops->open) 28288c2ecf20Sopenharmony_ci new->vm_ops->open(new); 28298c2ecf20Sopenharmony_ci 28308c2ecf20Sopenharmony_ci if (new_below) 28318c2ecf20Sopenharmony_ci err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff + 28328c2ecf20Sopenharmony_ci ((addr - new->vm_start) >> PAGE_SHIFT), new); 28338c2ecf20Sopenharmony_ci else 28348c2ecf20Sopenharmony_ci err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new); 28358c2ecf20Sopenharmony_ci 28368c2ecf20Sopenharmony_ci /* Success. */ 28378c2ecf20Sopenharmony_ci if (!err) 28388c2ecf20Sopenharmony_ci return 0; 28398c2ecf20Sopenharmony_ci 28408c2ecf20Sopenharmony_ci /* Clean everything up if vma_adjust failed. */ 28418c2ecf20Sopenharmony_ci if (new->vm_ops && new->vm_ops->close) 28428c2ecf20Sopenharmony_ci new->vm_ops->close(new); 28438c2ecf20Sopenharmony_ci if (new->vm_file) 28448c2ecf20Sopenharmony_ci fput(new->vm_file); 28458c2ecf20Sopenharmony_ci unlink_anon_vmas(new); 28468c2ecf20Sopenharmony_ci out_free_mpol: 28478c2ecf20Sopenharmony_ci mpol_put(vma_policy(new)); 28488c2ecf20Sopenharmony_ci out_free_vma: 28498c2ecf20Sopenharmony_ci vm_area_free(new); 28508c2ecf20Sopenharmony_ci return err; 28518c2ecf20Sopenharmony_ci} 28528c2ecf20Sopenharmony_ci 28538c2ecf20Sopenharmony_ci/* 28548c2ecf20Sopenharmony_ci * Split a vma into two pieces at address 'addr', a new vma is allocated 28558c2ecf20Sopenharmony_ci * either for the first part or the tail. 
28568c2ecf20Sopenharmony_ci */ 28578c2ecf20Sopenharmony_ciint split_vma(struct mm_struct *mm, struct vm_area_struct *vma, 28588c2ecf20Sopenharmony_ci unsigned long addr, int new_below) 28598c2ecf20Sopenharmony_ci{ 28608c2ecf20Sopenharmony_ci if (mm->map_count >= sysctl_max_map_count) 28618c2ecf20Sopenharmony_ci return -ENOMEM; 28628c2ecf20Sopenharmony_ci 28638c2ecf20Sopenharmony_ci return __split_vma(mm, vma, addr, new_below); 28648c2ecf20Sopenharmony_ci} 28658c2ecf20Sopenharmony_ci 28668c2ecf20Sopenharmony_ci/* Munmap is split into 2 main parts -- this part which finds 28678c2ecf20Sopenharmony_ci * what needs doing, and the areas themselves, which do the 28688c2ecf20Sopenharmony_ci * work. This now handles partial unmappings. 28698c2ecf20Sopenharmony_ci * Jeremy Fitzhardinge <jeremy@goop.org> 28708c2ecf20Sopenharmony_ci */ 28718c2ecf20Sopenharmony_ciint __do_munmap(struct mm_struct *mm, unsigned long start, size_t len, 28728c2ecf20Sopenharmony_ci struct list_head *uf, bool downgrade) 28738c2ecf20Sopenharmony_ci{ 28748c2ecf20Sopenharmony_ci unsigned long end; 28758c2ecf20Sopenharmony_ci struct vm_area_struct *vma, *prev, *last; 28768c2ecf20Sopenharmony_ci 28778c2ecf20Sopenharmony_ci if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start) 28788c2ecf20Sopenharmony_ci return -EINVAL; 28798c2ecf20Sopenharmony_ci 28808c2ecf20Sopenharmony_ci len = PAGE_ALIGN(len); 28818c2ecf20Sopenharmony_ci end = start + len; 28828c2ecf20Sopenharmony_ci if (len == 0) 28838c2ecf20Sopenharmony_ci return -EINVAL; 28848c2ecf20Sopenharmony_ci 28858c2ecf20Sopenharmony_ci int errno = 0; 28868c2ecf20Sopenharmony_ci CALL_HCK_LITE_HOOK(delete_jit_memory_lhck, current, start, len, &errno); 28878c2ecf20Sopenharmony_ci if (errno) 28888c2ecf20Sopenharmony_ci return errno; 28898c2ecf20Sopenharmony_ci 28908c2ecf20Sopenharmony_ci /* 28918c2ecf20Sopenharmony_ci * arch_unmap() might do unmaps itself. It must be called 28928c2ecf20Sopenharmony_ci * and finish any rbtree manipulation before this code 28938c2ecf20Sopenharmony_ci * runs and also starts to manipulate the rbtree. 28948c2ecf20Sopenharmony_ci */ 28958c2ecf20Sopenharmony_ci arch_unmap(mm, start, end); 28968c2ecf20Sopenharmony_ci 28978c2ecf20Sopenharmony_ci /* Find the first overlapping VMA */ 28988c2ecf20Sopenharmony_ci vma = find_vma(mm, start); 28998c2ecf20Sopenharmony_ci if (!vma) 29008c2ecf20Sopenharmony_ci return 0; 29018c2ecf20Sopenharmony_ci prev = vma->vm_prev; 29028c2ecf20Sopenharmony_ci /* we have start < vma->vm_end */ 29038c2ecf20Sopenharmony_ci 29048c2ecf20Sopenharmony_ci /* if it doesn't overlap, we have nothing.. */ 29058c2ecf20Sopenharmony_ci if (vma->vm_start >= end) 29068c2ecf20Sopenharmony_ci return 0; 29078c2ecf20Sopenharmony_ci 29088c2ecf20Sopenharmony_ci /* 29098c2ecf20Sopenharmony_ci * If we need to split any vma, do it now to save pain later. 29108c2ecf20Sopenharmony_ci * 29118c2ecf20Sopenharmony_ci * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially 29128c2ecf20Sopenharmony_ci * unmapped vm_area_struct will remain in use: so lower split_vma 29138c2ecf20Sopenharmony_ci * places tmp vma above, and higher split_vma places tmp vma below. 
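	 * As a concrete illustration: unmapping the middle of a single vma
	 * first calls __split_vma(..., start, 0), so the original struct
	 * keeps the low part and a new vma covers [start, old end); the
	 * second call, __split_vma(..., end, 1), then carves [start, end)
	 * out of that new vma, leaving the surviving high part in the
	 * vm_area_struct allocated by the first split.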
29148c2ecf20Sopenharmony_ci	 */
29158c2ecf20Sopenharmony_ci	if (start > vma->vm_start) {
29168c2ecf20Sopenharmony_ci		int error;
29178c2ecf20Sopenharmony_ci
29188c2ecf20Sopenharmony_ci		/*
29198c2ecf20Sopenharmony_ci		 * Make sure that map_count on return from munmap() will
29208c2ecf20Sopenharmony_ci		 * not exceed its limit; but let map_count go just above
29218c2ecf20Sopenharmony_ci		 * its limit temporarily, to help free resources as expected.
29228c2ecf20Sopenharmony_ci		 */
29238c2ecf20Sopenharmony_ci		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
29248c2ecf20Sopenharmony_ci			return -ENOMEM;
29258c2ecf20Sopenharmony_ci
29268c2ecf20Sopenharmony_ci		error = __split_vma(mm, vma, start, 0);
29278c2ecf20Sopenharmony_ci		if (error)
29288c2ecf20Sopenharmony_ci			return error;
29298c2ecf20Sopenharmony_ci		prev = vma;
29308c2ecf20Sopenharmony_ci	}
29318c2ecf20Sopenharmony_ci
29328c2ecf20Sopenharmony_ci	/* Does it split the last one? */
29338c2ecf20Sopenharmony_ci	last = find_vma(mm, end);
29348c2ecf20Sopenharmony_ci	if (last && end > last->vm_start) {
29358c2ecf20Sopenharmony_ci		int error = __split_vma(mm, last, end, 1);
29368c2ecf20Sopenharmony_ci		if (error)
29378c2ecf20Sopenharmony_ci			return error;
29388c2ecf20Sopenharmony_ci	}
29398c2ecf20Sopenharmony_ci	vma = vma_next(mm, prev);
29408c2ecf20Sopenharmony_ci
29418c2ecf20Sopenharmony_ci	if (unlikely(uf)) {
29428c2ecf20Sopenharmony_ci		/*
29438c2ecf20Sopenharmony_ci		 * If userfaultfd_unmap_prep returns an error the vmas
29448c2ecf20Sopenharmony_ci		 * will remain split, but userland will get a
29458c2ecf20Sopenharmony_ci		 * highly unexpected error anyway. This is no
29468c2ecf20Sopenharmony_ci		 * different from the case where the first of the two
29478c2ecf20Sopenharmony_ci		 * __split_vma fails, but we don't undo the first
29488c2ecf20Sopenharmony_ci		 * split, though we could. That failure is unlikely
29498c2ecf20Sopenharmony_ci		 * enough that it's not worth optimizing for.
29508c2ecf20Sopenharmony_ci		 */
29518c2ecf20Sopenharmony_ci		int error = userfaultfd_unmap_prep(vma, start, end, uf);
29528c2ecf20Sopenharmony_ci		if (error)
29538c2ecf20Sopenharmony_ci			return error;
29548c2ecf20Sopenharmony_ci	}
29558c2ecf20Sopenharmony_ci
29568c2ecf20Sopenharmony_ci	/*
29578c2ecf20Sopenharmony_ci	 * unlock any mlock()ed ranges before detaching vmas
29588c2ecf20Sopenharmony_ci	 */
29598c2ecf20Sopenharmony_ci	if (mm->locked_vm) {
29608c2ecf20Sopenharmony_ci		struct vm_area_struct *tmp = vma;
29618c2ecf20Sopenharmony_ci		while (tmp && tmp->vm_start < end) {
29628c2ecf20Sopenharmony_ci			if (tmp->vm_flags & VM_LOCKED) {
29638c2ecf20Sopenharmony_ci				mm->locked_vm -= vma_pages(tmp);
29648c2ecf20Sopenharmony_ci				munlock_vma_pages_all(tmp);
29658c2ecf20Sopenharmony_ci			}
29668c2ecf20Sopenharmony_ci
29678c2ecf20Sopenharmony_ci			tmp = tmp->vm_next;
29688c2ecf20Sopenharmony_ci		}
29698c2ecf20Sopenharmony_ci	}
29708c2ecf20Sopenharmony_ci
29718c2ecf20Sopenharmony_ci	/* Detach vmas from rbtree */
29728c2ecf20Sopenharmony_ci	if (!detach_vmas_to_be_unmapped(mm, vma, prev, end))
29738c2ecf20Sopenharmony_ci		downgrade = false;
29748c2ecf20Sopenharmony_ci
29758c2ecf20Sopenharmony_ci	if (downgrade)
29768c2ecf20Sopenharmony_ci		mmap_write_downgrade(mm);
29778c2ecf20Sopenharmony_ci
29788c2ecf20Sopenharmony_ci	unmap_region(mm, vma, prev, start, end);
29798c2ecf20Sopenharmony_ci
29808c2ecf20Sopenharmony_ci	/* Fix up all other VM information */
29818c2ecf20Sopenharmony_ci	remove_vma_list(mm, vma);
29828c2ecf20Sopenharmony_ci
29838c2ecf20Sopenharmony_ci	return downgrade ?
1 : 0; 29848c2ecf20Sopenharmony_ci} 29858c2ecf20Sopenharmony_ci 29868c2ecf20Sopenharmony_ciint do_munmap(struct mm_struct *mm, unsigned long start, size_t len, 29878c2ecf20Sopenharmony_ci struct list_head *uf) 29888c2ecf20Sopenharmony_ci{ 29898c2ecf20Sopenharmony_ci return __do_munmap(mm, start, len, uf, false); 29908c2ecf20Sopenharmony_ci} 29918c2ecf20Sopenharmony_ci 29928c2ecf20Sopenharmony_cistatic int __vm_munmap(unsigned long start, size_t len, bool downgrade) 29938c2ecf20Sopenharmony_ci{ 29948c2ecf20Sopenharmony_ci int ret; 29958c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 29968c2ecf20Sopenharmony_ci LIST_HEAD(uf); 29978c2ecf20Sopenharmony_ci 29988c2ecf20Sopenharmony_ci if (mmap_write_lock_killable(mm)) 29998c2ecf20Sopenharmony_ci return -EINTR; 30008c2ecf20Sopenharmony_ci 30018c2ecf20Sopenharmony_ci ret = __do_munmap(mm, start, len, &uf, downgrade); 30028c2ecf20Sopenharmony_ci /* 30038c2ecf20Sopenharmony_ci * Returning 1 indicates mmap_lock is downgraded. 30048c2ecf20Sopenharmony_ci * But 1 is not legal return value of vm_munmap() and munmap(), reset 30058c2ecf20Sopenharmony_ci * it to 0 before return. 30068c2ecf20Sopenharmony_ci */ 30078c2ecf20Sopenharmony_ci if (ret == 1) { 30088c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 30098c2ecf20Sopenharmony_ci ret = 0; 30108c2ecf20Sopenharmony_ci } else 30118c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 30128c2ecf20Sopenharmony_ci 30138c2ecf20Sopenharmony_ci userfaultfd_unmap_complete(mm, &uf); 30148c2ecf20Sopenharmony_ci return ret; 30158c2ecf20Sopenharmony_ci} 30168c2ecf20Sopenharmony_ci 30178c2ecf20Sopenharmony_ciint vm_munmap(unsigned long start, size_t len) 30188c2ecf20Sopenharmony_ci{ 30198c2ecf20Sopenharmony_ci return __vm_munmap(start, len, false); 30208c2ecf20Sopenharmony_ci} 30218c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vm_munmap); 30228c2ecf20Sopenharmony_ci 30238c2ecf20Sopenharmony_ciSYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) 30248c2ecf20Sopenharmony_ci{ 30258c2ecf20Sopenharmony_ci addr = untagged_addr(addr); 30268c2ecf20Sopenharmony_ci profile_munmap(addr); 30278c2ecf20Sopenharmony_ci return __vm_munmap(addr, len, true); 30288c2ecf20Sopenharmony_ci} 30298c2ecf20Sopenharmony_ci 30308c2ecf20Sopenharmony_ci 30318c2ecf20Sopenharmony_ci/* 30328c2ecf20Sopenharmony_ci * Emulation of deprecated remap_file_pages() syscall. 30338c2ecf20Sopenharmony_ci */ 30348c2ecf20Sopenharmony_ciSYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, 30358c2ecf20Sopenharmony_ci unsigned long, prot, unsigned long, pgoff, unsigned long, flags) 30368c2ecf20Sopenharmony_ci{ 30378c2ecf20Sopenharmony_ci 30388c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 30398c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 30408c2ecf20Sopenharmony_ci unsigned long populate = 0; 30418c2ecf20Sopenharmony_ci unsigned long ret = -EINVAL; 30428c2ecf20Sopenharmony_ci struct file *file; 30438c2ecf20Sopenharmony_ci 30448c2ecf20Sopenharmony_ci pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n", 30458c2ecf20Sopenharmony_ci current->comm, current->pid); 30468c2ecf20Sopenharmony_ci 30478c2ecf20Sopenharmony_ci if (prot) 30488c2ecf20Sopenharmony_ci return ret; 30498c2ecf20Sopenharmony_ci start = start & PAGE_MASK; 30508c2ecf20Sopenharmony_ci size = size & PAGE_MASK; 30518c2ecf20Sopenharmony_ci 30528c2ecf20Sopenharmony_ci if (start + size <= start) 30538c2ecf20Sopenharmony_ci return ret; 30548c2ecf20Sopenharmony_ci 30558c2ecf20Sopenharmony_ci /* Does pgoff wrap? 
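	 * (pgoff and size >> PAGE_SHIFT are both page counts; their sum
	 * must not overflow an unsigned long.)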
*/ 30568c2ecf20Sopenharmony_ci if (pgoff + (size >> PAGE_SHIFT) < pgoff) 30578c2ecf20Sopenharmony_ci return ret; 30588c2ecf20Sopenharmony_ci 30598c2ecf20Sopenharmony_ci if (mmap_write_lock_killable(mm)) 30608c2ecf20Sopenharmony_ci return -EINTR; 30618c2ecf20Sopenharmony_ci 30628c2ecf20Sopenharmony_ci vma = find_vma(mm, start); 30638c2ecf20Sopenharmony_ci 30648c2ecf20Sopenharmony_ci if (!vma || !(vma->vm_flags & VM_SHARED)) 30658c2ecf20Sopenharmony_ci goto out; 30668c2ecf20Sopenharmony_ci 30678c2ecf20Sopenharmony_ci if (start < vma->vm_start) 30688c2ecf20Sopenharmony_ci goto out; 30698c2ecf20Sopenharmony_ci 30708c2ecf20Sopenharmony_ci if (start + size > vma->vm_end) { 30718c2ecf20Sopenharmony_ci struct vm_area_struct *next; 30728c2ecf20Sopenharmony_ci 30738c2ecf20Sopenharmony_ci for (next = vma->vm_next; next; next = next->vm_next) { 30748c2ecf20Sopenharmony_ci /* hole between vmas ? */ 30758c2ecf20Sopenharmony_ci if (next->vm_start != next->vm_prev->vm_end) 30768c2ecf20Sopenharmony_ci goto out; 30778c2ecf20Sopenharmony_ci 30788c2ecf20Sopenharmony_ci if (next->vm_file != vma->vm_file) 30798c2ecf20Sopenharmony_ci goto out; 30808c2ecf20Sopenharmony_ci 30818c2ecf20Sopenharmony_ci if (next->vm_flags != vma->vm_flags) 30828c2ecf20Sopenharmony_ci goto out; 30838c2ecf20Sopenharmony_ci 30848c2ecf20Sopenharmony_ci if (start + size <= next->vm_end) 30858c2ecf20Sopenharmony_ci break; 30868c2ecf20Sopenharmony_ci } 30878c2ecf20Sopenharmony_ci 30888c2ecf20Sopenharmony_ci if (!next) 30898c2ecf20Sopenharmony_ci goto out; 30908c2ecf20Sopenharmony_ci } 30918c2ecf20Sopenharmony_ci 30928c2ecf20Sopenharmony_ci prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; 30938c2ecf20Sopenharmony_ci prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; 30948c2ecf20Sopenharmony_ci prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; 30958c2ecf20Sopenharmony_ci 30968c2ecf20Sopenharmony_ci flags &= MAP_NONBLOCK; 30978c2ecf20Sopenharmony_ci flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; 30988c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_LOCKED) { 30998c2ecf20Sopenharmony_ci struct vm_area_struct *tmp; 31008c2ecf20Sopenharmony_ci flags |= MAP_LOCKED; 31018c2ecf20Sopenharmony_ci 31028c2ecf20Sopenharmony_ci /* drop PG_Mlocked flag for over-mapped range */ 31038c2ecf20Sopenharmony_ci for (tmp = vma; tmp->vm_start >= start + size; 31048c2ecf20Sopenharmony_ci tmp = tmp->vm_next) { 31058c2ecf20Sopenharmony_ci /* 31068c2ecf20Sopenharmony_ci * Split pmd and munlock page on the border 31078c2ecf20Sopenharmony_ci * of the range. 
31088c2ecf20Sopenharmony_ci */ 31098c2ecf20Sopenharmony_ci vma_adjust_trans_huge(tmp, start, start + size, 0); 31108c2ecf20Sopenharmony_ci 31118c2ecf20Sopenharmony_ci munlock_vma_pages_range(tmp, 31128c2ecf20Sopenharmony_ci max(tmp->vm_start, start), 31138c2ecf20Sopenharmony_ci min(tmp->vm_end, start + size)); 31148c2ecf20Sopenharmony_ci } 31158c2ecf20Sopenharmony_ci } 31168c2ecf20Sopenharmony_ci 31178c2ecf20Sopenharmony_ci file = get_file(vma->vm_file); 31188c2ecf20Sopenharmony_ci ret = do_mmap(vma->vm_file, start, size, 31198c2ecf20Sopenharmony_ci prot, flags, pgoff, &populate, NULL); 31208c2ecf20Sopenharmony_ci fput(file); 31218c2ecf20Sopenharmony_ciout: 31228c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 31238c2ecf20Sopenharmony_ci if (populate) 31248c2ecf20Sopenharmony_ci mm_populate(ret, populate); 31258c2ecf20Sopenharmony_ci if (!IS_ERR_VALUE(ret)) 31268c2ecf20Sopenharmony_ci ret = 0; 31278c2ecf20Sopenharmony_ci return ret; 31288c2ecf20Sopenharmony_ci} 31298c2ecf20Sopenharmony_ci 31308c2ecf20Sopenharmony_ci/* 31318c2ecf20Sopenharmony_ci * this is really a simplified "do_mmap". it only handles 31328c2ecf20Sopenharmony_ci * anonymous maps. eventually we may be able to do some 31338c2ecf20Sopenharmony_ci * brk-specific accounting here. 31348c2ecf20Sopenharmony_ci */ 31358c2ecf20Sopenharmony_cistatic int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf) 31368c2ecf20Sopenharmony_ci{ 31378c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 31388c2ecf20Sopenharmony_ci struct vm_area_struct *vma, *prev; 31398c2ecf20Sopenharmony_ci struct rb_node **rb_link, *rb_parent; 31408c2ecf20Sopenharmony_ci pgoff_t pgoff = addr >> PAGE_SHIFT; 31418c2ecf20Sopenharmony_ci int error; 31428c2ecf20Sopenharmony_ci unsigned long mapped_addr; 31438c2ecf20Sopenharmony_ci 31448c2ecf20Sopenharmony_ci /* Until we need other flags, refuse anything except VM_EXEC. */ 31458c2ecf20Sopenharmony_ci if ((flags & (~VM_EXEC)) != 0) 31468c2ecf20Sopenharmony_ci return -EINVAL; 31478c2ecf20Sopenharmony_ci flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; 31488c2ecf20Sopenharmony_ci 31498c2ecf20Sopenharmony_ci mapped_addr = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); 31508c2ecf20Sopenharmony_ci if (IS_ERR_VALUE(mapped_addr)) 31518c2ecf20Sopenharmony_ci return mapped_addr; 31528c2ecf20Sopenharmony_ci 31538c2ecf20Sopenharmony_ci error = mlock_future_check(mm, mm->def_flags, len); 31548c2ecf20Sopenharmony_ci if (error) 31558c2ecf20Sopenharmony_ci return error; 31568c2ecf20Sopenharmony_ci 31578c2ecf20Sopenharmony_ci /* Clear old maps, set up prev, rb_link, rb_parent, and uf */ 31588c2ecf20Sopenharmony_ci if (munmap_vma_range(mm, addr, len, &prev, &rb_link, &rb_parent, uf)) 31598c2ecf20Sopenharmony_ci return -ENOMEM; 31608c2ecf20Sopenharmony_ci 31618c2ecf20Sopenharmony_ci /* Check against address space limits *after* clearing old maps... */ 31628c2ecf20Sopenharmony_ci if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT)) 31638c2ecf20Sopenharmony_ci return -ENOMEM; 31648c2ecf20Sopenharmony_ci 31658c2ecf20Sopenharmony_ci if (mm->map_count > sysctl_max_map_count) 31668c2ecf20Sopenharmony_ci return -ENOMEM; 31678c2ecf20Sopenharmony_ci 31688c2ecf20Sopenharmony_ci if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) 31698c2ecf20Sopenharmony_ci return -ENOMEM; 31708c2ecf20Sopenharmony_ci 31718c2ecf20Sopenharmony_ci /* Can we just expand an old private anonymous mapping? 
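	 * (Successive brk() extensions usually merge into the previous
	 * anonymous vma here instead of allocating a new vm_area_struct.)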
*/ 31728c2ecf20Sopenharmony_ci vma = vma_merge(mm, prev, addr, addr + len, flags, 31738c2ecf20Sopenharmony_ci NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL); 31748c2ecf20Sopenharmony_ci if (vma) 31758c2ecf20Sopenharmony_ci goto out; 31768c2ecf20Sopenharmony_ci 31778c2ecf20Sopenharmony_ci /* 31788c2ecf20Sopenharmony_ci * create a vma struct for an anonymous mapping 31798c2ecf20Sopenharmony_ci */ 31808c2ecf20Sopenharmony_ci vma = vm_area_alloc(mm); 31818c2ecf20Sopenharmony_ci if (!vma) { 31828c2ecf20Sopenharmony_ci vm_unacct_memory(len >> PAGE_SHIFT); 31838c2ecf20Sopenharmony_ci return -ENOMEM; 31848c2ecf20Sopenharmony_ci } 31858c2ecf20Sopenharmony_ci 31868c2ecf20Sopenharmony_ci vma_set_anonymous(vma); 31878c2ecf20Sopenharmony_ci vma->vm_start = addr; 31888c2ecf20Sopenharmony_ci vma->vm_end = addr + len; 31898c2ecf20Sopenharmony_ci vma->vm_pgoff = pgoff; 31908c2ecf20Sopenharmony_ci vma->vm_flags = flags; 31918c2ecf20Sopenharmony_ci vma->vm_page_prot = vm_get_page_prot(flags); 31928c2ecf20Sopenharmony_ci vma_link(mm, vma, prev, rb_link, rb_parent); 31938c2ecf20Sopenharmony_ciout: 31948c2ecf20Sopenharmony_ci perf_event_mmap(vma); 31958c2ecf20Sopenharmony_ci mm->total_vm += len >> PAGE_SHIFT; 31968c2ecf20Sopenharmony_ci mm->data_vm += len >> PAGE_SHIFT; 31978c2ecf20Sopenharmony_ci if (flags & VM_LOCKED) 31988c2ecf20Sopenharmony_ci mm->locked_vm += (len >> PAGE_SHIFT); 31998c2ecf20Sopenharmony_ci vma->vm_flags |= VM_SOFTDIRTY; 32008c2ecf20Sopenharmony_ci return 0; 32018c2ecf20Sopenharmony_ci} 32028c2ecf20Sopenharmony_ci 32038c2ecf20Sopenharmony_ciint vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) 32048c2ecf20Sopenharmony_ci{ 32058c2ecf20Sopenharmony_ci struct mm_struct *mm = current->mm; 32068c2ecf20Sopenharmony_ci unsigned long len; 32078c2ecf20Sopenharmony_ci int ret; 32088c2ecf20Sopenharmony_ci bool populate; 32098c2ecf20Sopenharmony_ci LIST_HEAD(uf); 32108c2ecf20Sopenharmony_ci 32118c2ecf20Sopenharmony_ci len = PAGE_ALIGN(request); 32128c2ecf20Sopenharmony_ci if (len < request) 32138c2ecf20Sopenharmony_ci return -ENOMEM; 32148c2ecf20Sopenharmony_ci if (!len) 32158c2ecf20Sopenharmony_ci return 0; 32168c2ecf20Sopenharmony_ci 32178c2ecf20Sopenharmony_ci if (mmap_write_lock_killable(mm)) 32188c2ecf20Sopenharmony_ci return -EINTR; 32198c2ecf20Sopenharmony_ci 32208c2ecf20Sopenharmony_ci ret = do_brk_flags(addr, len, flags, &uf); 32218c2ecf20Sopenharmony_ci populate = ((mm->def_flags & VM_LOCKED) != 0); 32228c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 32238c2ecf20Sopenharmony_ci userfaultfd_unmap_complete(mm, &uf); 32248c2ecf20Sopenharmony_ci if (populate && !ret) 32258c2ecf20Sopenharmony_ci mm_populate(addr, len); 32268c2ecf20Sopenharmony_ci return ret; 32278c2ecf20Sopenharmony_ci} 32288c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vm_brk_flags); 32298c2ecf20Sopenharmony_ci 32308c2ecf20Sopenharmony_ciint vm_brk(unsigned long addr, unsigned long len) 32318c2ecf20Sopenharmony_ci{ 32328c2ecf20Sopenharmony_ci return vm_brk_flags(addr, len, 0); 32338c2ecf20Sopenharmony_ci} 32348c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vm_brk); 32358c2ecf20Sopenharmony_ci 32368c2ecf20Sopenharmony_ci/* Release all mmaps. 
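 * Called when the last user of the mm drops it (via mmput()), so no other
 * task can still be using this address space.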
*/ 32378c2ecf20Sopenharmony_civoid exit_mmap(struct mm_struct *mm) 32388c2ecf20Sopenharmony_ci{ 32398c2ecf20Sopenharmony_ci struct mmu_gather tlb; 32408c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 32418c2ecf20Sopenharmony_ci unsigned long nr_accounted = 0; 32428c2ecf20Sopenharmony_ci 32438c2ecf20Sopenharmony_ci /* mm's last user has gone, and its about to be pulled down */ 32448c2ecf20Sopenharmony_ci mmu_notifier_release(mm); 32458c2ecf20Sopenharmony_ci 32468c2ecf20Sopenharmony_ci if (unlikely(mm_is_oom_victim(mm))) { 32478c2ecf20Sopenharmony_ci /* 32488c2ecf20Sopenharmony_ci * Manually reap the mm to free as much memory as possible. 32498c2ecf20Sopenharmony_ci * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard 32508c2ecf20Sopenharmony_ci * this mm from further consideration. Taking mm->mmap_lock for 32518c2ecf20Sopenharmony_ci * write after setting MMF_OOM_SKIP will guarantee that the oom 32528c2ecf20Sopenharmony_ci * reaper will not run on this mm again after mmap_lock is 32538c2ecf20Sopenharmony_ci * dropped. 32548c2ecf20Sopenharmony_ci * 32558c2ecf20Sopenharmony_ci * Nothing can be holding mm->mmap_lock here and the above call 32568c2ecf20Sopenharmony_ci * to mmu_notifier_release(mm) ensures mmu notifier callbacks in 32578c2ecf20Sopenharmony_ci * __oom_reap_task_mm() will not block. 32588c2ecf20Sopenharmony_ci * 32598c2ecf20Sopenharmony_ci * This needs to be done before calling munlock_vma_pages_all(), 32608c2ecf20Sopenharmony_ci * which clears VM_LOCKED, otherwise the oom reaper cannot 32618c2ecf20Sopenharmony_ci * reliably test it. 32628c2ecf20Sopenharmony_ci */ 32638c2ecf20Sopenharmony_ci (void)__oom_reap_task_mm(mm); 32648c2ecf20Sopenharmony_ci 32658c2ecf20Sopenharmony_ci set_bit(MMF_OOM_SKIP, &mm->flags); 32668c2ecf20Sopenharmony_ci mmap_write_lock(mm); 32678c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 32688c2ecf20Sopenharmony_ci } 32698c2ecf20Sopenharmony_ci 32708c2ecf20Sopenharmony_ci if (mm->locked_vm) { 32718c2ecf20Sopenharmony_ci vma = mm->mmap; 32728c2ecf20Sopenharmony_ci while (vma) { 32738c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_LOCKED) 32748c2ecf20Sopenharmony_ci munlock_vma_pages_all(vma); 32758c2ecf20Sopenharmony_ci vma = vma->vm_next; 32768c2ecf20Sopenharmony_ci } 32778c2ecf20Sopenharmony_ci } 32788c2ecf20Sopenharmony_ci 32798c2ecf20Sopenharmony_ci arch_exit_mmap(mm); 32808c2ecf20Sopenharmony_ci 32818c2ecf20Sopenharmony_ci vma = mm->mmap; 32828c2ecf20Sopenharmony_ci if (!vma) /* Can happen if dup_mmap() received an OOM */ 32838c2ecf20Sopenharmony_ci return; 32848c2ecf20Sopenharmony_ci 32858c2ecf20Sopenharmony_ci lru_add_drain(); 32868c2ecf20Sopenharmony_ci flush_cache_mm(mm); 32878c2ecf20Sopenharmony_ci tlb_gather_mmu(&tlb, mm, 0, -1); 32888c2ecf20Sopenharmony_ci /* update_hiwater_rss(mm) here? but nobody should be looking */ 32898c2ecf20Sopenharmony_ci /* Use -1 here to ensure all VMAs in the mm are unmapped */ 32908c2ecf20Sopenharmony_ci unmap_vmas(&tlb, vma, 0, -1); 32918c2ecf20Sopenharmony_ci free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); 32928c2ecf20Sopenharmony_ci tlb_finish_mmu(&tlb, 0, -1); 32938c2ecf20Sopenharmony_ci 32948c2ecf20Sopenharmony_ci /* 32958c2ecf20Sopenharmony_ci * Walk the list again, actually closing and freeing it, 32968c2ecf20Sopenharmony_ci * with preemption enabled, without holding any MM locks. 
32978c2ecf20Sopenharmony_ci */ 32988c2ecf20Sopenharmony_ci while (vma) { 32998c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_ACCOUNT) 33008c2ecf20Sopenharmony_ci nr_accounted += vma_pages(vma); 33018c2ecf20Sopenharmony_ci vma = remove_vma(vma); 33028c2ecf20Sopenharmony_ci cond_resched(); 33038c2ecf20Sopenharmony_ci } 33048c2ecf20Sopenharmony_ci vm_unacct_memory(nr_accounted); 33058c2ecf20Sopenharmony_ci} 33068c2ecf20Sopenharmony_ci 33078c2ecf20Sopenharmony_ci/* Insert vm structure into process list sorted by address 33088c2ecf20Sopenharmony_ci * and into the inode's i_mmap tree. If vm_file is non-NULL 33098c2ecf20Sopenharmony_ci * then i_mmap_rwsem is taken here. 33108c2ecf20Sopenharmony_ci */ 33118c2ecf20Sopenharmony_ciint insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) 33128c2ecf20Sopenharmony_ci{ 33138c2ecf20Sopenharmony_ci struct vm_area_struct *prev; 33148c2ecf20Sopenharmony_ci struct rb_node **rb_link, *rb_parent; 33158c2ecf20Sopenharmony_ci 33168c2ecf20Sopenharmony_ci if (find_vma_links(mm, vma->vm_start, vma->vm_end, 33178c2ecf20Sopenharmony_ci &prev, &rb_link, &rb_parent)) 33188c2ecf20Sopenharmony_ci return -ENOMEM; 33198c2ecf20Sopenharmony_ci if ((vma->vm_flags & VM_ACCOUNT) && 33208c2ecf20Sopenharmony_ci security_vm_enough_memory_mm(mm, vma_pages(vma))) 33218c2ecf20Sopenharmony_ci return -ENOMEM; 33228c2ecf20Sopenharmony_ci 33238c2ecf20Sopenharmony_ci /* 33248c2ecf20Sopenharmony_ci * The vm_pgoff of a purely anonymous vma should be irrelevant 33258c2ecf20Sopenharmony_ci * until its first write fault, when page's anon_vma and index 33268c2ecf20Sopenharmony_ci * are set. But now set the vm_pgoff it will almost certainly 33278c2ecf20Sopenharmony_ci * end up with (unless mremap moves it elsewhere before that 33288c2ecf20Sopenharmony_ci * first wfault), so /proc/pid/maps tells a consistent story. 33298c2ecf20Sopenharmony_ci * 33308c2ecf20Sopenharmony_ci * By setting it to reflect the virtual start address of the 33318c2ecf20Sopenharmony_ci * vma, merges and splits can happen in a seamless way, just 33328c2ecf20Sopenharmony_ci * using the existing file pgoff checks and manipulations. 33338c2ecf20Sopenharmony_ci * Similarly in do_mmap and in do_brk_flags. 33348c2ecf20Sopenharmony_ci */ 33358c2ecf20Sopenharmony_ci if (vma_is_anonymous(vma)) { 33368c2ecf20Sopenharmony_ci BUG_ON(vma->anon_vma); 33378c2ecf20Sopenharmony_ci vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; 33388c2ecf20Sopenharmony_ci } 33398c2ecf20Sopenharmony_ci 33408c2ecf20Sopenharmony_ci vma_link(mm, vma, prev, rb_link, rb_parent); 33418c2ecf20Sopenharmony_ci return 0; 33428c2ecf20Sopenharmony_ci} 33438c2ecf20Sopenharmony_ci 33448c2ecf20Sopenharmony_ci/* 33458c2ecf20Sopenharmony_ci * Copy the vma structure to a new location in the same mm, 33468c2ecf20Sopenharmony_ci * prior to moving page table entries, to effect an mremap move. 
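 * On success, *need_rmap_locks reports whether move_page_tables() must hold
 * the rmap locks while moving ptes: it is set when the merged destination
 * vma's vm_pgoff is not larger than the source's (the non-merge path below
 * always clears it); see the need_rmap_locks handling in mm/mremap.c.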

/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks)
{
	struct vm_area_struct *vma = *vmap;
	unsigned long vma_start = vma->vm_start;
	struct mm_struct *mm = vma->vm_mm;
	struct vm_area_struct *new_vma, *prev;
	struct rb_node **rb_link, *rb_parent;
	bool faulted_in_anon_vma = true;

	/*
	 * If anonymous vma has not yet been faulted, update new pgoff
	 * to match new location, to increase its chance of merging.
	 */
	if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
		pgoff = addr >> PAGE_SHIFT;
		faulted_in_anon_vma = false;
	}

	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
		return NULL;	/* should never get here */
	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
			    vma->vm_userfaultfd_ctx, anon_vma_name(vma));
	if (new_vma) {
		/*
		 * Source vma may have been merged into new_vma
		 */
		if (unlikely(vma_start >= new_vma->vm_start &&
			     vma_start < new_vma->vm_end)) {
			/*
			 * The only way we can get a vma_merge with
			 * self during an mremap is if the vma hasn't
			 * been faulted in yet and we were allowed to
			 * reset the dst vma->vm_pgoff to the
			 * destination address of the mremap to allow
			 * the merge to happen. mremap must change the
			 * vm_pgoff linearity between src and dst vmas
			 * (in turn preventing a vma_merge) to be
			 * safe. It is only safe to keep the vm_pgoff
			 * linear if there are no pages mapped yet.
			 */
			VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
			*vmap = vma = new_vma;
		}
		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
	} else {
		new_vma = vm_area_dup(vma);
		if (!new_vma)
			goto out;
		new_vma->vm_start = addr;
		new_vma->vm_end = addr + len;
		new_vma->vm_pgoff = pgoff;
		if (vma_dup_policy(vma, new_vma))
			goto out_free_vma;
		if (anon_vma_clone(new_vma, vma))
			goto out_free_mempol;
		if (new_vma->vm_file)
			get_file(new_vma->vm_file);
		if (new_vma->vm_ops && new_vma->vm_ops->open)
			new_vma->vm_ops->open(new_vma);
		vma_link(mm, new_vma, prev, rb_link, rb_parent);
		*need_rmap_locks = false;
	}
	return new_vma;

out_free_mempol:
	mpol_put(vma_policy(new_vma));
out_free_vma:
	vm_area_free(new_vma);
out:
	return NULL;
}
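
/*
 * Worked example (illustrative, not part of the original file): with
 * PAGE_SHIFT == 12, moving a not-yet-faulted anonymous vma to
 * addr == 0x7f0000003000 resets pgoff to addr >> PAGE_SHIFT == 0x7f0000003,
 * so the copied vma stays "linear" and remains mergeable with neighbours.
 * need_rmap_locks ends up true only when the copy lands at a lower (or
 * equal) vm_pgoff, where move_page_tables() could otherwise race with rmap
 * walks scanning the interval trees in order.
 */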

/*
 * Return true if the calling process may expand its vm space by the passed
 * number of pages
 */
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
{
	if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
		return false;

	if (is_data_mapping(flags) &&
	    mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
		/* Workaround for Valgrind */
		if (rlimit(RLIMIT_DATA) == 0 &&
		    mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT)
			return true;

		pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits%s.\n",
			     current->comm, current->pid,
			     (mm->data_vm + npages) << PAGE_SHIFT,
			     rlimit(RLIMIT_DATA),
			     ignore_rlimit_data ? "" : " or use boot option ignore_rlimit_data");

		if (!ignore_rlimit_data)
			return false;
	}

	return true;
}

void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
{
	mm->total_vm += npages;

	if (is_exec_mapping(flags))
		mm->exec_vm += npages;
	else if (is_stack_mapping(flags))
		mm->stack_vm += npages;
	else if (is_data_mapping(flags))
		mm->data_vm += npages;
}

static vm_fault_t special_mapping_fault(struct vm_fault *vmf);

/*
 * Having a close hook prevents vma merging regardless of flags.
 */
static void special_mapping_close(struct vm_area_struct *vma)
{
}

static const char *special_mapping_name(struct vm_area_struct *vma)
{
	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
}

static int special_mapping_mremap(struct vm_area_struct *new_vma)
{
	struct vm_special_mapping *sm = new_vma->vm_private_data;

	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
		return -EFAULT;

	if (sm->mremap)
		return sm->mremap(sm, new_vma);

	return 0;
}

static const struct vm_operations_struct special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
	.mremap = special_mapping_mremap,
	.name = special_mapping_name,
	/* vDSO code relies on VVAR not being accessible remotely */
	.access = NULL,
};

static const struct vm_operations_struct legacy_special_mapping_vmops = {
	.close = special_mapping_close,
	.fault = special_mapping_fault,
};

static vm_fault_t special_mapping_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	pgoff_t pgoff;
	struct page **pages;

	if (vma->vm_ops == &legacy_special_mapping_vmops) {
		pages = vma->vm_private_data;
	} else {
		struct vm_special_mapping *sm = vma->vm_private_data;

		if (sm->fault)
			return sm->fault(sm, vmf->vma, vmf);

		pages = sm->pages;
	}

	for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
		pgoff--;

	if (*pages) {
		struct page *page = *pages;
		get_page(page);
		vmf->page = page;
		return 0;
	}

	return VM_FAULT_SIGBUS;
}

static struct vm_area_struct *__install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, void *priv,
	const struct vm_operations_struct *ops)
{
	int ret;
	struct vm_area_struct *vma;

	vma = vm_area_alloc(mm);
	if (unlikely(vma == NULL))
		return ERR_PTR(-ENOMEM);

	vma->vm_start = addr;
	vma->vm_end = addr + len;

	vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);

	vma->vm_ops = ops;
	vma->vm_private_data = priv;

	ret = insert_vm_struct(mm, vma);
	if (ret)
		goto out;

	vm_stat_account(mm, vma->vm_flags, len >> PAGE_SHIFT);

	perf_event_mmap(vma);

	return vma;

out:
	vm_area_free(vma);
	return ERR_PTR(ret);
}

bool vma_is_special_mapping(const struct vm_area_struct *vma,
	const struct vm_special_mapping *sm)
{
	return vma->vm_private_data == sm &&
		(vma->vm_ops == &special_mapping_vmops ||
		 vma->vm_ops == &legacy_special_mapping_vmops);
}
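
/*
 * Worked example (illustrative, not part of the original file): for a
 * special mapping backed by a NULL-terminated array { pgA, pgB, NULL },
 * a fault at vmf->pgoff == 1 walks the loop in special_mapping_fault()
 * once and returns pgB with an extra reference; a fault at pgoff >= 2 runs
 * into the NULL terminator and yields VM_FAULT_SIGBUS, matching the
 * comment on _install_special_mapping() below.
 */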

/*
 * Called with mm->mmap_lock held for writing.
 * Insert a new vma covering the given region, with the given flags.
 * Its pages are supplied by the given array of struct page *.
 * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
 * The region past the last page supplied will always produce SIGBUS.
 * The array pointer and the pages it points to are assumed to stay alive
 * for as long as this mapping might exist.
 */
struct vm_area_struct *_install_special_mapping(
	struct mm_struct *mm,
	unsigned long addr, unsigned long len,
	unsigned long vm_flags, const struct vm_special_mapping *spec)
{
	return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
					&special_mapping_vmops);
}

int install_special_mapping(struct mm_struct *mm,
			    unsigned long addr, unsigned long len,
			    unsigned long vm_flags, struct page **pages)
{
	struct vm_area_struct *vma = __install_special_mapping(
		mm, addr, len, vm_flags, (void *)pages,
		&legacy_special_mapping_vmops);

	return PTR_ERR_OR_ZERO(vma);
}
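
/*
 * Illustrative usage sketch (not part of the original file): architectures
 * typically install their vDSO this way.  The vm_special_mapping and the
 * page array must outlive the mapping; the names below are hypothetical:
 *
 *	static struct page *vdso_pages[2];	// [1] stays NULL: terminator
 *	static const struct vm_special_mapping vdso_mapping = {
 *		.name  = "[vdso]",
 *		.pages = vdso_pages,
 *	};
 *
 *	vdso_pages[0] = virt_to_page(vdso_image_start);
 *	mmap_write_lock(mm);
 *	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
 *				       VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
 *				       &vdso_mapping);
 *	mmap_write_unlock(mm);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 */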

static DEFINE_MUTEX(mm_all_locks_mutex);

static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
	if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
		/*
		 * We can safely modify head.next after taking the
		 * anon_vma->root->rwsem. If some other vma in this mm shares
		 * the same anon_vma we won't take it again.
		 *
		 * No need for atomic instructions here, head.next
		 * can't change from under us thanks to the
		 * anon_vma->root->rwsem.
		 */
		if (__test_and_set_bit(0, (unsigned long *)
				       &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
	}
}

static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
	if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change from under us because
		 * we hold the mm_all_locks_mutex.
		 *
		 * Operations on ->flags have to be atomic because
		 * even if AS_MM_ALL_LOCKS is stable thanks to the
		 * mm_all_locks_mutex, there may be other cpus
		 * changing other bitflags in parallel to us.
		 */
		if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
			BUG();
		down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
	}
}
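
/*
 * Illustrative note (not part of the original file): the flag bits set above
 * are what keep mm_take_all_locks() from taking the same lock twice.  For
 * example, if three vmas in the mm map the same file, the first
 * vm_lock_mapping() call sets AS_MM_ALL_LOCKS and takes i_mmap_rwsem; the
 * next two calls see the bit already set and return without touching the
 * rwsem again.  The same trick is played on the anon_vma root via the LSB
 * of its rb_root pointer.
 */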

/*
 * This operation locks against the VM for all pte/vma/mm related
 * operations that could ever happen on a certain mm. This includes
 * vmtruncate, try_to_unmap, and all page faults.
 *
 * The caller must take the mmap_lock in write mode before calling
 * mm_take_all_locks(). The caller isn't allowed to release the
 * mmap_lock until mm_drop_all_locks() returns.
 *
 * mmap_lock in write mode is required in order to block all operations
 * that could modify pagetables and free pages without need of
 * altering the vma layout. It's also needed in write mode to avoid new
 * anon_vmas being associated with existing vmas.
 *
 * A single task can't take more than one mm_take_all_locks() in a row
 * or it would deadlock.
 *
 * The LSB in anon_vma->rb_root.rb_node and the AS_MM_ALL_LOCKS bitflag in
 * mapping->flags avoid taking the same lock twice, if more than one
 * vma in this mm is backed by the same anon_vma or address_space.
 *
 * We take locks in the following order, according to the comment at the
 * beginning of mm/rmap.c:
 *   - all hugetlbfs_i_mmap_rwsem_key locks (aka mapping->i_mmap_rwsem for
 *     hugetlb mapping);
 *   - all i_mmap_rwsem locks;
 *   - all anon_vma->rwsem locks.
 *
 * We can take all locks within these types randomly because the VM code
 * doesn't nest them and we are protected from parallel mm_take_all_locks()
 * by mm_all_locks_mutex.
 *
 * mm_take_all_locks() and mm_drop_all_locks() are expensive operations
 * that may have to take thousands of locks.
 *
 * mm_take_all_locks() can fail if it's interrupted by signals.
 */
int mm_take_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(mmap_read_trylock(mm));

	mutex_lock(&mm_all_locks_mutex);

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->vm_file && vma->vm_file->f_mapping &&
				!is_vm_hugetlb_page(vma))
			vm_lock_mapping(mm, vma->vm_file->f_mapping);
	}

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (signal_pending(current))
			goto out_unlock;
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_lock_anon_vma(mm, avc->anon_vma);
	}

	return 0;

out_unlock:
	mm_drop_all_locks(mm);
	return -EINTR;
}
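
/*
 * Illustrative usage sketch (not part of the original file): the expected
 * calling pattern, used for example when registering mmu notifiers, wraps
 * the take/drop pair inside the mmap_lock write section:
 *
 *	mmap_write_lock(mm);
 *	ret = mm_take_all_locks(mm);
 *	if (ret)
 *		goto out_unlock;	// -EINTR: a signal arrived, nothing held
 *	// ... mm-wide update that must exclude all faults and rmap walks ...
 *	mm_drop_all_locks(mm);
 * out_unlock:
 *	mmap_write_unlock(mm);
 */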

static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
	if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
		/*
		 * The LSB of head.next can't change to 0 from under
		 * us because we hold the mm_all_locks_mutex.
		 *
		 * We must however clear the bitflag before unlocking
		 * the vma so the users using the anon_vma->rb_root will
		 * never see our bitflag.
		 *
		 * No need for atomic instructions here, head.next
		 * can't change from under us until we release the
		 * anon_vma->root->rwsem.
		 */
		if (!__test_and_clear_bit(0, (unsigned long *)
					  &anon_vma->root->rb_root.rb_root.rb_node))
			BUG();
		anon_vma_unlock_write(anon_vma);
	}
}

static void vm_unlock_mapping(struct address_space *mapping)
{
	if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
		/*
		 * AS_MM_ALL_LOCKS can't change to 0 from under us
		 * because we hold the mm_all_locks_mutex.
		 */
		i_mmap_unlock_write(mapping);
		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
					&mapping->flags))
			BUG();
	}
}

/*
 * The mmap_lock cannot be released by the caller until
 * mm_drop_all_locks() returns.
 */
void mm_drop_all_locks(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	struct anon_vma_chain *avc;

	BUG_ON(mmap_read_trylock(mm));
	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->anon_vma)
			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
				vm_unlock_anon_vma(avc->anon_vma);
		if (vma->vm_file && vma->vm_file->f_mapping)
			vm_unlock_mapping(vma->vm_file->f_mapping);
	}

	mutex_unlock(&mm_all_locks_mutex);
}

/*
 * initialise the percpu counter for VM
 */
void __init mmap_init(void)
{
	int ret;

	ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL);
	VM_BUG_ON(ret);
}

/*
 * Initialise sysctl_user_reserve_kbytes.
 *
 * This is intended to prevent a user from starting a single memory hogging
 * process, such that they cannot recover (kill the hog) in OVERCOMMIT_NEVER
 * mode.
 *
 * The default value is min(3% of free memory, 128MB)
 * 128MB is enough to recover with sshd/login, bash, and top/kill.
 */
static int init_user_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
	return 0;
}
subsys_initcall(init_user_reserve);

/*
 * Initialise sysctl_admin_reserve_kbytes.
 *
 * The purpose of sysctl_admin_reserve_kbytes is to allow the sys admin
 * to log in and kill a memory hogging process.
 *
 * Systems with more than 256MB will reserve 8MB, enough to recover
 * with sshd, bash, and top in OVERCOMMIT_GUESS. Smaller systems will
 * only reserve 3% of free pages by default.
 */
static int init_admin_reserve(void)
{
	unsigned long free_kbytes;

	free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

	sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
	return 0;
}
subsys_initcall(init_admin_reserve);
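
/*
 * Worked example (illustrative, not part of the original file): free_kbytes
 * is derived from the free page count, so "/ 32" is roughly 3% (3.125%),
 * and the caps are 1UL << 17 KiB == 128 MiB and 1UL << 13 KiB == 8 MiB.
 * On a machine with 4 GiB free, 4 GiB / 32 == 128 MiB, so the user reserve
 * lands exactly on its cap while the admin reserve is clamped to 8 MiB.
 */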

/*
 * Reinitialise user and admin reserves if memory is added or removed.
 *
 * The default user reserve max is 128MB, and the default max for the
 * admin reserve is 8MB. These are usually, but not always, enough to
 * enable recovery from a memory hogging process using login/sshd, a shell,
 * and tools like top. It may make sense to increase or even disable the
 * reserve depending on the existence of swap or variations in the recovery
 * tools. So, the admin may have changed them.
 *
 * If memory is added and the reserves have been eliminated or increased above
 * the default max, then we'll trust the admin.
 *
 * If memory is removed and there isn't enough free memory, then we
 * need to reset the reserves.
 *
 * Otherwise keep the reserve set by the admin.
 */
static int reserve_mem_notifier(struct notifier_block *nb,
			     unsigned long action, void *data)
{
	unsigned long tmp, free_kbytes;

	switch (action) {
	case MEM_ONLINE:
		/* Default max is 128MB. Leave alone if modified by operator. */
		tmp = sysctl_user_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 17))
			init_user_reserve();

		/* Default max is 8MB. Leave alone if modified by operator. */
		tmp = sysctl_admin_reserve_kbytes;
		if (0 < tmp && tmp < (1UL << 13))
			init_admin_reserve();

		break;
	case MEM_OFFLINE:
		free_kbytes = global_zone_page_state(NR_FREE_PAGES) << (PAGE_SHIFT - 10);

		if (sysctl_user_reserve_kbytes > free_kbytes) {
			init_user_reserve();
			pr_info("vm.user_reserve_kbytes reset to %lu\n",
				sysctl_user_reserve_kbytes);
		}

		if (sysctl_admin_reserve_kbytes > free_kbytes) {
			init_admin_reserve();
			pr_info("vm.admin_reserve_kbytes reset to %lu\n",
				sysctl_admin_reserve_kbytes);
		}
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block reserve_mem_nb = {
	.notifier_call = reserve_mem_notifier,
};

static int __meminit init_reserve_notifier(void)
{
	if (register_hotmemory_notifier(&reserve_mem_nb))
		pr_err("Failed registering memory add/remove notifier for admin reserve\n");

	return 0;
}
subsys_initcall(init_reserve_notifier);
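
/*
 * Illustrative note (not part of the original file): the MEM_ONLINE case
 * above only recomputes a reserve that still looks like an untouched
 * default.  For example, an operator who has run
 * "sysctl vm.user_reserve_kbytes=0" (reserve disabled), or raised the value
 * above the 128 MiB default cap, keeps that setting across memory hot-add,
 * while a value still inside the default range is refreshed to track the
 * new amount of free memory.
 */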