18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/mm/madvise.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 1999 Linus Torvalds 68c2ecf20Sopenharmony_ci * Copyright (C) 2002 Christoph Hellwig 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include <linux/mman.h> 108c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 118c2ecf20Sopenharmony_ci#include <linux/syscalls.h> 128c2ecf20Sopenharmony_ci#include <linux/mempolicy.h> 138c2ecf20Sopenharmony_ci#include <linux/page-isolation.h> 148c2ecf20Sopenharmony_ci#include <linux/page_idle.h> 158c2ecf20Sopenharmony_ci#include <linux/userfaultfd_k.h> 168c2ecf20Sopenharmony_ci#include <linux/hugetlb.h> 178c2ecf20Sopenharmony_ci#include <linux/falloc.h> 188c2ecf20Sopenharmony_ci#include <linux/fadvise.h> 198c2ecf20Sopenharmony_ci#include <linux/sched.h> 208c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 218c2ecf20Sopenharmony_ci#include <linux/mm_inline.h> 228c2ecf20Sopenharmony_ci#include <linux/string.h> 238c2ecf20Sopenharmony_ci#include <linux/uio.h> 248c2ecf20Sopenharmony_ci#include <linux/ksm.h> 258c2ecf20Sopenharmony_ci#include <linux/fs.h> 268c2ecf20Sopenharmony_ci#include <linux/file.h> 278c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 288c2ecf20Sopenharmony_ci#include <linux/backing-dev.h> 298c2ecf20Sopenharmony_ci#include <linux/pagewalk.h> 308c2ecf20Sopenharmony_ci#include <linux/swap.h> 318c2ecf20Sopenharmony_ci#include <linux/swapops.h> 328c2ecf20Sopenharmony_ci#include <linux/shmem_fs.h> 338c2ecf20Sopenharmony_ci#include <linux/mmu_notifier.h> 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_ci#include <asm/tlb.h> 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci#include "internal.h" 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_cistruct madvise_walk_private { 408c2ecf20Sopenharmony_ci struct mmu_gather *tlb; 418c2ecf20Sopenharmony_ci bool pageout; 428c2ecf20Sopenharmony_ci}; 
438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci/* 458c2ecf20Sopenharmony_ci * Any behaviour which results in changes to the vma->vm_flags needs to 468c2ecf20Sopenharmony_ci * take mmap_lock for writing. Others, which simply traverse vmas, need 478c2ecf20Sopenharmony_ci * to only take it for reading. 488c2ecf20Sopenharmony_ci */ 498c2ecf20Sopenharmony_cistatic int madvise_need_mmap_write(int behavior) 508c2ecf20Sopenharmony_ci{ 518c2ecf20Sopenharmony_ci switch (behavior) { 528c2ecf20Sopenharmony_ci case MADV_REMOVE: 538c2ecf20Sopenharmony_ci case MADV_WILLNEED: 548c2ecf20Sopenharmony_ci case MADV_DONTNEED: 558c2ecf20Sopenharmony_ci case MADV_COLD: 568c2ecf20Sopenharmony_ci case MADV_PAGEOUT: 578c2ecf20Sopenharmony_ci case MADV_FREE: 588c2ecf20Sopenharmony_ci return 0; 598c2ecf20Sopenharmony_ci default: 608c2ecf20Sopenharmony_ci /* be safe, default to 1. list exceptions explicitly */ 618c2ecf20Sopenharmony_ci return 1; 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci} 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci#ifdef CONFIG_ANON_VMA_NAME 668c2ecf20Sopenharmony_cistruct anon_vma_name *anon_vma_name_alloc(const char *name) 678c2ecf20Sopenharmony_ci{ 688c2ecf20Sopenharmony_ci struct anon_vma_name *anon_name; 698c2ecf20Sopenharmony_ci size_t count; 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci /* Add 1 for NUL terminator at the end of the anon_name->name */ 728c2ecf20Sopenharmony_ci count = strlen(name) + 1; 738c2ecf20Sopenharmony_ci anon_name = kmalloc(struct_size(anon_name, name, count), GFP_KERNEL); 748c2ecf20Sopenharmony_ci if (anon_name) { 758c2ecf20Sopenharmony_ci kref_init(&anon_name->kref); 768c2ecf20Sopenharmony_ci memcpy(anon_name->name, name, count); 778c2ecf20Sopenharmony_ci } 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci return anon_name; 808c2ecf20Sopenharmony_ci} 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_civoid anon_vma_name_free(struct kref *kref) 838c2ecf20Sopenharmony_ci{ 848c2ecf20Sopenharmony_ci struct anon_vma_name 
*anon_name = 858c2ecf20Sopenharmony_ci container_of(kref, struct anon_vma_name, kref); 868c2ecf20Sopenharmony_ci kfree(anon_name); 878c2ecf20Sopenharmony_ci} 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_cistruct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) 908c2ecf20Sopenharmony_ci{ 918c2ecf20Sopenharmony_ci mmap_assert_locked(vma->vm_mm); 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci if (vma->vm_file) 948c2ecf20Sopenharmony_ci return NULL; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci return vma->anon_name; 978c2ecf20Sopenharmony_ci} 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci/* mmap_lock should be write-locked */ 1008c2ecf20Sopenharmony_cistatic int replace_anon_vma_name(struct vm_area_struct *vma, 1018c2ecf20Sopenharmony_ci struct anon_vma_name *anon_name) 1028c2ecf20Sopenharmony_ci{ 1038c2ecf20Sopenharmony_ci struct anon_vma_name *orig_name = anon_vma_name(vma); 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci if (!anon_name) { 1068c2ecf20Sopenharmony_ci vma->anon_name = NULL; 1078c2ecf20Sopenharmony_ci anon_vma_name_put(orig_name); 1088c2ecf20Sopenharmony_ci return 0; 1098c2ecf20Sopenharmony_ci } 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci if (anon_vma_name_eq(orig_name, anon_name)) 1128c2ecf20Sopenharmony_ci return 0; 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci vma->anon_name = anon_vma_name_reuse(anon_name); 1158c2ecf20Sopenharmony_ci anon_vma_name_put(orig_name); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci return 0; 1188c2ecf20Sopenharmony_ci} 1198c2ecf20Sopenharmony_ci#else /* CONFIG_ANON_VMA_NAME */ 1208c2ecf20Sopenharmony_cistatic int replace_anon_vma_name(struct vm_area_struct *vma, 1218c2ecf20Sopenharmony_ci struct anon_vma_name *anon_name) 1228c2ecf20Sopenharmony_ci{ 1238c2ecf20Sopenharmony_ci if (anon_name) 1248c2ecf20Sopenharmony_ci return -EINVAL; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci return 0; 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci#endif /* 
CONFIG_ANON_VMA_NAME */ 1298c2ecf20Sopenharmony_ci/* 1308c2ecf20Sopenharmony_ci * Update the vm_flags on region of a vma, splitting it or merging it as 1318c2ecf20Sopenharmony_ci * necessary. Must be called with mmap_sem held for writing; 1328c2ecf20Sopenharmony_ci * Caller should ensure anon_name stability by raising its refcount even when 1338c2ecf20Sopenharmony_ci * anon_name belongs to a valid vma because this function might free that vma. 1348c2ecf20Sopenharmony_ci */ 1358c2ecf20Sopenharmony_cistatic int madvise_update_vma(struct vm_area_struct *vma, 1368c2ecf20Sopenharmony_ci struct vm_area_struct **prev, unsigned long start, 1378c2ecf20Sopenharmony_ci unsigned long end, unsigned long new_flags, 1388c2ecf20Sopenharmony_ci struct anon_vma_name *anon_name) 1398c2ecf20Sopenharmony_ci{ 1408c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 1418c2ecf20Sopenharmony_ci int error; 1428c2ecf20Sopenharmony_ci pgoff_t pgoff; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) { 1458c2ecf20Sopenharmony_ci *prev = vma; 1468c2ecf20Sopenharmony_ci return 0; 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); 1508c2ecf20Sopenharmony_ci *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma, 1518c2ecf20Sopenharmony_ci vma->vm_file, pgoff, vma_policy(vma), 1528c2ecf20Sopenharmony_ci vma->vm_userfaultfd_ctx, anon_name); 1538c2ecf20Sopenharmony_ci if (*prev) { 1548c2ecf20Sopenharmony_ci vma = *prev; 1558c2ecf20Sopenharmony_ci goto success; 1568c2ecf20Sopenharmony_ci } 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci *prev = vma; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci if (start != vma->vm_start) { 1618c2ecf20Sopenharmony_ci if (unlikely(mm->map_count >= sysctl_max_map_count)) 1628c2ecf20Sopenharmony_ci return -ENOMEM; 1638c2ecf20Sopenharmony_ci error = 
__split_vma(mm, vma, start, 1); 1648c2ecf20Sopenharmony_ci if (error) 1658c2ecf20Sopenharmony_ci return error; 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci if (end != vma->vm_end) { 1698c2ecf20Sopenharmony_ci if (unlikely(mm->map_count >= sysctl_max_map_count)) 1708c2ecf20Sopenharmony_ci return -ENOMEM; 1718c2ecf20Sopenharmony_ci error = __split_vma(mm, vma, end, 0); 1728c2ecf20Sopenharmony_ci if (error) 1738c2ecf20Sopenharmony_ci return error; 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_cisuccess: 1778c2ecf20Sopenharmony_ci /* 1788c2ecf20Sopenharmony_ci * vm_flags is protected by the mmap_lock held in write mode. 1798c2ecf20Sopenharmony_ci */ 1808c2ecf20Sopenharmony_ci vma->vm_flags = new_flags; 1818c2ecf20Sopenharmony_ci if (!vma->vm_file) { 1828c2ecf20Sopenharmony_ci error = replace_anon_vma_name(vma, anon_name); 1838c2ecf20Sopenharmony_ci if (error) 1848c2ecf20Sopenharmony_ci return error; 1858c2ecf20Sopenharmony_ci } 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci return 0; 1888c2ecf20Sopenharmony_ci} 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci#ifdef CONFIG_SWAP 1918c2ecf20Sopenharmony_cistatic int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, 1928c2ecf20Sopenharmony_ci unsigned long end, struct mm_walk *walk) 1938c2ecf20Sopenharmony_ci{ 1948c2ecf20Sopenharmony_ci pte_t *orig_pte; 1958c2ecf20Sopenharmony_ci struct vm_area_struct *vma = walk->private; 1968c2ecf20Sopenharmony_ci unsigned long index; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci if (pmd_none_or_trans_huge_or_clear_bad(pmd)) 1998c2ecf20Sopenharmony_ci return 0; 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci for (index = start; index != end; index += PAGE_SIZE) { 2028c2ecf20Sopenharmony_ci pte_t pte; 2038c2ecf20Sopenharmony_ci swp_entry_t entry; 2048c2ecf20Sopenharmony_ci struct page *page; 2058c2ecf20Sopenharmony_ci spinlock_t *ptl; 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci 
orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl); 2088c2ecf20Sopenharmony_ci pte = *(orig_pte + ((index - start) / PAGE_SIZE)); 2098c2ecf20Sopenharmony_ci pte_unmap_unlock(orig_pte, ptl); 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci if (pte_present(pte) || pte_none(pte)) 2128c2ecf20Sopenharmony_ci continue; 2138c2ecf20Sopenharmony_ci entry = pte_to_swp_entry(pte); 2148c2ecf20Sopenharmony_ci if (unlikely(non_swap_entry(entry))) 2158c2ecf20Sopenharmony_ci continue; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE, 2188c2ecf20Sopenharmony_ci vma, index, false); 2198c2ecf20Sopenharmony_ci if (page) 2208c2ecf20Sopenharmony_ci put_page(page); 2218c2ecf20Sopenharmony_ci } 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci return 0; 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cistatic const struct mm_walk_ops swapin_walk_ops = { 2278c2ecf20Sopenharmony_ci .pmd_entry = swapin_walk_pmd_entry, 2288c2ecf20Sopenharmony_ci}; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_cistatic void force_shm_swapin_readahead(struct vm_area_struct *vma, 2318c2ecf20Sopenharmony_ci unsigned long start, unsigned long end, 2328c2ecf20Sopenharmony_ci struct address_space *mapping) 2338c2ecf20Sopenharmony_ci{ 2348c2ecf20Sopenharmony_ci XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start)); 2358c2ecf20Sopenharmony_ci pgoff_t end_index = linear_page_index(vma, end + PAGE_SIZE - 1); 2368c2ecf20Sopenharmony_ci struct page *page; 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci rcu_read_lock(); 2398c2ecf20Sopenharmony_ci xas_for_each(&xas, page, end_index) { 2408c2ecf20Sopenharmony_ci swp_entry_t swap; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci if (!xa_is_value(page)) 2438c2ecf20Sopenharmony_ci continue; 2448c2ecf20Sopenharmony_ci xas_pause(&xas); 2458c2ecf20Sopenharmony_ci rcu_read_unlock(); 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci swap = 
radix_to_swp_entry(page); 2488c2ecf20Sopenharmony_ci page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE, 2498c2ecf20Sopenharmony_ci NULL, 0, false); 2508c2ecf20Sopenharmony_ci if (page) 2518c2ecf20Sopenharmony_ci put_page(page); 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci rcu_read_lock(); 2548c2ecf20Sopenharmony_ci } 2558c2ecf20Sopenharmony_ci rcu_read_unlock(); 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci lru_add_drain(); /* Push any new pages onto the LRU now */ 2588c2ecf20Sopenharmony_ci} 2598c2ecf20Sopenharmony_ci#endif /* CONFIG_SWAP */ 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci/* 2628c2ecf20Sopenharmony_ci * Schedule all required I/O operations. Do not wait for completion. 2638c2ecf20Sopenharmony_ci */ 2648c2ecf20Sopenharmony_cistatic long madvise_willneed(struct vm_area_struct *vma, 2658c2ecf20Sopenharmony_ci struct vm_area_struct **prev, 2668c2ecf20Sopenharmony_ci unsigned long start, unsigned long end) 2678c2ecf20Sopenharmony_ci{ 2688c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 2698c2ecf20Sopenharmony_ci struct file *file = vma->vm_file; 2708c2ecf20Sopenharmony_ci loff_t offset; 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci *prev = vma; 2738c2ecf20Sopenharmony_ci#ifdef CONFIG_SWAP 2748c2ecf20Sopenharmony_ci if (!file) { 2758c2ecf20Sopenharmony_ci walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma); 2768c2ecf20Sopenharmony_ci lru_add_drain(); /* Push any new pages onto the LRU now */ 2778c2ecf20Sopenharmony_ci return 0; 2788c2ecf20Sopenharmony_ci } 2798c2ecf20Sopenharmony_ci 2808c2ecf20Sopenharmony_ci if (shmem_mapping(file->f_mapping)) { 2818c2ecf20Sopenharmony_ci force_shm_swapin_readahead(vma, start, end, 2828c2ecf20Sopenharmony_ci file->f_mapping); 2838c2ecf20Sopenharmony_ci return 0; 2848c2ecf20Sopenharmony_ci } 2858c2ecf20Sopenharmony_ci#else 2868c2ecf20Sopenharmony_ci if (!file) 2878c2ecf20Sopenharmony_ci return -EBADF; 2888c2ecf20Sopenharmony_ci#endif 2898c2ecf20Sopenharmony_ci 
2908c2ecf20Sopenharmony_ci if (IS_DAX(file_inode(file))) { 2918c2ecf20Sopenharmony_ci /* no bad return value, but ignore advice */ 2928c2ecf20Sopenharmony_ci return 0; 2938c2ecf20Sopenharmony_ci } 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci /* 2968c2ecf20Sopenharmony_ci * Filesystem's fadvise may need to take various locks. We need to 2978c2ecf20Sopenharmony_ci * explicitly grab a reference because the vma (and hence the 2988c2ecf20Sopenharmony_ci * vma's reference to the file) can go away as soon as we drop 2998c2ecf20Sopenharmony_ci * mmap_lock. 3008c2ecf20Sopenharmony_ci */ 3018c2ecf20Sopenharmony_ci *prev = NULL; /* tell sys_madvise we drop mmap_lock */ 3028c2ecf20Sopenharmony_ci get_file(file); 3038c2ecf20Sopenharmony_ci offset = (loff_t)(start - vma->vm_start) 3048c2ecf20Sopenharmony_ci + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 3058c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 3068c2ecf20Sopenharmony_ci vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED); 3078c2ecf20Sopenharmony_ci fput(file); 3088c2ecf20Sopenharmony_ci mmap_read_lock(mm); 3098c2ecf20Sopenharmony_ci return 0; 3108c2ecf20Sopenharmony_ci} 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_cistatic int madvise_cold_or_pageout_pte_range(pmd_t *pmd, 3138c2ecf20Sopenharmony_ci unsigned long addr, unsigned long end, 3148c2ecf20Sopenharmony_ci struct mm_walk *walk) 3158c2ecf20Sopenharmony_ci{ 3168c2ecf20Sopenharmony_ci struct madvise_walk_private *private = walk->private; 3178c2ecf20Sopenharmony_ci struct mmu_gather *tlb = private->tlb; 3188c2ecf20Sopenharmony_ci bool pageout = private->pageout; 3198c2ecf20Sopenharmony_ci struct mm_struct *mm = tlb->mm; 3208c2ecf20Sopenharmony_ci struct vm_area_struct *vma = walk->vma; 3218c2ecf20Sopenharmony_ci pte_t *orig_pte, *pte, ptent; 3228c2ecf20Sopenharmony_ci spinlock_t *ptl; 3238c2ecf20Sopenharmony_ci struct page *page = NULL; 3248c2ecf20Sopenharmony_ci LIST_HEAD(page_list); 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci if 
(fatal_signal_pending(current)) 3278c2ecf20Sopenharmony_ci return -EINTR; 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 3308c2ecf20Sopenharmony_ci if (pmd_trans_huge(*pmd)) { 3318c2ecf20Sopenharmony_ci pmd_t orig_pmd; 3328c2ecf20Sopenharmony_ci unsigned long next = pmd_addr_end(addr, end); 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci tlb_change_page_size(tlb, HPAGE_PMD_SIZE); 3358c2ecf20Sopenharmony_ci ptl = pmd_trans_huge_lock(pmd, vma); 3368c2ecf20Sopenharmony_ci if (!ptl) 3378c2ecf20Sopenharmony_ci return 0; 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci orig_pmd = *pmd; 3408c2ecf20Sopenharmony_ci if (is_huge_zero_pmd(orig_pmd)) 3418c2ecf20Sopenharmony_ci goto huge_unlock; 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci if (unlikely(!pmd_present(orig_pmd))) { 3448c2ecf20Sopenharmony_ci VM_BUG_ON(thp_migration_supported() && 3458c2ecf20Sopenharmony_ci !is_pmd_migration_entry(orig_pmd)); 3468c2ecf20Sopenharmony_ci goto huge_unlock; 3478c2ecf20Sopenharmony_ci } 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci page = pmd_page(orig_pmd); 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci /* Do not interfere with other mappings of this page */ 3528c2ecf20Sopenharmony_ci if (page_mapcount(page) != 1) 3538c2ecf20Sopenharmony_ci goto huge_unlock; 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci if (next - addr != HPAGE_PMD_SIZE) { 3568c2ecf20Sopenharmony_ci int err; 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci get_page(page); 3598c2ecf20Sopenharmony_ci spin_unlock(ptl); 3608c2ecf20Sopenharmony_ci lock_page(page); 3618c2ecf20Sopenharmony_ci err = split_huge_page(page); 3628c2ecf20Sopenharmony_ci unlock_page(page); 3638c2ecf20Sopenharmony_ci put_page(page); 3648c2ecf20Sopenharmony_ci if (!err) 3658c2ecf20Sopenharmony_ci goto regular_page; 3668c2ecf20Sopenharmony_ci return 0; 3678c2ecf20Sopenharmony_ci } 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci if (pmd_young(orig_pmd)) { 
3708c2ecf20Sopenharmony_ci pmdp_invalidate(vma, addr, pmd); 3718c2ecf20Sopenharmony_ci orig_pmd = pmd_mkold(orig_pmd); 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci set_pmd_at(mm, addr, pmd, orig_pmd); 3748c2ecf20Sopenharmony_ci tlb_remove_pmd_tlb_entry(tlb, pmd, addr); 3758c2ecf20Sopenharmony_ci } 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci ClearPageReferenced(page); 3788c2ecf20Sopenharmony_ci test_and_clear_page_young(page); 3798c2ecf20Sopenharmony_ci if (pageout) { 3808c2ecf20Sopenharmony_ci if (!isolate_lru_page(page)) { 3818c2ecf20Sopenharmony_ci if (PageUnevictable(page)) 3828c2ecf20Sopenharmony_ci putback_lru_page(page); 3838c2ecf20Sopenharmony_ci else 3848c2ecf20Sopenharmony_ci list_add(&page->lru, &page_list); 3858c2ecf20Sopenharmony_ci } 3868c2ecf20Sopenharmony_ci } else 3878c2ecf20Sopenharmony_ci deactivate_page(page); 3888c2ecf20Sopenharmony_cihuge_unlock: 3898c2ecf20Sopenharmony_ci spin_unlock(ptl); 3908c2ecf20Sopenharmony_ci if (pageout) 3918c2ecf20Sopenharmony_ci reclaim_pages(&page_list); 3928c2ecf20Sopenharmony_ci return 0; 3938c2ecf20Sopenharmony_ci } 3948c2ecf20Sopenharmony_ci 3958c2ecf20Sopenharmony_ciregular_page: 3968c2ecf20Sopenharmony_ci if (pmd_trans_unstable(pmd)) 3978c2ecf20Sopenharmony_ci return 0; 3988c2ecf20Sopenharmony_ci#endif 3998c2ecf20Sopenharmony_ci tlb_change_page_size(tlb, PAGE_SIZE); 4008c2ecf20Sopenharmony_ci orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 4018c2ecf20Sopenharmony_ci flush_tlb_batched_pending(mm); 4028c2ecf20Sopenharmony_ci arch_enter_lazy_mmu_mode(); 4038c2ecf20Sopenharmony_ci for (; addr < end; pte++, addr += PAGE_SIZE) { 4048c2ecf20Sopenharmony_ci ptent = *pte; 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci if (pte_none(ptent)) 4078c2ecf20Sopenharmony_ci continue; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci if (!pte_present(ptent)) 4108c2ecf20Sopenharmony_ci continue; 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci page = vm_normal_page(vma, addr, 
ptent); 4138c2ecf20Sopenharmony_ci if (!page) 4148c2ecf20Sopenharmony_ci continue; 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_ci /* 4178c2ecf20Sopenharmony_ci * Creating a THP page is expensive so split it only if we 4188c2ecf20Sopenharmony_ci * are sure it's worth. Split it if we are only owner. 4198c2ecf20Sopenharmony_ci */ 4208c2ecf20Sopenharmony_ci if (PageTransCompound(page)) { 4218c2ecf20Sopenharmony_ci if (page_mapcount(page) != 1) 4228c2ecf20Sopenharmony_ci break; 4238c2ecf20Sopenharmony_ci get_page(page); 4248c2ecf20Sopenharmony_ci if (!trylock_page(page)) { 4258c2ecf20Sopenharmony_ci put_page(page); 4268c2ecf20Sopenharmony_ci break; 4278c2ecf20Sopenharmony_ci } 4288c2ecf20Sopenharmony_ci pte_unmap_unlock(orig_pte, ptl); 4298c2ecf20Sopenharmony_ci if (split_huge_page(page)) { 4308c2ecf20Sopenharmony_ci unlock_page(page); 4318c2ecf20Sopenharmony_ci put_page(page); 4328c2ecf20Sopenharmony_ci pte_offset_map_lock(mm, pmd, addr, &ptl); 4338c2ecf20Sopenharmony_ci break; 4348c2ecf20Sopenharmony_ci } 4358c2ecf20Sopenharmony_ci unlock_page(page); 4368c2ecf20Sopenharmony_ci put_page(page); 4378c2ecf20Sopenharmony_ci pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 4388c2ecf20Sopenharmony_ci pte--; 4398c2ecf20Sopenharmony_ci addr -= PAGE_SIZE; 4408c2ecf20Sopenharmony_ci continue; 4418c2ecf20Sopenharmony_ci } 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci /* 4448c2ecf20Sopenharmony_ci * Do not interfere with other mappings of this page and 4458c2ecf20Sopenharmony_ci * non-LRU page. 
4468c2ecf20Sopenharmony_ci */ 4478c2ecf20Sopenharmony_ci if (!PageLRU(page) || page_mapcount(page) != 1) 4488c2ecf20Sopenharmony_ci continue; 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageTransCompound(page), page); 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ci if (pte_young(ptent)) { 4538c2ecf20Sopenharmony_ci ptent = ptep_get_and_clear_full(mm, addr, pte, 4548c2ecf20Sopenharmony_ci tlb->fullmm); 4558c2ecf20Sopenharmony_ci ptent = pte_mkold(ptent); 4568c2ecf20Sopenharmony_ci set_pte_at(mm, addr, pte, ptent); 4578c2ecf20Sopenharmony_ci tlb_remove_tlb_entry(tlb, pte, addr); 4588c2ecf20Sopenharmony_ci } 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci /* 4618c2ecf20Sopenharmony_ci * We are deactivating a page for accelerating reclaiming. 4628c2ecf20Sopenharmony_ci * VM couldn't reclaim the page unless we clear PG_young. 4638c2ecf20Sopenharmony_ci * As a side effect, it makes confuse idle-page tracking 4648c2ecf20Sopenharmony_ci * because they will miss recent referenced history. 
4658c2ecf20Sopenharmony_ci */ 4668c2ecf20Sopenharmony_ci ClearPageReferenced(page); 4678c2ecf20Sopenharmony_ci test_and_clear_page_young(page); 4688c2ecf20Sopenharmony_ci if (pageout) { 4698c2ecf20Sopenharmony_ci if (!isolate_lru_page(page)) { 4708c2ecf20Sopenharmony_ci if (PageUnevictable(page)) 4718c2ecf20Sopenharmony_ci putback_lru_page(page); 4728c2ecf20Sopenharmony_ci else 4738c2ecf20Sopenharmony_ci list_add(&page->lru, &page_list); 4748c2ecf20Sopenharmony_ci } 4758c2ecf20Sopenharmony_ci } else 4768c2ecf20Sopenharmony_ci deactivate_page(page); 4778c2ecf20Sopenharmony_ci } 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci arch_leave_lazy_mmu_mode(); 4808c2ecf20Sopenharmony_ci pte_unmap_unlock(orig_pte, ptl); 4818c2ecf20Sopenharmony_ci if (pageout) 4828c2ecf20Sopenharmony_ci reclaim_pages(&page_list); 4838c2ecf20Sopenharmony_ci cond_resched(); 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci return 0; 4868c2ecf20Sopenharmony_ci} 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_cistatic const struct mm_walk_ops cold_walk_ops = { 4898c2ecf20Sopenharmony_ci .pmd_entry = madvise_cold_or_pageout_pte_range, 4908c2ecf20Sopenharmony_ci}; 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_cistatic void madvise_cold_page_range(struct mmu_gather *tlb, 4938c2ecf20Sopenharmony_ci struct vm_area_struct *vma, 4948c2ecf20Sopenharmony_ci unsigned long addr, unsigned long end) 4958c2ecf20Sopenharmony_ci{ 4968c2ecf20Sopenharmony_ci struct madvise_walk_private walk_private = { 4978c2ecf20Sopenharmony_ci .pageout = false, 4988c2ecf20Sopenharmony_ci .tlb = tlb, 4998c2ecf20Sopenharmony_ci }; 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci tlb_start_vma(tlb, vma); 5028c2ecf20Sopenharmony_ci walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); 5038c2ecf20Sopenharmony_ci tlb_end_vma(tlb, vma); 5048c2ecf20Sopenharmony_ci} 5058c2ecf20Sopenharmony_ci 5068c2ecf20Sopenharmony_cistatic long madvise_cold(struct vm_area_struct *vma, 5078c2ecf20Sopenharmony_ci 
struct vm_area_struct **prev, 5088c2ecf20Sopenharmony_ci unsigned long start_addr, unsigned long end_addr) 5098c2ecf20Sopenharmony_ci{ 5108c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 5118c2ecf20Sopenharmony_ci struct mmu_gather tlb; 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci *prev = vma; 5148c2ecf20Sopenharmony_ci if (!can_madv_lru_vma(vma)) 5158c2ecf20Sopenharmony_ci return -EINVAL; 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci lru_add_drain(); 5188c2ecf20Sopenharmony_ci tlb_gather_mmu(&tlb, mm, start_addr, end_addr); 5198c2ecf20Sopenharmony_ci madvise_cold_page_range(&tlb, vma, start_addr, end_addr); 5208c2ecf20Sopenharmony_ci tlb_finish_mmu(&tlb, start_addr, end_addr); 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_ci return 0; 5238c2ecf20Sopenharmony_ci} 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_cistatic void madvise_pageout_page_range(struct mmu_gather *tlb, 5268c2ecf20Sopenharmony_ci struct vm_area_struct *vma, 5278c2ecf20Sopenharmony_ci unsigned long addr, unsigned long end) 5288c2ecf20Sopenharmony_ci{ 5298c2ecf20Sopenharmony_ci struct madvise_walk_private walk_private = { 5308c2ecf20Sopenharmony_ci .pageout = true, 5318c2ecf20Sopenharmony_ci .tlb = tlb, 5328c2ecf20Sopenharmony_ci }; 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci tlb_start_vma(tlb, vma); 5358c2ecf20Sopenharmony_ci walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private); 5368c2ecf20Sopenharmony_ci tlb_end_vma(tlb, vma); 5378c2ecf20Sopenharmony_ci} 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_cistatic inline bool can_do_pageout(struct vm_area_struct *vma) 5408c2ecf20Sopenharmony_ci{ 5418c2ecf20Sopenharmony_ci if (vma_is_anonymous(vma)) 5428c2ecf20Sopenharmony_ci return true; 5438c2ecf20Sopenharmony_ci if (!vma->vm_file) 5448c2ecf20Sopenharmony_ci return false; 5458c2ecf20Sopenharmony_ci /* 5468c2ecf20Sopenharmony_ci * paging out pagecache only for non-anonymous mappings that correspond 5478c2ecf20Sopenharmony_ci * to the 
files the calling process could (if tried) open for writing; 5488c2ecf20Sopenharmony_ci * otherwise we'd be including shared non-exclusive mappings, which 5498c2ecf20Sopenharmony_ci * opens a side channel. 5508c2ecf20Sopenharmony_ci */ 5518c2ecf20Sopenharmony_ci return inode_owner_or_capable(file_inode(vma->vm_file)) || 5528c2ecf20Sopenharmony_ci inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0; 5538c2ecf20Sopenharmony_ci} 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_cistatic long madvise_pageout(struct vm_area_struct *vma, 5568c2ecf20Sopenharmony_ci struct vm_area_struct **prev, 5578c2ecf20Sopenharmony_ci unsigned long start_addr, unsigned long end_addr) 5588c2ecf20Sopenharmony_ci{ 5598c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 5608c2ecf20Sopenharmony_ci struct mmu_gather tlb; 5618c2ecf20Sopenharmony_ci 5628c2ecf20Sopenharmony_ci *prev = vma; 5638c2ecf20Sopenharmony_ci if (!can_madv_lru_vma(vma)) 5648c2ecf20Sopenharmony_ci return -EINVAL; 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci if (!can_do_pageout(vma)) 5678c2ecf20Sopenharmony_ci return 0; 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci lru_add_drain(); 5708c2ecf20Sopenharmony_ci tlb_gather_mmu(&tlb, mm, start_addr, end_addr); 5718c2ecf20Sopenharmony_ci madvise_pageout_page_range(&tlb, vma, start_addr, end_addr); 5728c2ecf20Sopenharmony_ci tlb_finish_mmu(&tlb, start_addr, end_addr); 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci return 0; 5758c2ecf20Sopenharmony_ci} 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_cistatic int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, 5788c2ecf20Sopenharmony_ci unsigned long end, struct mm_walk *walk) 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci{ 5818c2ecf20Sopenharmony_ci struct mmu_gather *tlb = walk->private; 5828c2ecf20Sopenharmony_ci struct mm_struct *mm = tlb->mm; 5838c2ecf20Sopenharmony_ci struct vm_area_struct *vma = walk->vma; 5848c2ecf20Sopenharmony_ci spinlock_t *ptl; 
5858c2ecf20Sopenharmony_ci pte_t *orig_pte, *pte, ptent; 5868c2ecf20Sopenharmony_ci struct page *page; 5878c2ecf20Sopenharmony_ci int nr_swap = 0; 5888c2ecf20Sopenharmony_ci unsigned long next; 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci next = pmd_addr_end(addr, end); 5918c2ecf20Sopenharmony_ci if (pmd_trans_huge(*pmd)) 5928c2ecf20Sopenharmony_ci if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next)) 5938c2ecf20Sopenharmony_ci goto next; 5948c2ecf20Sopenharmony_ci 5958c2ecf20Sopenharmony_ci if (pmd_trans_unstable(pmd)) 5968c2ecf20Sopenharmony_ci return 0; 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci tlb_change_page_size(tlb, PAGE_SIZE); 5998c2ecf20Sopenharmony_ci orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 6008c2ecf20Sopenharmony_ci flush_tlb_batched_pending(mm); 6018c2ecf20Sopenharmony_ci arch_enter_lazy_mmu_mode(); 6028c2ecf20Sopenharmony_ci for (; addr != end; pte++, addr += PAGE_SIZE) { 6038c2ecf20Sopenharmony_ci ptent = *pte; 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci if (pte_none(ptent)) 6068c2ecf20Sopenharmony_ci continue; 6078c2ecf20Sopenharmony_ci /* 6088c2ecf20Sopenharmony_ci * If the pte has swp_entry, just clear page table to 6098c2ecf20Sopenharmony_ci * prevent swap-in which is more expensive rather than 6108c2ecf20Sopenharmony_ci * (page allocation + zeroing). 
6118c2ecf20Sopenharmony_ci */ 6128c2ecf20Sopenharmony_ci if (!pte_present(ptent)) { 6138c2ecf20Sopenharmony_ci swp_entry_t entry; 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_ci entry = pte_to_swp_entry(ptent); 6168c2ecf20Sopenharmony_ci if (non_swap_entry(entry)) 6178c2ecf20Sopenharmony_ci continue; 6188c2ecf20Sopenharmony_ci nr_swap--; 6198c2ecf20Sopenharmony_ci free_swap_and_cache(entry); 6208c2ecf20Sopenharmony_ci pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); 6218c2ecf20Sopenharmony_ci continue; 6228c2ecf20Sopenharmony_ci } 6238c2ecf20Sopenharmony_ci 6248c2ecf20Sopenharmony_ci page = vm_normal_page(vma, addr, ptent); 6258c2ecf20Sopenharmony_ci if (!page) 6268c2ecf20Sopenharmony_ci continue; 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci /* 6298c2ecf20Sopenharmony_ci * If pmd isn't transhuge but the page is THP and 6308c2ecf20Sopenharmony_ci * is owned by only this process, split it and 6318c2ecf20Sopenharmony_ci * deactivate all pages. 6328c2ecf20Sopenharmony_ci */ 6338c2ecf20Sopenharmony_ci if (PageTransCompound(page)) { 6348c2ecf20Sopenharmony_ci if (page_mapcount(page) != 1) 6358c2ecf20Sopenharmony_ci goto out; 6368c2ecf20Sopenharmony_ci get_page(page); 6378c2ecf20Sopenharmony_ci if (!trylock_page(page)) { 6388c2ecf20Sopenharmony_ci put_page(page); 6398c2ecf20Sopenharmony_ci goto out; 6408c2ecf20Sopenharmony_ci } 6418c2ecf20Sopenharmony_ci pte_unmap_unlock(orig_pte, ptl); 6428c2ecf20Sopenharmony_ci if (split_huge_page(page)) { 6438c2ecf20Sopenharmony_ci unlock_page(page); 6448c2ecf20Sopenharmony_ci put_page(page); 6458c2ecf20Sopenharmony_ci pte_offset_map_lock(mm, pmd, addr, &ptl); 6468c2ecf20Sopenharmony_ci goto out; 6478c2ecf20Sopenharmony_ci } 6488c2ecf20Sopenharmony_ci unlock_page(page); 6498c2ecf20Sopenharmony_ci put_page(page); 6508c2ecf20Sopenharmony_ci pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 6518c2ecf20Sopenharmony_ci pte--; 6528c2ecf20Sopenharmony_ci addr -= PAGE_SIZE; 6538c2ecf20Sopenharmony_ci continue; 
6548c2ecf20Sopenharmony_ci } 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci VM_BUG_ON_PAGE(PageTransCompound(page), page); 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci if (PageSwapCache(page) || PageDirty(page)) { 6598c2ecf20Sopenharmony_ci if (!trylock_page(page)) 6608c2ecf20Sopenharmony_ci continue; 6618c2ecf20Sopenharmony_ci /* 6628c2ecf20Sopenharmony_ci * If page is shared with others, we couldn't clear 6638c2ecf20Sopenharmony_ci * PG_dirty of the page. 6648c2ecf20Sopenharmony_ci */ 6658c2ecf20Sopenharmony_ci if (page_mapcount(page) != 1) { 6668c2ecf20Sopenharmony_ci unlock_page(page); 6678c2ecf20Sopenharmony_ci continue; 6688c2ecf20Sopenharmony_ci } 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci if (PageSwapCache(page) && !try_to_free_swap(page)) { 6718c2ecf20Sopenharmony_ci unlock_page(page); 6728c2ecf20Sopenharmony_ci continue; 6738c2ecf20Sopenharmony_ci } 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci ClearPageDirty(page); 6768c2ecf20Sopenharmony_ci unlock_page(page); 6778c2ecf20Sopenharmony_ci } 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci if (pte_young(ptent) || pte_dirty(ptent)) { 6808c2ecf20Sopenharmony_ci /* 6818c2ecf20Sopenharmony_ci * Some of architecture(ex, PPC) don't update TLB 6828c2ecf20Sopenharmony_ci * with set_pte_at and tlb_remove_tlb_entry so for 6838c2ecf20Sopenharmony_ci * the portability, remap the pte with old|clean 6848c2ecf20Sopenharmony_ci * after pte clearing. 
6858c2ecf20Sopenharmony_ci */ 6868c2ecf20Sopenharmony_ci ptent = ptep_get_and_clear_full(mm, addr, pte, 6878c2ecf20Sopenharmony_ci tlb->fullmm); 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci ptent = pte_mkold(ptent); 6908c2ecf20Sopenharmony_ci ptent = pte_mkclean(ptent); 6918c2ecf20Sopenharmony_ci set_pte_at(mm, addr, pte, ptent); 6928c2ecf20Sopenharmony_ci tlb_remove_tlb_entry(tlb, pte, addr); 6938c2ecf20Sopenharmony_ci } 6948c2ecf20Sopenharmony_ci mark_page_lazyfree(page); 6958c2ecf20Sopenharmony_ci } 6968c2ecf20Sopenharmony_ciout: 6978c2ecf20Sopenharmony_ci if (nr_swap) { 6988c2ecf20Sopenharmony_ci if (current->mm == mm) 6998c2ecf20Sopenharmony_ci sync_mm_rss(mm); 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_ci add_mm_counter(mm, MM_SWAPENTS, nr_swap); 7028c2ecf20Sopenharmony_ci } 7038c2ecf20Sopenharmony_ci arch_leave_lazy_mmu_mode(); 7048c2ecf20Sopenharmony_ci pte_unmap_unlock(orig_pte, ptl); 7058c2ecf20Sopenharmony_ci cond_resched(); 7068c2ecf20Sopenharmony_cinext: 7078c2ecf20Sopenharmony_ci return 0; 7088c2ecf20Sopenharmony_ci} 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_cistatic const struct mm_walk_ops madvise_free_walk_ops = { 7118c2ecf20Sopenharmony_ci .pmd_entry = madvise_free_pte_range, 7128c2ecf20Sopenharmony_ci}; 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_cistatic int madvise_free_single_vma(struct vm_area_struct *vma, 7158c2ecf20Sopenharmony_ci unsigned long start_addr, unsigned long end_addr) 7168c2ecf20Sopenharmony_ci{ 7178c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 7188c2ecf20Sopenharmony_ci struct mmu_notifier_range range; 7198c2ecf20Sopenharmony_ci struct mmu_gather tlb; 7208c2ecf20Sopenharmony_ci 7218c2ecf20Sopenharmony_ci /* MADV_FREE works for only anon vma at the moment */ 7228c2ecf20Sopenharmony_ci if (!vma_is_anonymous(vma)) 7238c2ecf20Sopenharmony_ci return -EINVAL; 7248c2ecf20Sopenharmony_ci 7258c2ecf20Sopenharmony_ci range.start = max(vma->vm_start, start_addr); 7268c2ecf20Sopenharmony_ci if 
(range.start >= vma->vm_end) 7278c2ecf20Sopenharmony_ci return -EINVAL; 7288c2ecf20Sopenharmony_ci range.end = min(vma->vm_end, end_addr); 7298c2ecf20Sopenharmony_ci if (range.end <= vma->vm_start) 7308c2ecf20Sopenharmony_ci return -EINVAL; 7318c2ecf20Sopenharmony_ci mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, 7328c2ecf20Sopenharmony_ci range.start, range.end); 7338c2ecf20Sopenharmony_ci 7348c2ecf20Sopenharmony_ci lru_add_drain(); 7358c2ecf20Sopenharmony_ci tlb_gather_mmu(&tlb, mm, range.start, range.end); 7368c2ecf20Sopenharmony_ci update_hiwater_rss(mm); 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci mmu_notifier_invalidate_range_start(&range); 7398c2ecf20Sopenharmony_ci tlb_start_vma(&tlb, vma); 7408c2ecf20Sopenharmony_ci walk_page_range(vma->vm_mm, range.start, range.end, 7418c2ecf20Sopenharmony_ci &madvise_free_walk_ops, &tlb); 7428c2ecf20Sopenharmony_ci tlb_end_vma(&tlb, vma); 7438c2ecf20Sopenharmony_ci mmu_notifier_invalidate_range_end(&range); 7448c2ecf20Sopenharmony_ci tlb_finish_mmu(&tlb, range.start, range.end); 7458c2ecf20Sopenharmony_ci 7468c2ecf20Sopenharmony_ci return 0; 7478c2ecf20Sopenharmony_ci} 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci/* 7508c2ecf20Sopenharmony_ci * Application no longer needs these pages. If the pages are dirty, 7518c2ecf20Sopenharmony_ci * it's OK to just throw them away. The app will be more careful about 7528c2ecf20Sopenharmony_ci * data it wants to keep. Be sure to free swap resources too. The 7538c2ecf20Sopenharmony_ci * zap_page_range call sets things up for shrink_active_list to actually free 7548c2ecf20Sopenharmony_ci * these pages later if no one else has touched them in the meantime, 7558c2ecf20Sopenharmony_ci * although we could add these pages to a global reuse list for 7568c2ecf20Sopenharmony_ci * shrink_active_list to pick up before reclaiming other pages. 
7578c2ecf20Sopenharmony_ci * 7588c2ecf20Sopenharmony_ci * NB: This interface discards data rather than pushes it out to swap, 7598c2ecf20Sopenharmony_ci * as some implementations do. This has performance implications for 7608c2ecf20Sopenharmony_ci * applications like large transactional databases which want to discard 7618c2ecf20Sopenharmony_ci * pages in anonymous maps after committing to backing store the data 7628c2ecf20Sopenharmony_ci * that was kept in them. There is no reason to write this data out to 7638c2ecf20Sopenharmony_ci * the swap area if the application is discarding it. 7648c2ecf20Sopenharmony_ci * 7658c2ecf20Sopenharmony_ci * An interface that causes the system to free clean pages and flush 7668c2ecf20Sopenharmony_ci * dirty pages is already available as msync(MS_INVALIDATE). 7678c2ecf20Sopenharmony_ci */ 7688c2ecf20Sopenharmony_cistatic long madvise_dontneed_single_vma(struct vm_area_struct *vma, 7698c2ecf20Sopenharmony_ci unsigned long start, unsigned long end) 7708c2ecf20Sopenharmony_ci{ 7718c2ecf20Sopenharmony_ci zap_page_range(vma, start, end - start); 7728c2ecf20Sopenharmony_ci return 0; 7738c2ecf20Sopenharmony_ci} 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_cistatic long madvise_dontneed_free(struct vm_area_struct *vma, 7768c2ecf20Sopenharmony_ci struct vm_area_struct **prev, 7778c2ecf20Sopenharmony_ci unsigned long start, unsigned long end, 7788c2ecf20Sopenharmony_ci int behavior) 7798c2ecf20Sopenharmony_ci{ 7808c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 7818c2ecf20Sopenharmony_ci 7828c2ecf20Sopenharmony_ci *prev = vma; 7838c2ecf20Sopenharmony_ci if (!can_madv_lru_vma(vma)) 7848c2ecf20Sopenharmony_ci return -EINVAL; 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci if (!userfaultfd_remove(vma, start, end)) { 7878c2ecf20Sopenharmony_ci *prev = NULL; /* mmap_lock has been dropped, prev is stale */ 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci mmap_read_lock(mm); 7908c2ecf20Sopenharmony_ci vma = find_vma(mm, 
start); 7918c2ecf20Sopenharmony_ci if (!vma) 7928c2ecf20Sopenharmony_ci return -ENOMEM; 7938c2ecf20Sopenharmony_ci if (start < vma->vm_start) { 7948c2ecf20Sopenharmony_ci /* 7958c2ecf20Sopenharmony_ci * This "vma" under revalidation is the one 7968c2ecf20Sopenharmony_ci * with the lowest vma->vm_start where start 7978c2ecf20Sopenharmony_ci * is also < vma->vm_end. If start < 7988c2ecf20Sopenharmony_ci * vma->vm_start it means an hole materialized 7998c2ecf20Sopenharmony_ci * in the user address space within the 8008c2ecf20Sopenharmony_ci * virtual range passed to MADV_DONTNEED 8018c2ecf20Sopenharmony_ci * or MADV_FREE. 8028c2ecf20Sopenharmony_ci */ 8038c2ecf20Sopenharmony_ci return -ENOMEM; 8048c2ecf20Sopenharmony_ci } 8058c2ecf20Sopenharmony_ci if (!can_madv_lru_vma(vma)) 8068c2ecf20Sopenharmony_ci return -EINVAL; 8078c2ecf20Sopenharmony_ci if (end > vma->vm_end) { 8088c2ecf20Sopenharmony_ci /* 8098c2ecf20Sopenharmony_ci * Don't fail if end > vma->vm_end. If the old 8108c2ecf20Sopenharmony_ci * vma was splitted while the mmap_lock was 8118c2ecf20Sopenharmony_ci * released the effect of the concurrent 8128c2ecf20Sopenharmony_ci * operation may not cause madvise() to 8138c2ecf20Sopenharmony_ci * have an undefined result. There may be an 8148c2ecf20Sopenharmony_ci * adjacent next vma that we'll walk 8158c2ecf20Sopenharmony_ci * next. userfaultfd_remove() will generate an 8168c2ecf20Sopenharmony_ci * UFFD_EVENT_REMOVE repetition on the 8178c2ecf20Sopenharmony_ci * end-vma->vm_end range, but the manager can 8188c2ecf20Sopenharmony_ci * handle a repetition fine. 
8198c2ecf20Sopenharmony_ci */ 8208c2ecf20Sopenharmony_ci end = vma->vm_end; 8218c2ecf20Sopenharmony_ci } 8228c2ecf20Sopenharmony_ci VM_WARN_ON(start >= end); 8238c2ecf20Sopenharmony_ci } 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci if (behavior == MADV_DONTNEED) 8268c2ecf20Sopenharmony_ci return madvise_dontneed_single_vma(vma, start, end); 8278c2ecf20Sopenharmony_ci else if (behavior == MADV_FREE) 8288c2ecf20Sopenharmony_ci return madvise_free_single_vma(vma, start, end); 8298c2ecf20Sopenharmony_ci else 8308c2ecf20Sopenharmony_ci return -EINVAL; 8318c2ecf20Sopenharmony_ci} 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci/* 8348c2ecf20Sopenharmony_ci * Application wants to free up the pages and associated backing store. 8358c2ecf20Sopenharmony_ci * This is effectively punching a hole into the middle of a file. 8368c2ecf20Sopenharmony_ci */ 8378c2ecf20Sopenharmony_cistatic long madvise_remove(struct vm_area_struct *vma, 8388c2ecf20Sopenharmony_ci struct vm_area_struct **prev, 8398c2ecf20Sopenharmony_ci unsigned long start, unsigned long end) 8408c2ecf20Sopenharmony_ci{ 8418c2ecf20Sopenharmony_ci loff_t offset; 8428c2ecf20Sopenharmony_ci int error; 8438c2ecf20Sopenharmony_ci struct file *f; 8448c2ecf20Sopenharmony_ci struct mm_struct *mm = vma->vm_mm; 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_ci *prev = NULL; /* tell sys_madvise we drop mmap_lock */ 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_LOCKED) 8498c2ecf20Sopenharmony_ci return -EINVAL; 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci f = vma->vm_file; 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci if (!f || !f->f_mapping || !f->f_mapping->host) { 8548c2ecf20Sopenharmony_ci return -EINVAL; 8558c2ecf20Sopenharmony_ci } 8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_ci if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) 8588c2ecf20Sopenharmony_ci return -EACCES; 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci offset = 
(loff_t)(start - vma->vm_start) 8618c2ecf20Sopenharmony_ci + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ci /* 8648c2ecf20Sopenharmony_ci * Filesystem's fallocate may need to take i_mutex. We need to 8658c2ecf20Sopenharmony_ci * explicitly grab a reference because the vma (and hence the 8668c2ecf20Sopenharmony_ci * vma's reference to the file) can go away as soon as we drop 8678c2ecf20Sopenharmony_ci * mmap_lock. 8688c2ecf20Sopenharmony_ci */ 8698c2ecf20Sopenharmony_ci get_file(f); 8708c2ecf20Sopenharmony_ci if (userfaultfd_remove(vma, start, end)) { 8718c2ecf20Sopenharmony_ci /* mmap_lock was not released by userfaultfd_remove() */ 8728c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 8738c2ecf20Sopenharmony_ci } 8748c2ecf20Sopenharmony_ci error = vfs_fallocate(f, 8758c2ecf20Sopenharmony_ci FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 8768c2ecf20Sopenharmony_ci offset, end - start); 8778c2ecf20Sopenharmony_ci fput(f); 8788c2ecf20Sopenharmony_ci mmap_read_lock(mm); 8798c2ecf20Sopenharmony_ci return error; 8808c2ecf20Sopenharmony_ci} 8818c2ecf20Sopenharmony_ci 8828c2ecf20Sopenharmony_ci/* 8838c2ecf20Sopenharmony_ci * Apply an madvise behavior to a region of a vma. madvise_update_vma 8848c2ecf20Sopenharmony_ci * will handle splitting a vm area into separate areas, each area with its own 8858c2ecf20Sopenharmony_ci * behavior. 
8868c2ecf20Sopenharmony_ci */ 8878c2ecf20Sopenharmony_cistatic int madvise_vma_behavior(struct vm_area_struct *vma, 8888c2ecf20Sopenharmony_ci struct vm_area_struct **prev, 8898c2ecf20Sopenharmony_ci unsigned long start, unsigned long end, 8908c2ecf20Sopenharmony_ci unsigned long behavior) 8918c2ecf20Sopenharmony_ci{ 8928c2ecf20Sopenharmony_ci int error; 8938c2ecf20Sopenharmony_ci struct anon_vma_name *anon_name; 8948c2ecf20Sopenharmony_ci unsigned long new_flags = vma->vm_flags; 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci switch (behavior) { 8978c2ecf20Sopenharmony_ci case MADV_REMOVE: 8988c2ecf20Sopenharmony_ci return madvise_remove(vma, prev, start, end); 8998c2ecf20Sopenharmony_ci case MADV_WILLNEED: 9008c2ecf20Sopenharmony_ci return madvise_willneed(vma, prev, start, end); 9018c2ecf20Sopenharmony_ci case MADV_COLD: 9028c2ecf20Sopenharmony_ci return madvise_cold(vma, prev, start, end); 9038c2ecf20Sopenharmony_ci case MADV_PAGEOUT: 9048c2ecf20Sopenharmony_ci return madvise_pageout(vma, prev, start, end); 9058c2ecf20Sopenharmony_ci case MADV_FREE: 9068c2ecf20Sopenharmony_ci case MADV_DONTNEED: 9078c2ecf20Sopenharmony_ci return madvise_dontneed_free(vma, prev, start, end, behavior); 9088c2ecf20Sopenharmony_ci case MADV_NORMAL: 9098c2ecf20Sopenharmony_ci new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; 9108c2ecf20Sopenharmony_ci break; 9118c2ecf20Sopenharmony_ci case MADV_SEQUENTIAL: 9128c2ecf20Sopenharmony_ci new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; 9138c2ecf20Sopenharmony_ci break; 9148c2ecf20Sopenharmony_ci case MADV_RANDOM: 9158c2ecf20Sopenharmony_ci new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; 9168c2ecf20Sopenharmony_ci break; 9178c2ecf20Sopenharmony_ci case MADV_DONTFORK: 9188c2ecf20Sopenharmony_ci new_flags |= VM_DONTCOPY; 9198c2ecf20Sopenharmony_ci break; 9208c2ecf20Sopenharmony_ci case MADV_DOFORK: 9218c2ecf20Sopenharmony_ci if (vma->vm_flags & VM_IO) 9228c2ecf20Sopenharmony_ci return -EINVAL; 
9238c2ecf20Sopenharmony_ci new_flags &= ~VM_DONTCOPY; 9248c2ecf20Sopenharmony_ci break; 9258c2ecf20Sopenharmony_ci case MADV_WIPEONFORK: 9268c2ecf20Sopenharmony_ci /* MADV_WIPEONFORK is only supported on anonymous memory. */ 9278c2ecf20Sopenharmony_ci if (vma->vm_file || vma->vm_flags & VM_SHARED) 9288c2ecf20Sopenharmony_ci return -EINVAL; 9298c2ecf20Sopenharmony_ci new_flags |= VM_WIPEONFORK; 9308c2ecf20Sopenharmony_ci break; 9318c2ecf20Sopenharmony_ci case MADV_KEEPONFORK: 9328c2ecf20Sopenharmony_ci new_flags &= ~VM_WIPEONFORK; 9338c2ecf20Sopenharmony_ci break; 9348c2ecf20Sopenharmony_ci case MADV_DONTDUMP: 9358c2ecf20Sopenharmony_ci new_flags |= VM_DONTDUMP; 9368c2ecf20Sopenharmony_ci break; 9378c2ecf20Sopenharmony_ci case MADV_DODUMP: 9388c2ecf20Sopenharmony_ci if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) 9398c2ecf20Sopenharmony_ci return -EINVAL; 9408c2ecf20Sopenharmony_ci new_flags &= ~VM_DONTDUMP; 9418c2ecf20Sopenharmony_ci break; 9428c2ecf20Sopenharmony_ci case MADV_MERGEABLE: 9438c2ecf20Sopenharmony_ci case MADV_UNMERGEABLE: 9448c2ecf20Sopenharmony_ci error = ksm_madvise(vma, start, end, behavior, &new_flags); 9458c2ecf20Sopenharmony_ci if (error) 9468c2ecf20Sopenharmony_ci goto out; 9478c2ecf20Sopenharmony_ci break; 9488c2ecf20Sopenharmony_ci case MADV_HUGEPAGE: 9498c2ecf20Sopenharmony_ci case MADV_NOHUGEPAGE: 9508c2ecf20Sopenharmony_ci error = hugepage_madvise(vma, &new_flags, behavior); 9518c2ecf20Sopenharmony_ci if (error) 9528c2ecf20Sopenharmony_ci goto out; 9538c2ecf20Sopenharmony_ci break; 9548c2ecf20Sopenharmony_ci } 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci anon_name = anon_vma_name(vma); 9578c2ecf20Sopenharmony_ci anon_vma_name_get(anon_name); 9588c2ecf20Sopenharmony_ci error = madvise_update_vma(vma, prev, start, end, new_flags, 9598c2ecf20Sopenharmony_ci anon_name); 9608c2ecf20Sopenharmony_ci anon_vma_name_put(anon_name); 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ciout: 9638c2ecf20Sopenharmony_ci /* 
9648c2ecf20Sopenharmony_ci * madvise() returns EAGAIN if kernel resources, such as 9658c2ecf20Sopenharmony_ci * slab, are temporarily unavailable. 9668c2ecf20Sopenharmony_ci */ 9678c2ecf20Sopenharmony_ci if (error == -ENOMEM) 9688c2ecf20Sopenharmony_ci error = -EAGAIN; 9698c2ecf20Sopenharmony_ci return error; 9708c2ecf20Sopenharmony_ci} 9718c2ecf20Sopenharmony_ci 9728c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE 9738c2ecf20Sopenharmony_ci/* 9748c2ecf20Sopenharmony_ci * Error injection support for memory error handling. 9758c2ecf20Sopenharmony_ci */ 9768c2ecf20Sopenharmony_cistatic int madvise_inject_error(int behavior, 9778c2ecf20Sopenharmony_ci unsigned long start, unsigned long end) 9788c2ecf20Sopenharmony_ci{ 9798c2ecf20Sopenharmony_ci struct zone *zone; 9808c2ecf20Sopenharmony_ci unsigned long size; 9818c2ecf20Sopenharmony_ci 9828c2ecf20Sopenharmony_ci if (!capable(CAP_SYS_ADMIN)) 9838c2ecf20Sopenharmony_ci return -EPERM; 9848c2ecf20Sopenharmony_ci 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci for (; start < end; start += size) { 9878c2ecf20Sopenharmony_ci unsigned long pfn; 9888c2ecf20Sopenharmony_ci struct page *page; 9898c2ecf20Sopenharmony_ci int ret; 9908c2ecf20Sopenharmony_ci 9918c2ecf20Sopenharmony_ci ret = get_user_pages_fast(start, 1, 0, &page); 9928c2ecf20Sopenharmony_ci if (ret != 1) 9938c2ecf20Sopenharmony_ci return ret; 9948c2ecf20Sopenharmony_ci pfn = page_to_pfn(page); 9958c2ecf20Sopenharmony_ci 9968c2ecf20Sopenharmony_ci /* 9978c2ecf20Sopenharmony_ci * When soft offlining hugepages, after migrating the page 9988c2ecf20Sopenharmony_ci * we dissolve it, therefore in the second loop "page" will 9998c2ecf20Sopenharmony_ci * no longer be a compound page. 
10008c2ecf20Sopenharmony_ci */ 10018c2ecf20Sopenharmony_ci size = page_size(compound_head(page)); 10028c2ecf20Sopenharmony_ci 10038c2ecf20Sopenharmony_ci if (behavior == MADV_SOFT_OFFLINE) { 10048c2ecf20Sopenharmony_ci pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n", 10058c2ecf20Sopenharmony_ci pfn, start); 10068c2ecf20Sopenharmony_ci ret = soft_offline_page(pfn, MF_COUNT_INCREASED); 10078c2ecf20Sopenharmony_ci } else { 10088c2ecf20Sopenharmony_ci pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n", 10098c2ecf20Sopenharmony_ci pfn, start); 10108c2ecf20Sopenharmony_ci ret = memory_failure(pfn, MF_COUNT_INCREASED); 10118c2ecf20Sopenharmony_ci } 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ci if (ret) 10148c2ecf20Sopenharmony_ci return ret; 10158c2ecf20Sopenharmony_ci } 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ci /* Ensure that all poisoned pages are removed from per-cpu lists */ 10188c2ecf20Sopenharmony_ci for_each_populated_zone(zone) 10198c2ecf20Sopenharmony_ci drain_all_pages(zone); 10208c2ecf20Sopenharmony_ci 10218c2ecf20Sopenharmony_ci return 0; 10228c2ecf20Sopenharmony_ci} 10238c2ecf20Sopenharmony_ci#endif 10248c2ecf20Sopenharmony_ci 10258c2ecf20Sopenharmony_cistatic bool 10268c2ecf20Sopenharmony_cimadvise_behavior_valid(int behavior) 10278c2ecf20Sopenharmony_ci{ 10288c2ecf20Sopenharmony_ci switch (behavior) { 10298c2ecf20Sopenharmony_ci case MADV_DOFORK: 10308c2ecf20Sopenharmony_ci case MADV_DONTFORK: 10318c2ecf20Sopenharmony_ci case MADV_NORMAL: 10328c2ecf20Sopenharmony_ci case MADV_SEQUENTIAL: 10338c2ecf20Sopenharmony_ci case MADV_RANDOM: 10348c2ecf20Sopenharmony_ci case MADV_REMOVE: 10358c2ecf20Sopenharmony_ci case MADV_WILLNEED: 10368c2ecf20Sopenharmony_ci case MADV_DONTNEED: 10378c2ecf20Sopenharmony_ci case MADV_FREE: 10388c2ecf20Sopenharmony_ci case MADV_COLD: 10398c2ecf20Sopenharmony_ci case MADV_PAGEOUT: 10408c2ecf20Sopenharmony_ci#ifdef CONFIG_KSM 10418c2ecf20Sopenharmony_ci case 
MADV_MERGEABLE: 10428c2ecf20Sopenharmony_ci case MADV_UNMERGEABLE: 10438c2ecf20Sopenharmony_ci#endif 10448c2ecf20Sopenharmony_ci#ifdef CONFIG_TRANSPARENT_HUGEPAGE 10458c2ecf20Sopenharmony_ci case MADV_HUGEPAGE: 10468c2ecf20Sopenharmony_ci case MADV_NOHUGEPAGE: 10478c2ecf20Sopenharmony_ci#endif 10488c2ecf20Sopenharmony_ci case MADV_DONTDUMP: 10498c2ecf20Sopenharmony_ci case MADV_DODUMP: 10508c2ecf20Sopenharmony_ci case MADV_WIPEONFORK: 10518c2ecf20Sopenharmony_ci case MADV_KEEPONFORK: 10528c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE 10538c2ecf20Sopenharmony_ci case MADV_SOFT_OFFLINE: 10548c2ecf20Sopenharmony_ci case MADV_HWPOISON: 10558c2ecf20Sopenharmony_ci#endif 10568c2ecf20Sopenharmony_ci return true; 10578c2ecf20Sopenharmony_ci 10588c2ecf20Sopenharmony_ci default: 10598c2ecf20Sopenharmony_ci return false; 10608c2ecf20Sopenharmony_ci } 10618c2ecf20Sopenharmony_ci} 10628c2ecf20Sopenharmony_ci 10638c2ecf20Sopenharmony_cistatic bool 10648c2ecf20Sopenharmony_ciprocess_madvise_behavior_valid(int behavior) 10658c2ecf20Sopenharmony_ci{ 10668c2ecf20Sopenharmony_ci switch (behavior) { 10678c2ecf20Sopenharmony_ci case MADV_COLD: 10688c2ecf20Sopenharmony_ci case MADV_PAGEOUT: 10698c2ecf20Sopenharmony_ci return true; 10708c2ecf20Sopenharmony_ci default: 10718c2ecf20Sopenharmony_ci return false; 10728c2ecf20Sopenharmony_ci } 10738c2ecf20Sopenharmony_ci} 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci/* 10768c2ecf20Sopenharmony_ci * Walk the vmas in range [start,end), and call the visit function on each one. 10778c2ecf20Sopenharmony_ci * The visit function will get start and end parameters that cover the overlap 10788c2ecf20Sopenharmony_ci * between the current vma and the original range. Any unmapped regions in the 10798c2ecf20Sopenharmony_ci * original range will result in this function returning -ENOMEM while still 10808c2ecf20Sopenharmony_ci * calling the visit function on all of the existing vmas in the range. 
10818c2ecf20Sopenharmony_ci * Must be called with the mmap_lock held for reading or writing. 10828c2ecf20Sopenharmony_ci */ 10838c2ecf20Sopenharmony_cistatic 10848c2ecf20Sopenharmony_ciint madvise_walk_vmas(struct mm_struct *mm, unsigned long start, 10858c2ecf20Sopenharmony_ci unsigned long end, unsigned long arg, 10868c2ecf20Sopenharmony_ci int (*visit)(struct vm_area_struct *vma, 10878c2ecf20Sopenharmony_ci struct vm_area_struct **prev, unsigned long start, 10888c2ecf20Sopenharmony_ci unsigned long end, unsigned long arg)) 10898c2ecf20Sopenharmony_ci{ 10908c2ecf20Sopenharmony_ci struct vm_area_struct *vma; 10918c2ecf20Sopenharmony_ci struct vm_area_struct *prev; 10928c2ecf20Sopenharmony_ci unsigned long tmp; 10938c2ecf20Sopenharmony_ci int unmapped_error = 0; 10948c2ecf20Sopenharmony_ci 10958c2ecf20Sopenharmony_ci /* 10968c2ecf20Sopenharmony_ci * If the interval [start,end) covers some unmapped address 10978c2ecf20Sopenharmony_ci * ranges, just ignore them, but return -ENOMEM at the end. 10988c2ecf20Sopenharmony_ci * - different from the way of handling in mlock etc. 10998c2ecf20Sopenharmony_ci */ 11008c2ecf20Sopenharmony_ci vma = find_vma_prev(mm, start, &prev); 11018c2ecf20Sopenharmony_ci if (vma && start > vma->vm_start) 11028c2ecf20Sopenharmony_ci prev = vma; 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_ci for (;;) { 11058c2ecf20Sopenharmony_ci int error; 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_ci /* Still start < end. */ 11088c2ecf20Sopenharmony_ci if (!vma) 11098c2ecf20Sopenharmony_ci return -ENOMEM; 11108c2ecf20Sopenharmony_ci 11118c2ecf20Sopenharmony_ci /* Here start < (end|vma->vm_end). 
*/ 11128c2ecf20Sopenharmony_ci if (start < vma->vm_start) { 11138c2ecf20Sopenharmony_ci unmapped_error = -ENOMEM; 11148c2ecf20Sopenharmony_ci start = vma->vm_start; 11158c2ecf20Sopenharmony_ci if (start >= end) 11168c2ecf20Sopenharmony_ci break; 11178c2ecf20Sopenharmony_ci } 11188c2ecf20Sopenharmony_ci 11198c2ecf20Sopenharmony_ci /* Here vma->vm_start <= start < (end|vma->vm_end) */ 11208c2ecf20Sopenharmony_ci tmp = vma->vm_end; 11218c2ecf20Sopenharmony_ci if (end < tmp) 11228c2ecf20Sopenharmony_ci tmp = end; 11238c2ecf20Sopenharmony_ci 11248c2ecf20Sopenharmony_ci /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ 11258c2ecf20Sopenharmony_ci error = visit(vma, &prev, start, tmp, arg); 11268c2ecf20Sopenharmony_ci if (error) 11278c2ecf20Sopenharmony_ci return error; 11288c2ecf20Sopenharmony_ci start = tmp; 11298c2ecf20Sopenharmony_ci if (prev && start < prev->vm_end) 11308c2ecf20Sopenharmony_ci start = prev->vm_end; 11318c2ecf20Sopenharmony_ci if (start >= end) 11328c2ecf20Sopenharmony_ci break; 11338c2ecf20Sopenharmony_ci if (prev) 11348c2ecf20Sopenharmony_ci vma = prev->vm_next; 11358c2ecf20Sopenharmony_ci else /* madvise_remove dropped mmap_lock */ 11368c2ecf20Sopenharmony_ci vma = find_vma(mm, start); 11378c2ecf20Sopenharmony_ci } 11388c2ecf20Sopenharmony_ci 11398c2ecf20Sopenharmony_ci return unmapped_error; 11408c2ecf20Sopenharmony_ci} 11418c2ecf20Sopenharmony_ci 11428c2ecf20Sopenharmony_ci#ifdef CONFIG_ANON_VMA_NAME 11438c2ecf20Sopenharmony_cistatic int madvise_vma_anon_name(struct vm_area_struct *vma, 11448c2ecf20Sopenharmony_ci struct vm_area_struct **prev, 11458c2ecf20Sopenharmony_ci unsigned long start, unsigned long end, 11468c2ecf20Sopenharmony_ci unsigned long anon_name) 11478c2ecf20Sopenharmony_ci{ 11488c2ecf20Sopenharmony_ci int error; 11498c2ecf20Sopenharmony_ci 11508c2ecf20Sopenharmony_ci /* Only anonymous mappings can be named */ 11518c2ecf20Sopenharmony_ci if (vma->vm_file) 11528c2ecf20Sopenharmony_ci return -EBADF; 
11538c2ecf20Sopenharmony_ci 11548c2ecf20Sopenharmony_ci error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, 11558c2ecf20Sopenharmony_ci (struct anon_vma_name *)anon_name); 11568c2ecf20Sopenharmony_ci 11578c2ecf20Sopenharmony_ci /* 11588c2ecf20Sopenharmony_ci * madvise() returns EAGAIN if kernel resources, such as 11598c2ecf20Sopenharmony_ci * slab, are temporarily unavailable. 11608c2ecf20Sopenharmony_ci */ 11618c2ecf20Sopenharmony_ci if (error == -ENOMEM) 11628c2ecf20Sopenharmony_ci error = -EAGAIN; 11638c2ecf20Sopenharmony_ci return error; 11648c2ecf20Sopenharmony_ci} 11658c2ecf20Sopenharmony_ci 11668c2ecf20Sopenharmony_ciint madvise_set_anon_name(struct mm_struct *mm, unsigned long start, 11678c2ecf20Sopenharmony_ci unsigned long len_in, struct anon_vma_name *anon_name) 11688c2ecf20Sopenharmony_ci{ 11698c2ecf20Sopenharmony_ci unsigned long end; 11708c2ecf20Sopenharmony_ci unsigned long len; 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci if (start & ~PAGE_MASK) 11738c2ecf20Sopenharmony_ci return -EINVAL; 11748c2ecf20Sopenharmony_ci len = (len_in + ~PAGE_MASK) & PAGE_MASK; 11758c2ecf20Sopenharmony_ci 11768c2ecf20Sopenharmony_ci /* Check to see whether len was rounded up from small -ve to zero */ 11778c2ecf20Sopenharmony_ci if (len_in && !len) 11788c2ecf20Sopenharmony_ci return -EINVAL; 11798c2ecf20Sopenharmony_ci 11808c2ecf20Sopenharmony_ci end = start + len; 11818c2ecf20Sopenharmony_ci if (end < start) 11828c2ecf20Sopenharmony_ci return -EINVAL; 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci if (end == start) 11858c2ecf20Sopenharmony_ci return 0; 11868c2ecf20Sopenharmony_ci 11878c2ecf20Sopenharmony_ci return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name, 11888c2ecf20Sopenharmony_ci madvise_vma_anon_name); 11898c2ecf20Sopenharmony_ci} 11908c2ecf20Sopenharmony_ci#endif /* CONFIG_ANON_VMA_NAME */ 11918c2ecf20Sopenharmony_ci/* 11928c2ecf20Sopenharmony_ci * The madvise(2) system call. 
*
 * Applications can use madvise() to advise the kernel how it should
 * handle paging I/O in this VM area.  The idea is to help the kernel
 * use appropriate read-ahead and caching techniques.  The information
 * provided is advisory only, and can be safely disregarded by the
 * kernel without affecting the correct operation of the application.
 *
 * behavior values:
 *  MADV_NORMAL - the default behavior is to read clusters.  This
 *		results in some read-ahead and read-behind.
 *  MADV_RANDOM - the system should read the minimum amount of data
 *		on any access, since it is unlikely that the appli-
 *		cation will need more than what it asks for.
 *  MADV_SEQUENTIAL - pages in the given range will probably be accessed
 *		once, so they can be aggressively read ahead, and
 *		can be freed soon after they are accessed.
 *  MADV_WILLNEED - the application is notifying the system to read
 *		some pages ahead.
 *  MADV_DONTNEED - the application is finished with the given range,
 *		so the kernel can free resources associated with it.
 *  MADV_FREE - the application marks pages in the given range as lazy free,
 *		where actual purges are postponed until memory pressure happens.
 *  MADV_REMOVE - the application wants to free up the given range of
 *		pages and associated backing store.
 *  MADV_DONTFORK - omit this area from child's address space when forking:
 *		typically, to avoid COWing pages pinned by get_user_pages().
 *  MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
 *  MADV_WIPEONFORK - present the child process with zero-filled memory in this
 *              range after a fork.
 *  MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
 *  MADV_HWPOISON - trigger memory error handler as if the given memory range
 *		were corrupted by unrecoverable hardware memory failure.
 *  MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
 *  MADV_MERGEABLE - the application recommends that KSM try to merge pages in
 *		this area with pages of identical content from other such areas.
 *  MADV_UNMERGEABLE- cancel MADV_MERGEABLE: no longer merge pages with others.
 *  MADV_HUGEPAGE - the application wants to back the given range by transparent
 *		huge pages in the future. Existing pages might be coalesced and
 *		new pages might be allocated as THP.
 *  MADV_NOHUGEPAGE - mark the given range as not worth being backed by
 *		transparent huge pages so the existing pages will not be
 *		coalesced into THP and new pages will not be allocated as THP.
 *  MADV_DONTDUMP - the application wants to prevent pages in the given range
 *		from being included in its core dump.
 *  MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
 *  MADV_COLD - the application is not expected to use this memory soon,
 *		deactivate pages in this range so that they can be reclaimed
 *		easily if memory pressure happens.
 *  MADV_PAGEOUT - the application is not expected to use this memory soon,
 *		page out the pages in this range immediately.
 *
 * return values:
 *  zero    - success
 *  -EINVAL - start + len < 0, start is not page-aligned,
 *		"behavior" is not a valid value, or application
 *		is attempting to release locked or shared pages,
 *		or the specified address range includes file, Huge TLB,
 *		MAP_SHARED or VM_PFNMAP range.
 *  -ENOMEM - addresses in the specified range are not currently
 *		mapped, or are outside the AS of the process.
 *  -EIO    - an I/O error occurred while paging in data.
 *  -EBADF  - map exists, but area maps something that isn't a file.
 *  -EAGAIN - a kernel resource was temporarily unavailable.
12568c2ecf20Sopenharmony_ci */ 12578c2ecf20Sopenharmony_ciint do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) 12588c2ecf20Sopenharmony_ci{ 12598c2ecf20Sopenharmony_ci unsigned long end; 12608c2ecf20Sopenharmony_ci int error; 12618c2ecf20Sopenharmony_ci int write; 12628c2ecf20Sopenharmony_ci size_t len; 12638c2ecf20Sopenharmony_ci struct blk_plug plug; 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_ci start = untagged_addr(start); 12668c2ecf20Sopenharmony_ci 12678c2ecf20Sopenharmony_ci if (!madvise_behavior_valid(behavior)) 12688c2ecf20Sopenharmony_ci return -EINVAL; 12698c2ecf20Sopenharmony_ci 12708c2ecf20Sopenharmony_ci if (!PAGE_ALIGNED(start)) 12718c2ecf20Sopenharmony_ci return -EINVAL; 12728c2ecf20Sopenharmony_ci len = PAGE_ALIGN(len_in); 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci /* Check to see whether len was rounded up from small -ve to zero */ 12758c2ecf20Sopenharmony_ci if (len_in && !len) 12768c2ecf20Sopenharmony_ci return -EINVAL; 12778c2ecf20Sopenharmony_ci 12788c2ecf20Sopenharmony_ci end = start + len; 12798c2ecf20Sopenharmony_ci if (end < start) 12808c2ecf20Sopenharmony_ci return -EINVAL; 12818c2ecf20Sopenharmony_ci 12828c2ecf20Sopenharmony_ci if (end == start) 12838c2ecf20Sopenharmony_ci return 0; 12848c2ecf20Sopenharmony_ci 12858c2ecf20Sopenharmony_ci#ifdef CONFIG_MEMORY_FAILURE 12868c2ecf20Sopenharmony_ci if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) 12878c2ecf20Sopenharmony_ci return madvise_inject_error(behavior, start, start + len_in); 12888c2ecf20Sopenharmony_ci#endif 12898c2ecf20Sopenharmony_ci 12908c2ecf20Sopenharmony_ci write = madvise_need_mmap_write(behavior); 12918c2ecf20Sopenharmony_ci if (write) { 12928c2ecf20Sopenharmony_ci if (mmap_write_lock_killable(mm)) 12938c2ecf20Sopenharmony_ci return -EINTR; 12948c2ecf20Sopenharmony_ci } else { 12958c2ecf20Sopenharmony_ci mmap_read_lock(mm); 12968c2ecf20Sopenharmony_ci } 12978c2ecf20Sopenharmony_ci 
12988c2ecf20Sopenharmony_ci blk_start_plug(&plug); 12998c2ecf20Sopenharmony_ci error = madvise_walk_vmas(mm, start, end, behavior, 13008c2ecf20Sopenharmony_ci madvise_vma_behavior); 13018c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 13028c2ecf20Sopenharmony_ci if (write) 13038c2ecf20Sopenharmony_ci mmap_write_unlock(mm); 13048c2ecf20Sopenharmony_ci else 13058c2ecf20Sopenharmony_ci mmap_read_unlock(mm); 13068c2ecf20Sopenharmony_ci 13078c2ecf20Sopenharmony_ci return error; 13088c2ecf20Sopenharmony_ci} 13098c2ecf20Sopenharmony_ci 13108c2ecf20Sopenharmony_ciSYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) 13118c2ecf20Sopenharmony_ci{ 13128c2ecf20Sopenharmony_ci return do_madvise(current->mm, start, len_in, behavior); 13138c2ecf20Sopenharmony_ci} 13148c2ecf20Sopenharmony_ci 13158c2ecf20Sopenharmony_ciSYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, 13168c2ecf20Sopenharmony_ci size_t, vlen, int, behavior, unsigned int, flags) 13178c2ecf20Sopenharmony_ci{ 13188c2ecf20Sopenharmony_ci ssize_t ret; 13198c2ecf20Sopenharmony_ci struct iovec iovstack[UIO_FASTIOV], iovec; 13208c2ecf20Sopenharmony_ci struct iovec *iov = iovstack; 13218c2ecf20Sopenharmony_ci struct iov_iter iter; 13228c2ecf20Sopenharmony_ci struct pid *pid; 13238c2ecf20Sopenharmony_ci struct task_struct *task; 13248c2ecf20Sopenharmony_ci struct mm_struct *mm; 13258c2ecf20Sopenharmony_ci size_t total_len; 13268c2ecf20Sopenharmony_ci unsigned int f_flags; 13278c2ecf20Sopenharmony_ci 13288c2ecf20Sopenharmony_ci if (flags != 0) { 13298c2ecf20Sopenharmony_ci ret = -EINVAL; 13308c2ecf20Sopenharmony_ci goto out; 13318c2ecf20Sopenharmony_ci } 13328c2ecf20Sopenharmony_ci 13338c2ecf20Sopenharmony_ci ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); 13348c2ecf20Sopenharmony_ci if (ret < 0) 13358c2ecf20Sopenharmony_ci goto out; 13368c2ecf20Sopenharmony_ci 13378c2ecf20Sopenharmony_ci pid = pidfd_get_pid(pidfd, &f_flags); 
13388c2ecf20Sopenharmony_ci if (IS_ERR(pid)) { 13398c2ecf20Sopenharmony_ci ret = PTR_ERR(pid); 13408c2ecf20Sopenharmony_ci goto free_iov; 13418c2ecf20Sopenharmony_ci } 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_ci task = get_pid_task(pid, PIDTYPE_PID); 13448c2ecf20Sopenharmony_ci if (!task) { 13458c2ecf20Sopenharmony_ci ret = -ESRCH; 13468c2ecf20Sopenharmony_ci goto put_pid; 13478c2ecf20Sopenharmony_ci } 13488c2ecf20Sopenharmony_ci 13498c2ecf20Sopenharmony_ci if (!process_madvise_behavior_valid(behavior)) { 13508c2ecf20Sopenharmony_ci ret = -EINVAL; 13518c2ecf20Sopenharmony_ci goto release_task; 13528c2ecf20Sopenharmony_ci } 13538c2ecf20Sopenharmony_ci 13548c2ecf20Sopenharmony_ci /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ 13558c2ecf20Sopenharmony_ci mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 13568c2ecf20Sopenharmony_ci if (IS_ERR_OR_NULL(mm)) { 13578c2ecf20Sopenharmony_ci ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; 13588c2ecf20Sopenharmony_ci goto release_task; 13598c2ecf20Sopenharmony_ci } 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci /* 13628c2ecf20Sopenharmony_ci * Require CAP_SYS_NICE for influencing process performance. Note that 13638c2ecf20Sopenharmony_ci * only non-destructive hints are currently supported. 
13648c2ecf20Sopenharmony_ci */ 13658c2ecf20Sopenharmony_ci if (!capable(CAP_SYS_NICE)) { 13668c2ecf20Sopenharmony_ci ret = -EPERM; 13678c2ecf20Sopenharmony_ci goto release_mm; 13688c2ecf20Sopenharmony_ci } 13698c2ecf20Sopenharmony_ci 13708c2ecf20Sopenharmony_ci total_len = iov_iter_count(&iter); 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci while (iov_iter_count(&iter)) { 13738c2ecf20Sopenharmony_ci iovec = iov_iter_iovec(&iter); 13748c2ecf20Sopenharmony_ci ret = do_madvise(mm, (unsigned long)iovec.iov_base, 13758c2ecf20Sopenharmony_ci iovec.iov_len, behavior); 13768c2ecf20Sopenharmony_ci if (ret < 0) 13778c2ecf20Sopenharmony_ci break; 13788c2ecf20Sopenharmony_ci iov_iter_advance(&iter, iovec.iov_len); 13798c2ecf20Sopenharmony_ci } 13808c2ecf20Sopenharmony_ci 13818c2ecf20Sopenharmony_ci ret = (total_len - iov_iter_count(&iter)) ? : ret; 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_cirelease_mm: 13848c2ecf20Sopenharmony_ci mmput(mm); 13858c2ecf20Sopenharmony_cirelease_task: 13868c2ecf20Sopenharmony_ci put_task_struct(task); 13878c2ecf20Sopenharmony_ciput_pid: 13888c2ecf20Sopenharmony_ci put_pid(pid); 13898c2ecf20Sopenharmony_cifree_iov: 13908c2ecf20Sopenharmony_ci kfree(iov); 13918c2ecf20Sopenharmony_ciout: 13928c2ecf20Sopenharmony_ci return ret; 13938c2ecf20Sopenharmony_ci} 1394